2020-10-07 12:05:49 +00:00
|
|
|
__filename__ = "newsdaemon.py"
|
|
|
|
__author__ = "Bob Mottram"
|
|
|
|
__license__ = "AGPL3+"
|
|
|
|
__version__ = "1.1.0"
|
|
|
|
__maintainer__ = "Bob Mottram"
|
|
|
|
__email__ = "bob@freedombone.net"
|
|
|
|
__status__ = "Production"
|
|
|
|
|
2020-10-07 13:51:29 +00:00
|
|
|
import os
|
2020-10-07 12:05:49 +00:00
|
|
|
import time
|
2020-10-07 18:46:42 +00:00
|
|
|
from collections import OrderedDict
|
2020-10-07 12:05:49 +00:00
|
|
|
from newswire import getDictFromNewswire
|
2020-10-07 16:55:15 +00:00
|
|
|
from posts import createBlogPost
|
2020-10-07 13:51:29 +00:00
|
|
|
from utils import saveJson
|
2020-10-07 16:55:15 +00:00
|
|
|
from utils import getStatusNumber
|
2020-10-07 12:05:49 +00:00
|
|
|
|
|
|
|
|
2020-10-07 16:55:15 +00:00
|
|
|
def updateFeedsIndex(baseDir: str, domain: str, postId: str) -> None:
    """Updates the index used for imported RSS feeds

    The index is the file <baseDir>/accounts/news@<domain>/outbox.index
    which holds one post filename per line, with the newest entry first.

    baseDir: base directory under which the accounts directory is found
    domain: domain of the instance, used to locate the news account
    postId: the entry to be added, written followed by a newline

    If the index does not exist then it is created containing only postId.
    If postId already appears anywhere within the index then nothing
    is changed. Failures to read or write the index are reported with
    a WARN message rather than being raised.
    """
    basePath = baseDir + '/accounts/news@' + domain
    indexFilename = basePath + '/outbox.index'

    try:
        if os.path.isfile(indexFilename):
            # read once and keep the same handle for the rewrite, so that
            # no file descriptor is left open
            with open(indexFilename, 'r+') as feedsFile:
                content = feedsFile.read()
                if postId in content:
                    # already indexed
                    return
                # prepend, so that the newest post is at the top
                feedsFile.seek(0, 0)
                feedsFile.write(postId + '\n' + content)
                print('DEBUG: feeds post added to index')
        else:
            with open(indexFilename, 'w') as feedsFile:
                feedsFile.write(postId + '\n')
    except (OSError, ValueError) as e:
        # ValueError includes text encoding and decoding failures
        print('WARN: Failed to write entry to feeds posts index ' +
              indexFilename + ' ' + str(e))
|
|
|
|
|
|
|
|
|
|
|
|
def convertRSStoActivityPub(baseDir: str, httpPrefix: str,
                            domain: str, port: int,
                            newswire: {},
                            translate: {}) -> None:
    """Creates a blog post within the outbox of the news account for
    each newswire item which has not previously been saved there
    """
    outboxDir = baseDir + '/accounts/news@' + domain + '/outbox'
    if not os.path.isdir(outboxDir):
        os.mkdir(outboxDir)

    # step through the newswire from the highest date key downwards
    for dateStr, item in sorted(newswire.items(), reverse=True):
        # ActivityPub style timestamp
        dateStr = dateStr.replace(' ', 'T').replace('+00:00', 'Z')

        statusNumber, published = getStatusNumber(dateStr)
        localPath = '/users/news/statuses/' + statusNumber
        newPostId = httpPrefix + '://' + domain + localPath
        postId = newPostId.replace('/', '#')

        # where the post is kept on disk
        filename = outboxDir + '/' + postId + '.json'
        if os.path.isfile(filename):
            # the post was created previously. If there is also a local
            # html version then point the newswire link at it
            htmlFilename = filename.replace('.json', '.html')
            if os.path.isfile(htmlFilename):
                item[1] = localPath + '.html'
            continue

        rssTitle = item[0]
        url = item[1]

        # the description is optional within a newswire item
        rssDescription = ''
        if len(item) >= 5:
            rssDescription = item[4]

        # always include the link to the original article
        if not rssDescription:
            rssDescription = url
        else:
            rssDescription += \
                '\n\n' + translate['Read more...'] + '\n' + url

        followersOnly = False
        useBlurhash = False
        blog = createBlogPost(baseDir,
                              'news', domain, port, httpPrefix,
                              rssDescription, followersOnly, False,
                              False,
                              None, None, None, useBlurhash,
                              None, None, rssTitle,
                              False,
                              None, None, None)
        if not blog:
            continue

        # replace the generated identifiers with ones based upon
        # the status number obtained from the item date
        repliesId = newPostId + '/replies'
        postObject = blog['object']
        postObject['replies']['id'] = repliesId
        postObject['replies']['first']['partOf'] = repliesId

        blog['id'] = newPostId + '/activity'
        postObject['id'] = newPostId
        postObject['atomUri'] = newPostId
        postObject['url'] = \
            httpPrefix + '://' + domain + '/@news/' + statusNumber
        postObject['published'] = dateStr

        # only index the post if it was stored successfully
        if saveJson(blog, filename):
            updateFeedsIndex(baseDir, domain, postId + '.json')
|
2020-10-07 13:51:29 +00:00
|
|
|
|
|
|
|
|
|
|
|
def runNewswireDaemon(baseDir: str, httpd,
                      httpPrefix: str, domain: str, port: int,
                      translate: {}) -> None:
    """Runs forever, refreshing the newswire from its feeds and
    converting the result into ActivityPub posts
    """
    # give the rest of the system time to start
    time.sleep(50)
    while True:
        # nothing can be fetched until the server has a session
        if not httpd.session:
            print('Newswire daemon waiting for session')
            time.sleep(60)
            continue

        # fetch the latest feeds, backing off for a while on failure
        try:
            latestNewswire = getDictFromNewswire(httpd.session, baseDir)
        except Exception as e:
            print('WARN: unable to update newswire ' + str(e))
            time.sleep(120)
            continue

        httpd.newswire = latestNewswire
        print('Newswire updated')

        convertRSStoActivityPub(baseDir,
                                httpPrefix, domain, port,
                                latestNewswire, translate)
        print('Newswire feed converted to ActivityPub')

        # pause before the feeds are checked again
        time.sleep(1200)
|
|
|
|
|
|
|
|
|
|
|
|
def runNewswireWatchdog(projectVersion: str, httpd) -> None:
    """This tries to keep the newswire update thread running even if it dies

    projectVersion: not used within this function
    httpd: server object whose thrNewswireDaemon thread is started here
        and then checked every 50 seconds. If the thread is found to be
        no longer alive then it is killed, replaced by a fresh clone
        running runNewswireDaemon and started again.

    This function loops forever and so never returns.
    """
    print('Starting newswire watchdog')
    # Keep a template made from the newswire thread itself, from which
    # replacement threads are cloned.
    # NOTE(review): this was previously cloned from httpd.thrPostSchedule,
    # which looks like a copy/paste slip. It assumes that clone() reuses
    # the arguments of the thread it is called on - confirm
    newswireOriginal = \
        httpd.thrNewswireDaemon.clone(runNewswireDaemon)
    httpd.thrNewswireDaemon.start()
    while True:
        time.sleep(50)
        # is_alive() rather than isAlive(), which threading.Thread
        # no longer provides from Python 3.9 onwards
        if httpd.thrNewswireDaemon.is_alive():
            continue
        httpd.thrNewswireDaemon.kill()
        httpd.thrNewswireDaemon = \
            newswireOriginal.clone(runNewswireDaemon)
        httpd.thrNewswireDaemon.start()
        print('Restarting newswire daemon...')
|