__filename__ = "newsdaemon.py" __author__ = "Bob Mottram" __license__ = "AGPL3+" __version__ = "1.1.0" __maintainer__ = "Bob Mottram" __email__ = "bob@freedombone.net" __status__ = "Production" import os import time import datetime from collections import OrderedDict from newswire import getDictFromNewswire # from posts import sendSignedJson from posts import createNewsPost from content import removeHtmlTag from content import dangerousMarkup from content import validHashTag from utils import loadJson from utils import saveJson from utils import getStatusNumber from inbox import storeHashTags def updateFeedsOutboxIndex(baseDir: str, domain: str, postId: str) -> None: """Updates the index used for imported RSS feeds """ basePath = baseDir + '/accounts/news@' + domain indexFilename = basePath + '/outbox.index' if os.path.isfile(indexFilename): if postId not in open(indexFilename).read(): try: with open(indexFilename, 'r+') as feedsFile: content = feedsFile.read() feedsFile.seek(0, 0) feedsFile.write(postId + '\n' + content) print('DEBUG: feeds post added to index') except Exception as e: print('WARN: Failed to write entry to feeds posts index ' + indexFilename + ' ' + str(e)) else: feedsFile = open(indexFilename, 'w+') if feedsFile: feedsFile.write(postId + '\n') feedsFile.close() def saveArrivedTime(baseDir: str, postFilename: str, arrived: str) -> None: """Saves the time when an rss post arrived to a file """ arrivedFile = open(postFilename + '.arrived', 'w+') if arrivedFile: arrivedFile.write(arrived) arrivedFile.close() def removeControlCharacters(content: str) -> str: """TODO this is hacky and a better solution is needed the unicode is messing up somehow """ lookups = { "8211": "-", "8230": "...", "8216": "'", "8217": "'", "8220": '"', "8221": '"' } for code, ch in lookups.items(): content = content.replace('&' + code + ';', ch) content = content.replace('&#' + code + ';', ch) return content def hasttagRuleResolve(tree: [], hashtags: []) -> bool: """Returns whether the tree for a hashtag rule evaluates to true or false """ if not tree: return False if tree[0] == 'not': if len(tree) == 2: if isinstance(tree[1], str): return tree[1] not in hashtags elif isinstance(tree[1], list): return not hasttagRuleResolve(tree[1], hashtags) elif tree[0] == 'and': if len(tree) == 3: firstArg = False if isinstance(tree[1], str): firstArg = (tree[1] in hashtags) elif isinstance(tree[1], list): firstArg = (hasttagRuleResolve(tree[1], hashtags)) secondArg = False if isinstance(tree[2], str): secondArg = (tree[2] in hashtags) elif isinstance(tree[2], list): secondArg = (hasttagRuleResolve(tree[2], hashtags)) return firstArg and secondArg elif tree[0] == 'or': if len(tree) == 3: firstArg = False if isinstance(tree[1], str): firstArg = (tree[1] in hashtags) elif isinstance(tree[1], list): firstArg = (hasttagRuleResolve(tree[1], hashtags)) secondArg = False if isinstance(tree[2], str): secondArg = (tree[2] in hashtags) elif isinstance(tree[2], list): secondArg = (hasttagRuleResolve(tree[2], hashtags)) return firstArg or secondArg elif tree[0].startswith('#') and len(tree) == 1: return tree[0] in hashtags return False def hashtagRuleTree(operators: [], conditionsStr: str, tagsInConditions: []) -> []: """Walks the tree """ if not operators and conditionsStr: conditionsStr = conditionsStr.strip() if conditionsStr.startswith('#') or conditionsStr in operators: if conditionsStr.startswith('#'): if conditionsStr not in tagsInConditions: if ' ' not in conditionsStr: tagsInConditions.append(conditionsStr) return [conditionsStr.strip()] else: return None if not operators or not conditionsStr: return None tree = None conditionsStr = conditionsStr.strip() if conditionsStr.startswith('#') or conditionsStr in operators: if conditionsStr.startswith('#'): if conditionsStr not in tagsInConditions: if ' ' not in conditionsStr: tagsInConditions.append(conditionsStr) tree = [conditionsStr.strip()] ctr = 0 while ctr < len(operators): op = operators[ctr] if op not in conditionsStr: ctr += 1 continue else: tree = [op] sections = conditionsStr.split(op) for subConditionStr in sections: result = hashtagRuleTree(operators[ctr + 1:], subConditionStr, tagsInConditions) if result: tree.append(result) break return tree def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, hashtags: str, httpPrefix: str, domain: str, port: int, personCache: {}, cachedWebfingers: {}, federationList: [], sendThreads: [], postLog: []) -> bool: """Applies hashtag rules to a news post. Returns true if the post should be saved to the news timeline of this instance """ rulesFilename = baseDir + '/accounts/hashtagrules.txt' if not os.path.isfile(rulesFilename): return True rules = [] with open(rulesFilename, "r") as f: rules = f.readlines() domainFull = domain if port: if port != 80 and port != 443: domainFull = domain + ':' + str(port) actionOccurred = False operators = ('not', 'and', 'or') for ruleStr in rules: if not ruleStr: continue if not ruleStr.startswith('if '): continue if ' then ' not in ruleStr: continue conditionsStr = ruleStr.split('if ', 1)[1] conditionsStr = conditionsStr.split(' then ')[0] tagsInConditions = [] tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) # does the rule contain any hashtags? if not tagsInConditions: continue if not hasttagRuleResolve(tree, hashtags): continue # the condition matches, so do something actionStr = ruleStr.split(' then ')[1].strip() # add a hashtag if actionStr.startswith('add '): addHashtag = actionStr.split('add ', 1)[1].strip() if addHashtag.startswith('#'): if addHashtag not in hashtags: hashtags.append(addHashtag) htId = addHashtag.replace('#', '') if validHashTag(htId): hashtagUrl = \ httpPrefix + "://" + domainFull + "/tags/" + htId postJsonObject['object']['tag'][htId] = { 'href': hashtagUrl, 'name': addHashtag, 'type': 'Hashtag' } hashtagHtml = \ " #" + \ htId + "" content = postJsonObject['object']['content'] if content.endswith('

'): content = \ content[:len(content) - len('

')] + \ hashtagHtml + '

' else: content += hashtagHtml postJsonObject['object']['content'] = content storeHashTags(baseDir, 'news', postJsonObject) actionOccurred = True # remove a hashtag if actionStr.startswith('remove '): rmHashtag = actionStr.split('remove ', 1)[1].strip() if rmHashtag.startswith('#'): if rmHashtag in hashtags: hashtags.remove(rmHashtag) htId = addHashtag.replace('#', '') hashtagUrl = \ httpPrefix + "://" + domainFull + "/tags/" + htId hashtagHtml = \ "#" + \ htId + "" content = postJsonObject['object']['content'] if hashtagHtml in content: content = \ content.replace(hashtagHtml, '').replace(' ', ' ') postJsonObject['object']['content'] = content del postJsonObject['object']['tag'][htId] actionOccurred = True # TODO # If routing to another instance # sendSignedJson(postJsonObject: {}, session, baseDir: str, # nickname: str, domain: str, port: int, # toNickname: str, toDomain: str, toPort: int, cc: str, # httpPrefix: str, False, False, # federationList: [], # sendThreads: [], postLog: [], cachedWebfingers: {}, # personCache: {}, False, __version__) -> int: if actionOccurred: return True return True def convertRSStoActivityPub(baseDir: str, httpPrefix: str, domain: str, port: int, newswire: {}, translate: {}, recentPostsCache: {}, maxRecentPosts: int, session, cachedWebfingers: {}, personCache: {}, federationList: [], sendThreads: [], postLog: []) -> None: """Converts rss items in a newswire into posts """ basePath = baseDir + '/accounts/news@' + domain + '/outbox' if not os.path.isdir(basePath): os.mkdir(basePath) # oldest items first newswireReverse = \ OrderedDict(sorted(newswire.items(), reverse=False)) for dateStr, item in newswireReverse.items(): originalDateStr = dateStr # convert the date to the format used by ActivityPub dateStr = dateStr.replace(' ', 'T') dateStr = dateStr.replace('+00:00', 'Z') statusNumber, published = getStatusNumber(dateStr) newPostId = \ httpPrefix + '://' + domain + \ '/users/news/statuses/' + statusNumber # file where the post is stored filename = basePath + '/' + newPostId.replace('/', '#') + '.json' if os.path.isfile(filename): # don't create the post if it already exists # set the url newswire[originalDateStr][1] = \ '/users/news/statuses/' + statusNumber # set the filename newswire[originalDateStr][3] = filename continue rssTitle = removeControlCharacters(item[0]) url = item[1] if dangerousMarkup(url) or dangerousMarkup(rssTitle): continue rssDescription = '' # get the rss description if it exists rssDescription = removeControlCharacters(item[4]) if rssDescription.startswith('', '') rssDescription = '

' + rssDescription + '

' # add the off-site link to the description if rssDescription and not dangerousMarkup(rssDescription): rssDescription += \ '
' + \ translate['Read more...'] + '' else: rssDescription = \ '' + \ translate['Read more...'] + '' # remove image dimensions if ' None: """Preserve any votes or generated activitypub post filename as rss feeds are updated """ for published, fields in oldNewswire.items(): if not newNewswire.get(published): continue for i in range(1, 5): newNewswire[published][i] = fields[i] def runNewswireDaemon(baseDir: str, httpd, httpPrefix: str, domain: str, port: int, translate: {}) -> None: """Periodically updates RSS feeds """ newswireStateFilename = baseDir + '/accounts/.newswirestate.json' # initial sleep to allow the system to start up time.sleep(50) while True: # has the session been created yet? if not httpd.session: print('Newswire daemon waiting for session') time.sleep(60) continue # try to update the feeds newNewswire = None try: newNewswire = \ getDictFromNewswire(httpd.session, baseDir, httpd.maxNewswirePostsPerSource, httpd.maxNewswireFeedSizeKb) except Exception as e: print('WARN: unable to update newswire ' + str(e)) time.sleep(120) continue if not httpd.newswire: if os.path.isfile(newswireStateFilename): httpd.newswire = loadJson(newswireStateFilename) mergeWithPreviousNewswire(httpd.newswire, newNewswire) httpd.newswire = newNewswire saveJson(httpd.newswire, newswireStateFilename) print('Newswire updated') convertRSStoActivityPub(baseDir, httpPrefix, domain, port, newNewswire, translate, httpd.recentPostsCache, httpd.maxRecentPosts, httpd.session, httpd.cachedWebfingers, httpd.personCache, httpd.federationList, httpd.sendThreads, httpd.postLog) print('Newswire feed converted to ActivityPub') # wait a while before the next feeds update time.sleep(1200) def runNewswireWatchdog(projectVersion: str, httpd) -> None: """This tries to keep the newswire update thread running even if it dies """ print('Starting newswire watchdog') newswireOriginal = \ httpd.thrPostSchedule.clone(runNewswireDaemon) httpd.thrNewswireDaemon.start() while True: time.sleep(50) if not httpd.thrNewswireDaemon.isAlive(): httpd.thrNewswireDaemon.kill() httpd.thrNewswireDaemon = \ newswireOriginal.clone(runNewswireDaemon) httpd.thrNewswireDaemon.start() print('Restarting newswire daemon...')