diff --git a/content.py b/content.py index 34598bb8..6d619509 100644 --- a/content.py +++ b/content.py @@ -353,6 +353,7 @@ def validHashTag(hashtag: str) -> bool: # long hashtags are not valid if len(hashtag) >= 32: return False + # TODO: this may need to be an international character set validChars = set('0123456789' + 'abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ') diff --git a/newsdaemon.py b/newsdaemon.py index 54dd2a83..ade72823 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -15,6 +15,7 @@ from newswire import getDictFromNewswire from posts import createNewsPost from content import removeHtmlTag from content import dangerousMarkup +from content import validHashTag from utils import loadJson from utils import saveJson from utils import getStatusNumber @@ -71,6 +72,97 @@ def removeControlCharacters(content: str) -> str: return content +def hasttagRuleResolve(tree: [], hashtags: []) -> bool: + """Returns whether the tree for a hashtag rule evaluates to true or false + """ + if not tree: + return False + + if tree[0] == 'not': + if len(tree) == 2: + if isinstance(tree[1], str): + return tree[1] not in hashtags + elif isinstance(tree[1], list): + return not hasttagRuleResolve(tree[1], hashtags) + elif tree[0] == 'and': + if len(tree) == 3: + + firstArg = False + if isinstance(tree[1], str): + firstArg = (tree[1] in hashtags) + elif isinstance(tree[1], list): + firstArg = (hasttagRuleResolve(tree[1], hashtags)) + + secondArg = False + if isinstance(tree[2], str): + secondArg = (tree[2] in hashtags) + elif isinstance(tree[2], list): + secondArg = (hasttagRuleResolve(tree[2], hashtags)) + return firstArg and secondArg + elif tree[0] == 'or': + if len(tree) == 3: + + firstArg = False + if isinstance(tree[1], str): + firstArg = (tree[1] in hashtags) + elif isinstance(tree[1], list): + firstArg = (hasttagRuleResolve(tree[1], hashtags)) + + secondArg = False + if isinstance(tree[2], str): + secondArg = (tree[2] in hashtags) + elif isinstance(tree[2], list): + secondArg = (hasttagRuleResolve(tree[2], hashtags)) + return firstArg or secondArg + elif tree[0].startswith('#') and len(tree) == 1: + return tree[0] in hashtags + + return False + + +def hashtagRuleTree(operators: [], + conditionsStr: str, + tagsInConditions: []) -> []: + """Walks the tree + """ + if not operators and conditionsStr: + conditionsStr = conditionsStr.strip() + if conditionsStr.startswith('#') or conditionsStr in operators: + if conditionsStr.startswith('#'): + if conditionsStr not in tagsInConditions: + if ' ' not in conditionsStr: + tagsInConditions.append(conditionsStr) + return [conditionsStr.strip()] + else: + return None + if not operators or not conditionsStr: + return None + tree = None + conditionsStr = conditionsStr.strip() + if conditionsStr.startswith('#') or conditionsStr in operators: + if conditionsStr.startswith('#'): + if conditionsStr not in tagsInConditions: + if ' ' not in conditionsStr: + tagsInConditions.append(conditionsStr) + tree = [conditionsStr.strip()] + ctr = 0 + while ctr < len(operators): + op = operators[ctr] + if op not in conditionsStr: + ctr += 1 + continue + else: + tree = [op] + sections = conditionsStr.split(op) + for subConditionStr in sections: + result = hashtagRuleTree(operators[ctr + 1:], subConditionStr, + tagsInConditions) + if result: + tree.append(result) + break + return tree + + def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, hashtags: str, httpPrefix: str, domain: str, port: int, @@ -82,6 +174,90 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, Returns true if the post should be saved to the news timeline of this instance """ + rulesFilename = baseDir + '/accounts/hashtagrules.txt' + if not os.path.isfile(rulesFilename): + return True + rules = [] + with open(rulesFilename, "r") as f: + rules = f.readlines() + + domainFull = domain + if port: + if port != 80 and port != 443: + domainFull = domain + ':' + str(port) + + actionOccurred = False + operators = ('not', 'and', 'or') + for ruleStr in rules: + if not ruleStr: + continue + if not ruleStr.startswith('if '): + continue + if ' then ' not in ruleStr: + continue + conditionsStr = ruleStr.split('if ', 1)[1] + conditionsStr = conditionsStr.split(' then ')[0] + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + # does the rule contain any hashtags? + if not tagsInConditions: + continue + if not hasttagRuleResolve(tree, hashtags): + continue + # the condition matches, so do something + actionStr = ruleStr.split(' then ')[1].strip() + + # add a hashtag + if actionStr.startswith('add '): + addHashtag = actionStr.split('add ', 1)[1].strip() + if addHashtag.startswith('#'): + if addHashtag not in hashtags: + hashtags.append(addHashtag) + htId = addHashtag.replace('#', '') + if validHashTag(htId): + hashtagUrl = \ + httpPrefix + "://" + domainFull + "/tags/" + htId + postJsonObject['object']['tag'][htId] = { + 'href': hashtagUrl, + 'name': addHashtag, + 'type': 'Hashtag' + } + hashtagHtml = \ + "#" + \ + htId + "" + content = postJsonObject['object']['content'] + if content.endswith('

'): + content = \ + content[:len(content) - len('

')] + \ + hashtagHtml + '

' + else: + content += hashtagHtml + postJsonObject['object']['content'] = content + actionOccurred = True + + # remove a hashtag + if actionStr.startswith('remove '): + rmHashtag = actionStr.split('remove ', 1)[1].strip() + if rmHashtag.startswith('#'): + if rmHashtag in hashtags: + hashtags.remove(rmHashtag) + htId = addHashtag.replace('#', '') + hashtagUrl = \ + httpPrefix + "://" + domainFull + "/tags/" + htId + hashtagHtml = \ + "#" + \ + htId + "" + content = postJsonObject['object']['content'] + if hashtagHtml in content: + postJsonObject['object']['content'] = \ + content.replace(hashtagHtml, '') + del postJsonObject['object']['tag'][htId] + actionOccurred = True + # TODO # If routing to another instance # sendSignedJson(postJsonObject: {}, session, baseDir: str, @@ -91,6 +267,8 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, # federationList: [], # sendThreads: [], postLog: [], cachedWebfingers: {}, # personCache: {}, False, __version__) -> int: + if actionOccurred: + return True return True @@ -210,6 +388,7 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str, # save the post and update the index if savePost: + newswire[originalDateStr][6] = hashtags if saveJson(blog, filename): updateFeedsOutboxIndex(baseDir, domain, postId + '.json') diff --git a/tests.py b/tests.py index 082108b6..956d9efe 100644 --- a/tests.py +++ b/tests.py @@ -82,6 +82,8 @@ from content import removeHtmlTag from theme import setCSSparam from jsonldsig import testSignJsonld from jsonldsig import jsonldVerify +from newsdaemon import hashtagRuleTree +from newsdaemon import hasttagRuleResolve testServerAliceRunning = False testServerBobRunning = False @@ -2173,8 +2175,64 @@ def testRemoveHtmlTag(): "src=\"https://somesiteorother.com/image.jpg\">

" +def testHashtagRuleTree(): + print('testHashtagRuleTree') + operators = ('not', 'and', 'or') + + conditionsStr = '#foo or #bar' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert str(tree) == str(['or', ['#foo'], ['#bar']]) + assert str(tagsInConditions) == str(['#foo', '#bar']) + hashtags = ['#foo'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#carrot', '#stick'] + assert not hasttagRuleResolve(tree, hashtags) + + conditionsStr = 'x' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert tree is None + assert tagsInConditions == [] + hashtags = ['#foo'] + assert not hasttagRuleResolve(tree, hashtags) + + conditionsStr = '#x' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert str(tree) == str(['#x']) + assert str(tagsInConditions) == str(['#x']) + hashtags = ['#x'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#y', '#z'] + assert not hasttagRuleResolve(tree, hashtags) + + conditionsStr = 'not #b' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert str(tree) == str(['not', ['#b']]) + assert str(tagsInConditions) == str(['#b']) + hashtags = ['#y', '#z'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#a', '#b', '#c'] + assert not hasttagRuleResolve(tree, hashtags) + + conditionsStr = '#foo or #bar and #a' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert str(tree) == str(['and', ['or', ['#foo'], ['#bar']], ['#a']]) + assert str(tagsInConditions) == str(['#foo', '#bar', '#a']) + hashtags = ['#bar', '#a'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#foo', '#a'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#x', '#a'] + assert not hasttagRuleResolve(tree, hashtags) + + def runAllTests(): print('Running tests...') + testHashtagRuleTree() testRemoveHtmlTag() testReplaceEmailQuote() testConstantTimeStringCheck()