From 5bd133ffff591df024a26fe3671647c310535318 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 16 Oct 2020 20:25:55 +0100 Subject: [PATCH 001/351] Tidying of newswire dictionary creation --- newswire.py | 50 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/newswire.py b/newswire.py index bd964580b..f145eb866 100644 --- a/newswire.py +++ b/newswire.py @@ -52,6 +52,17 @@ def rss2Footer() -> str: return rssStr +def addNewswireDictEntry(newswire: {}, dateStr: str, + title: str, link: str, + votesStatus: str, postFilename: str, + description: str, moderated: bool) -> None: + """Update the newswire dictionary + """ + newswire[dateStr] = [title, link, + votesStatus, postFilename, + description, moderated] + + def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, maxPostsPerSource: int) -> {}: """Converts an xml 2.0 string to a dictionary @@ -97,9 +108,10 @@ def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %z") postFilename = '' votesStatus = [] - result[str(publishedDate)] = [title, link, - votesStatus, postFilename, - description, moderated] + addNewswireDictEntry(result, str(publishedDate), + title, link, + votesStatus, postFilename, + description, moderated) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -112,10 +124,10 @@ def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT") postFilename = '' votesStatus = [] - result[str(publishedDate) + '+00:00'] = \ - [title, link, - votesStatus, postFilename, - description, moderated] + addNewswireDictEntry(result, str(publishedDate) + '+00:00', + title, link, + votesStatus, postFilename, + description, moderated) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -171,9 +183,10 @@ def atomFeedToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%Y-%m-%dT%H:%M:%SZ") postFilename = '' votesStatus = [] - result[str(publishedDate)] = [title, link, - votesStatus, postFilename, - description, moderated] + addNewswireDictEntry(result, str(publishedDate), + title, link, + votesStatus, postFilename, + description, moderated) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -186,10 +199,10 @@ def atomFeedToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT") postFilename = '' votesStatus = [] - result[str(publishedDate) + '+00:00'] = \ - [title, link, - votesStatus, postFilename, - description, moderated] + addNewswireDictEntry(result, str(publishedDate) + '+00:00', + title, link, + votesStatus, postFilename, + description, moderated) postCtr += 1 if postCtr >= maxPostsPerSource: break @@ -363,10 +376,11 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, if os.path.isfile(fullPostFilename + '.votes'): votes = loadJson(fullPostFilename + '.votes') description = '' - newswire[published] = \ - [postJsonObject['object']['summary'], - postJsonObject['object']['url'], votes, - fullPostFilename, description, moderated] + addNewswireDictEntry(newswire, published, + postJsonObject['object']['summary'], + postJsonObject['object']['url'], + votes, fullPostFilename, + description, moderated) ctr += 1 if ctr >= maxBlogsPerAccount: From a60491585e95ae8084451dbc88283cbf9ef01045 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 16 Oct 2020 20:49:34 +0100 Subject: [PATCH 002/351] Extract hashtags from feeds --- content.py | 7 ++++++- newswire.py | 22 +++++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/content.py b/content.py index 8b140c1f4..c2d19f6ee 100644 --- a/content.py +++ b/content.py @@ -701,7 +701,12 @@ def addHtmlTags(baseDir: str, httpPrefix: str, content = content.replace('\r', '') content = content.replace('\n', ' --linebreak-- ') content = addMusicTag(content, 'nowplaying') - words = content.replace(',', ' ').replace(';', ' ').split(' ') + contentSimplified = \ + content.replace(',', ' ').replace(';', ' ').replace('- ', ' ') + contentSimplified = contentSimplified.replace('. ', ' ').strip() + if contentSimplified.endswith('.'): + contentSimplified = contentSimplified[:len(contentSimplified)-1] + words = contentSimplified.split(' ') # remove . for words which are not mentions newWords = [] diff --git a/newswire.py b/newswire.py index f145eb866..bec28b701 100644 --- a/newswire.py +++ b/newswire.py @@ -52,6 +52,25 @@ def rss2Footer() -> str: return rssStr +def getNewswireTags(text: str) -> []: + """Returns a list of hashtags found in the given text + """ + if ' ' not in text: + return [] + textSimplified = \ + text.replace(',', ' ').replace(';', ' ').replace('- ', ' ') + textSimplified = textSimplified.replace('. ', ' ').strip() + if textSimplified.endswith('.'): + textSimplified = textSimplified[:len(textSimplified)-1] + words = textSimplified.split(' ') + tags = [] + for wrd in words: + if wrd.startswith('#'): + if wrd not in tags: + tags.append(wrd) + return tags + + def addNewswireDictEntry(newswire: {}, dateStr: str, title: str, link: str, votesStatus: str, postFilename: str, @@ -60,7 +79,8 @@ def addNewswireDictEntry(newswire: {}, dateStr: str, """ newswire[dateStr] = [title, link, votesStatus, postFilename, - description, moderated] + description, moderated, + getNewswireTags(title + ' ' + description)] def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, From a5d21852bbf3c64684f008bb05db6d7da7b65d76 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 16 Oct 2020 20:52:27 +0100 Subject: [PATCH 003/351] Avoid lone hashes --- newswire.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/newswire.py b/newswire.py index bec28b701..57f992f9f 100644 --- a/newswire.py +++ b/newswire.py @@ -66,8 +66,9 @@ def getNewswireTags(text: str) -> []: tags = [] for wrd in words: if wrd.startswith('#'): - if wrd not in tags: - tags.append(wrd) + if len(wrd) > 1: + if wrd not in tags: + tags.append(wrd) return tags From af3b7baf127a12aab4ccdf03647863f082a29852 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 16 Oct 2020 21:13:23 +0100 Subject: [PATCH 004/351] Extract hashtags from newswire feeds --- content.py | 2 +- newswire.py | 37 +++++++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/content.py b/content.py index c2d19f6ee..34598bb82 100644 --- a/content.py +++ b/content.py @@ -374,7 +374,7 @@ def addHashTags(wordStr: str, httpPrefix: str, domain: str, hashtagUrl = httpPrefix + "://" + domain + "/tags/" + hashtag postHashtags[hashtag] = { 'href': hashtagUrl, - 'name': '#'+hashtag, + 'name': '#' + hashtag, 'type': 'Hashtag' } replaceHashTags[wordStr] = " []: def addNewswireDictEntry(newswire: {}, dateStr: str, title: str, link: str, votesStatus: str, postFilename: str, - description: str, moderated: bool) -> None: + description: str, moderated: bool, + tags=[]) -> None: """Update the newswire dictionary """ + if not tags: + tags = getNewswireTags(title + ' ' + description) newswire[dateStr] = [title, link, votesStatus, postFilename, - description, moderated, - getNewswireTags(title + ' ' + description)] + description, moderated, tags] def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, @@ -340,6 +342,32 @@ def isaBlogPost(postJsonObject: {}) -> bool: return False +def getHashtagsFromPost(postJsonObject: {}) -> []: + """Returns a list of any hashtags within a post + """ + if not postJsonObject.get('object'): + return [] + if not isinstance(postJsonObject['object'], dict): + return [] + if not postJsonObject['object'].get('tag'): + return [] + if not isinstance(postJsonObject['object']['tag'], dict): + return [] + tags = [] + for tg in postJsonObject['object']['tag'].items(): + if not isinstance(tg, dict): + continue + if not tg.get('name'): + continue + if not tg.get('type'): + continue + if tg['type'] != 'Hashtag': + continue + if tg['name'] not in tags: + tags.append(tg['name']) + return tags + + def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, newswire: {}, maxBlogsPerAccount: int, @@ -401,7 +429,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, postJsonObject['object']['summary'], postJsonObject['object']['url'], votes, fullPostFilename, - description, moderated) + description, moderated, + getHashtagsFromPost(postJsonObject)) ctr += 1 if ctr >= maxBlogsPerAccount: From 5ed417138a5a8d138d86613dc7f779445729f8b2 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 16 Oct 2020 21:46:34 +0100 Subject: [PATCH 005/351] Quick check for hashtags --- newswire.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/newswire.py b/newswire.py index fc07eca28..40a33c127 100644 --- a/newswire.py +++ b/newswire.py @@ -55,6 +55,8 @@ def rss2Footer() -> str: def getNewswireTags(text: str) -> []: """Returns a list of hashtags found in the given text """ + if '#' not in text: + return [] if ' ' not in text: return [] textSimplified = \ From 18034d20fe2dab348cfe46a2922a2275f32d494e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 16 Oct 2020 22:33:18 +0100 Subject: [PATCH 006/351] Prepare for hashtag processing --- newsdaemon.py | 72 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 17 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index 0e861bf06..54dd2a835 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -11,6 +11,7 @@ import time import datetime from collections import OrderedDict from newswire import getDictFromNewswire +# from posts import sendSignedJson from posts import createNewsPost from content import removeHtmlTag from content import dangerousMarkup @@ -70,13 +71,38 @@ def removeControlCharacters(content: str) -> str: return content +def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, + hashtags: str, httpPrefix: str, + domain: str, port: int, + personCache: {}, + cachedWebfingers: {}, + federationList: [], + sendThreads: [], postLog: []) -> bool: + """Applies hashtag rules to a news post. + Returns true if the post should be saved to the news timeline + of this instance + """ + # TODO + # If routing to another instance + # sendSignedJson(postJsonObject: {}, session, baseDir: str, + # nickname: str, domain: str, port: int, + # toNickname: str, toDomain: str, toPort: int, cc: str, + # httpPrefix: str, False, False, + # federationList: [], + # sendThreads: [], postLog: [], cachedWebfingers: {}, + # personCache: {}, False, __version__) -> int: + return True + + def convertRSStoActivityPub(baseDir: str, httpPrefix: str, domain: str, port: int, newswire: {}, translate: {}, recentPostsCache: {}, maxRecentPosts: int, session, cachedWebfingers: {}, - personCache: {}) -> None: + personCache: {}, + federationList: [], + sendThreads: [], postLog: []) -> None: """Converts rss items in a newswire into posts """ basePath = baseDir + '/accounts/news@' + domain + '/outbox' @@ -175,24 +201,33 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str, moderated = item[5] + hashtags = item[6] + savePost = newswireHashtagProcessing(session, baseDir, blog, hashtags, + httpPrefix, domain, port, + personCache, cachedWebfingers, + federationList, + sendThreads, postLog) + # save the post and update the index - if saveJson(blog, filename): - updateFeedsOutboxIndex(baseDir, domain, postId + '.json') + if savePost: + if saveJson(blog, filename): + updateFeedsOutboxIndex(baseDir, domain, postId + '.json') - # Save a file containing the time when the post arrived - # this can then later be used to construct the news timeline - # excluding items during the voting period - if moderated: - saveArrivedTime(baseDir, filename, blog['object']['arrived']) - else: - if os.path.isfile(filename + '.arrived'): - os.remove(filename + '.arrived') + # Save a file containing the time when the post arrived + # this can then later be used to construct the news timeline + # excluding items during the voting period + if moderated: + saveArrivedTime(baseDir, filename, + blog['object']['arrived']) + else: + if os.path.isfile(filename + '.arrived'): + os.remove(filename + '.arrived') - # set the url - newswire[originalDateStr][1] = \ - '/users/news/statuses/' + statusNumber - # set the filename - newswire[originalDateStr][3] = filename + # set the url + newswire[originalDateStr][1] = \ + '/users/news/statuses/' + statusNumber + # set the filename + newswire[originalDateStr][3] = filename def mergeWithPreviousNewswire(oldNewswire: {}, newNewswire: {}) -> None: @@ -251,7 +286,10 @@ def runNewswireDaemon(baseDir: str, httpd, httpd.maxRecentPosts, httpd.session, httpd.cachedWebfingers, - httpd.personCache) + httpd.personCache, + httpd.federationList, + httpd.sendThreads, + httpd.postLog) print('Newswire feed converted to ActivityPub') # wait a while before the next feeds update From c691cff82df19116545d85d30bbdaefbe24a906a Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 13:05:41 +0100 Subject: [PATCH 007/351] Add hashtag interpreter --- content.py | 1 + newsdaemon.py | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++ tests.py | 58 ++++++++++++++++ 3 files changed, 238 insertions(+) diff --git a/content.py b/content.py index 34598bb82..6d619509c 100644 --- a/content.py +++ b/content.py @@ -353,6 +353,7 @@ def validHashTag(hashtag: str) -> bool: # long hashtags are not valid if len(hashtag) >= 32: return False + # TODO: this may need to be an international character set validChars = set('0123456789' + 'abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ') diff --git a/newsdaemon.py b/newsdaemon.py index 54dd2a835..ade72823b 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -15,6 +15,7 @@ from newswire import getDictFromNewswire from posts import createNewsPost from content import removeHtmlTag from content import dangerousMarkup +from content import validHashTag from utils import loadJson from utils import saveJson from utils import getStatusNumber @@ -71,6 +72,97 @@ def removeControlCharacters(content: str) -> str: return content +def hasttagRuleResolve(tree: [], hashtags: []) -> bool: + """Returns whether the tree for a hashtag rule evaluates to true or false + """ + if not tree: + return False + + if tree[0] == 'not': + if len(tree) == 2: + if isinstance(tree[1], str): + return tree[1] not in hashtags + elif isinstance(tree[1], list): + return not hasttagRuleResolve(tree[1], hashtags) + elif tree[0] == 'and': + if len(tree) == 3: + + firstArg = False + if isinstance(tree[1], str): + firstArg = (tree[1] in hashtags) + elif isinstance(tree[1], list): + firstArg = (hasttagRuleResolve(tree[1], hashtags)) + + secondArg = False + if isinstance(tree[2], str): + secondArg = (tree[2] in hashtags) + elif isinstance(tree[2], list): + secondArg = (hasttagRuleResolve(tree[2], hashtags)) + return firstArg and secondArg + elif tree[0] == 'or': + if len(tree) == 3: + + firstArg = False + if isinstance(tree[1], str): + firstArg = (tree[1] in hashtags) + elif isinstance(tree[1], list): + firstArg = (hasttagRuleResolve(tree[1], hashtags)) + + secondArg = False + if isinstance(tree[2], str): + secondArg = (tree[2] in hashtags) + elif isinstance(tree[2], list): + secondArg = (hasttagRuleResolve(tree[2], hashtags)) + return firstArg or secondArg + elif tree[0].startswith('#') and len(tree) == 1: + return tree[0] in hashtags + + return False + + +def hashtagRuleTree(operators: [], + conditionsStr: str, + tagsInConditions: []) -> []: + """Walks the tree + """ + if not operators and conditionsStr: + conditionsStr = conditionsStr.strip() + if conditionsStr.startswith('#') or conditionsStr in operators: + if conditionsStr.startswith('#'): + if conditionsStr not in tagsInConditions: + if ' ' not in conditionsStr: + tagsInConditions.append(conditionsStr) + return [conditionsStr.strip()] + else: + return None + if not operators or not conditionsStr: + return None + tree = None + conditionsStr = conditionsStr.strip() + if conditionsStr.startswith('#') or conditionsStr in operators: + if conditionsStr.startswith('#'): + if conditionsStr not in tagsInConditions: + if ' ' not in conditionsStr: + tagsInConditions.append(conditionsStr) + tree = [conditionsStr.strip()] + ctr = 0 + while ctr < len(operators): + op = operators[ctr] + if op not in conditionsStr: + ctr += 1 + continue + else: + tree = [op] + sections = conditionsStr.split(op) + for subConditionStr in sections: + result = hashtagRuleTree(operators[ctr + 1:], subConditionStr, + tagsInConditions) + if result: + tree.append(result) + break + return tree + + def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, hashtags: str, httpPrefix: str, domain: str, port: int, @@ -82,6 +174,90 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, Returns true if the post should be saved to the news timeline of this instance """ + rulesFilename = baseDir + '/accounts/hashtagrules.txt' + if not os.path.isfile(rulesFilename): + return True + rules = [] + with open(rulesFilename, "r") as f: + rules = f.readlines() + + domainFull = domain + if port: + if port != 80 and port != 443: + domainFull = domain + ':' + str(port) + + actionOccurred = False + operators = ('not', 'and', 'or') + for ruleStr in rules: + if not ruleStr: + continue + if not ruleStr.startswith('if '): + continue + if ' then ' not in ruleStr: + continue + conditionsStr = ruleStr.split('if ', 1)[1] + conditionsStr = conditionsStr.split(' then ')[0] + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + # does the rule contain any hashtags? + if not tagsInConditions: + continue + if not hasttagRuleResolve(tree, hashtags): + continue + # the condition matches, so do something + actionStr = ruleStr.split(' then ')[1].strip() + + # add a hashtag + if actionStr.startswith('add '): + addHashtag = actionStr.split('add ', 1)[1].strip() + if addHashtag.startswith('#'): + if addHashtag not in hashtags: + hashtags.append(addHashtag) + htId = addHashtag.replace('#', '') + if validHashTag(htId): + hashtagUrl = \ + httpPrefix + "://" + domainFull + "/tags/" + htId + postJsonObject['object']['tag'][htId] = { + 'href': hashtagUrl, + 'name': addHashtag, + 'type': 'Hashtag' + } + hashtagHtml = \ + "#" + \ + htId + "" + content = postJsonObject['object']['content'] + if content.endswith('

'): + content = \ + content[:len(content) - len('

')] + \ + hashtagHtml + '

' + else: + content += hashtagHtml + postJsonObject['object']['content'] = content + actionOccurred = True + + # remove a hashtag + if actionStr.startswith('remove '): + rmHashtag = actionStr.split('remove ', 1)[1].strip() + if rmHashtag.startswith('#'): + if rmHashtag in hashtags: + hashtags.remove(rmHashtag) + htId = addHashtag.replace('#', '') + hashtagUrl = \ + httpPrefix + "://" + domainFull + "/tags/" + htId + hashtagHtml = \ + "#" + \ + htId + "" + content = postJsonObject['object']['content'] + if hashtagHtml in content: + postJsonObject['object']['content'] = \ + content.replace(hashtagHtml, '') + del postJsonObject['object']['tag'][htId] + actionOccurred = True + # TODO # If routing to another instance # sendSignedJson(postJsonObject: {}, session, baseDir: str, @@ -91,6 +267,8 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, # federationList: [], # sendThreads: [], postLog: [], cachedWebfingers: {}, # personCache: {}, False, __version__) -> int: + if actionOccurred: + return True return True @@ -210,6 +388,7 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str, # save the post and update the index if savePost: + newswire[originalDateStr][6] = hashtags if saveJson(blog, filename): updateFeedsOutboxIndex(baseDir, domain, postId + '.json') diff --git a/tests.py b/tests.py index 082108b61..956d9efe5 100644 --- a/tests.py +++ b/tests.py @@ -82,6 +82,8 @@ from content import removeHtmlTag from theme import setCSSparam from jsonldsig import testSignJsonld from jsonldsig import jsonldVerify +from newsdaemon import hashtagRuleTree +from newsdaemon import hasttagRuleResolve testServerAliceRunning = False testServerBobRunning = False @@ -2173,8 +2175,64 @@ def testRemoveHtmlTag(): "src=\"https://somesiteorother.com/image.jpg\">

" +def testHashtagRuleTree(): + print('testHashtagRuleTree') + operators = ('not', 'and', 'or') + + conditionsStr = '#foo or #bar' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert str(tree) == str(['or', ['#foo'], ['#bar']]) + assert str(tagsInConditions) == str(['#foo', '#bar']) + hashtags = ['#foo'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#carrot', '#stick'] + assert not hasttagRuleResolve(tree, hashtags) + + conditionsStr = 'x' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert tree is None + assert tagsInConditions == [] + hashtags = ['#foo'] + assert not hasttagRuleResolve(tree, hashtags) + + conditionsStr = '#x' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert str(tree) == str(['#x']) + assert str(tagsInConditions) == str(['#x']) + hashtags = ['#x'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#y', '#z'] + assert not hasttagRuleResolve(tree, hashtags) + + conditionsStr = 'not #b' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert str(tree) == str(['not', ['#b']]) + assert str(tagsInConditions) == str(['#b']) + hashtags = ['#y', '#z'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#a', '#b', '#c'] + assert not hasttagRuleResolve(tree, hashtags) + + conditionsStr = '#foo or #bar and #a' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + assert str(tree) == str(['and', ['or', ['#foo'], ['#bar']], ['#a']]) + assert str(tagsInConditions) == str(['#foo', '#bar', '#a']) + hashtags = ['#bar', '#a'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#foo', '#a'] + assert hasttagRuleResolve(tree, hashtags) + hashtags = ['#x', '#a'] + assert not hasttagRuleResolve(tree, hashtags) + + def runAllTests(): print('Running tests...') + testHashtagRuleTree() testRemoveHtmlTag() testReplaceEmailQuote() testConstantTimeStringCheck() From 37fee435ecf78f7d3d81003a9dcff87b05d0bdb2 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 13:08:20 +0100 Subject: [PATCH 008/351] Add a space to separate hashtags --- newsdaemon.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index ade72823b..7aaa7befc 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -223,7 +223,7 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, 'type': 'Hashtag' } hashtagHtml = \ - "#" + \ htId + "" @@ -253,8 +253,9 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, htId + "" content = postJsonObject['object']['content'] if hashtagHtml in content: - postJsonObject['object']['content'] = \ - content.replace(hashtagHtml, '') + content = \ + content.replace(hashtagHtml, '').replace(' ', ' ') + postJsonObject['object']['content'] = content del postJsonObject['object']['tag'][htId] actionOccurred = True From ae3b7819c32777cf42d8b6c2c72404acfb448140 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 14:39:04 +0100 Subject: [PATCH 009/351] Store hashtags on incoming news posts --- newsdaemon.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/newsdaemon.py b/newsdaemon.py index 7aaa7befc..5f60cf099 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -19,6 +19,7 @@ from content import validHashTag from utils import loadJson from utils import saveJson from utils import getStatusNumber +from inbox import storeHashTags def updateFeedsOutboxIndex(baseDir: str, domain: str, postId: str) -> None: @@ -235,6 +236,7 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, else: content += hashtagHtml postJsonObject['object']['content'] = content + storeHashTags(baseDir, 'news', postJsonObject) actionOccurred = True # remove a hashtag @@ -390,6 +392,9 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str, # save the post and update the index if savePost: newswire[originalDateStr][6] = hashtags + + storeHashTags(baseDir, 'news', blog) + if saveJson(blog, filename): updateFeedsOutboxIndex(baseDir, domain, postId + '.json') From 4c3106b1275621c9344b2e344907a1e504dec589 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 14:40:36 +0100 Subject: [PATCH 010/351] Always true --- newsdaemon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index 5f60cf099..5d65dd812 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -270,8 +270,8 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, # federationList: [], # sendThreads: [], postLog: [], cachedWebfingers: {}, # personCache: {}, False, __version__) -> int: - if actionOccurred: - return True + # if actionOccurred: + # return True return True From 8f41ce4de117112e0f53643f601f77ea27ee999c Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 14:41:20 +0100 Subject: [PATCH 011/351] Unused variable --- newsdaemon.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index 5d65dd812..e18a9bad0 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -187,7 +187,7 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, if port != 80 and port != 443: domainFull = domain + ':' + str(port) - actionOccurred = False + # actionOccurred = False operators = ('not', 'and', 'or') for ruleStr in rules: if not ruleStr: @@ -237,7 +237,7 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, content += hashtagHtml postJsonObject['object']['content'] = content storeHashTags(baseDir, 'news', postJsonObject) - actionOccurred = True + # actionOccurred = True # remove a hashtag if actionStr.startswith('remove '): @@ -259,7 +259,7 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, content.replace(hashtagHtml, '').replace(' ', ' ') postJsonObject['object']['content'] = content del postJsonObject['object']['tag'][htId] - actionOccurred = True + # actionOccurred = True # TODO # If routing to another instance From 18f8643725d79ed07ac32480fe3600842d431583 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 14:59:47 +0100 Subject: [PATCH 012/351] Add hashtags for incoming news posts --- newsdaemon.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/newsdaemon.py b/newsdaemon.py index e18a9bad0..61c2e9136 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -378,11 +378,33 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str, httpPrefix + '://' + domain + '/@news/' + statusNumber blog['object']['published'] = dateStr + domainFull = domain + if port: + if port != 80 and port != 443: + domainFull = domain + ':' + str(port) + + hashtags = item[6] + for tagName in hashtags: + htId = tagName.replace('#', '') + hashtagUrl = \ + httpPrefix + "://" + domainFull + "/tags/" + htId + blog['object']['tag'][htId] = { + 'href': hashtagUrl, + 'name': tagName, + 'type': 'Hashtag' + } + if tagName in blog['object']['content']: + hashtagHtml = \ + "#" + \ + htId + "" + blog['object']['content'].replace(tagName, hashtagHtml) + postId = newPostId.replace('/', '#') moderated = item[5] - hashtags = item[6] savePost = newswireHashtagProcessing(session, baseDir, blog, hashtags, httpPrefix, domain, port, personCache, cachedWebfingers, From 7835f02122c57332c2c30aa69446286941645864 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 15:23:35 +0100 Subject: [PATCH 013/351] Apply hashtag moderation to news feeds --- newswire.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/newswire.py b/newswire.py index 40a33c127..205afff25 100644 --- a/newswire.py +++ b/newswire.py @@ -18,6 +18,7 @@ from utils import saveJson from utils import isSuspended from utils import containsInvalidChars from blocking import isBlockedDomain +from blocking import isBlockedHashtag def rss2Header(httpPrefix: str, @@ -74,7 +75,7 @@ def getNewswireTags(text: str) -> []: return tags -def addNewswireDictEntry(newswire: {}, dateStr: str, +def addNewswireDictEntry(baseDir: str, newswire: {}, dateStr: str, title: str, link: str, votesStatus: str, postFilename: str, description: str, moderated: bool, @@ -83,9 +84,16 @@ def addNewswireDictEntry(newswire: {}, dateStr: str, """ if not tags: tags = getNewswireTags(title + ' ' + description) - newswire[dateStr] = [title, link, - votesStatus, postFilename, - description, moderated, tags] + newswireItemBlocked = False + if tags: + for tag in tags: + if isBlockedHashtag(baseDir, tag): + newswireItemBlocked = True + break + if not newswireItemBlocked: + newswire[dateStr] = [title, link, + votesStatus, postFilename, + description, moderated, tags] def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, @@ -133,7 +141,7 @@ def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %z") postFilename = '' votesStatus = [] - addNewswireDictEntry(result, str(publishedDate), + addNewswireDictEntry(baseDir, result, str(publishedDate), title, link, votesStatus, postFilename, description, moderated) @@ -149,7 +157,8 @@ def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT") postFilename = '' votesStatus = [] - addNewswireDictEntry(result, str(publishedDate) + '+00:00', + addNewswireDictEntry(baseDir, result, + str(publishedDate) + '+00:00', title, link, votesStatus, postFilename, description, moderated) @@ -208,7 +217,7 @@ def atomFeedToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%Y-%m-%dT%H:%M:%SZ") postFilename = '' votesStatus = [] - addNewswireDictEntry(result, str(publishedDate), + addNewswireDictEntry(baseDir, result, str(publishedDate), title, link, votesStatus, postFilename, description, moderated) @@ -224,7 +233,8 @@ def atomFeedToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT") postFilename = '' votesStatus = [] - addNewswireDictEntry(result, str(publishedDate) + '+00:00', + addNewswireDictEntry(baseDir, result, + str(publishedDate) + '+00:00', title, link, votesStatus, postFilename, description, moderated) @@ -427,7 +437,7 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, if os.path.isfile(fullPostFilename + '.votes'): votes = loadJson(fullPostFilename + '.votes') description = '' - addNewswireDictEntry(newswire, published, + addNewswireDictEntry(baseDir, newswire, published, postJsonObject['object']['summary'], postJsonObject['object']['url'], votes, fullPostFilename, From 96221bf86c44ef3d074cf47edc3f1fdf2f4e0919 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 15:28:42 +0100 Subject: [PATCH 014/351] Tidying --- newswire.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/newswire.py b/newswire.py index 205afff25..bb63b06fd 100644 --- a/newswire.py +++ b/newswire.py @@ -91,9 +91,15 @@ def addNewswireDictEntry(baseDir: str, newswire: {}, dateStr: str, newswireItemBlocked = True break if not newswireItemBlocked: - newswire[dateStr] = [title, link, - votesStatus, postFilename, - description, moderated, tags] + newswire[dateStr] = [ + title, + link, + votesStatus, + postFilename, + description, + moderated, + tags + ] def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, From c536a8a0c860d1a9685ef0a3be1b6c51e4ba2215 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 15:42:23 +0100 Subject: [PATCH 015/351] Remove hash --- newswire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newswire.py b/newswire.py index bb63b06fd..c1009c85d 100644 --- a/newswire.py +++ b/newswire.py @@ -87,7 +87,7 @@ def addNewswireDictEntry(baseDir: str, newswire: {}, dateStr: str, newswireItemBlocked = False if tags: for tag in tags: - if isBlockedHashtag(baseDir, tag): + if isBlockedHashtag(baseDir, tag.replace('#', '')): newswireItemBlocked = True break if not newswireItemBlocked: From 22fcc7be069879b2d1cbf8b0ca705a8eec7b4f80 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 17:08:07 +0100 Subject: [PATCH 016/351] Apply word filter to newswire --- newsdaemon.py | 2 +- newswire.py | 53 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index 61c2e9136..c829cb5f3 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -468,7 +468,7 @@ def runNewswireDaemon(baseDir: str, httpd, newNewswire = None try: newNewswire = \ - getDictFromNewswire(httpd.session, baseDir, + getDictFromNewswire(httpd.session, baseDir, domain, httpd.maxNewswirePostsPerSource, httpd.maxNewswireFeedSizeKb) except Exception as e: diff --git a/newswire.py b/newswire.py index c1009c85d..089391be5 100644 --- a/newswire.py +++ b/newswire.py @@ -19,6 +19,7 @@ from utils import isSuspended from utils import containsInvalidChars from blocking import isBlockedDomain from blocking import isBlockedHashtag +from filters import isFiltered def rss2Header(httpPrefix: str, @@ -75,15 +76,19 @@ def getNewswireTags(text: str) -> []: return tags -def addNewswireDictEntry(baseDir: str, newswire: {}, dateStr: str, +def addNewswireDictEntry(baseDir: str, domain: str, + newswire: {}, dateStr: str, title: str, link: str, votesStatus: str, postFilename: str, description: str, moderated: bool, tags=[]) -> None: """Update the newswire dictionary """ + allText = title + ' ' + description + if isFiltered(baseDir, 'news', domain, allText): + return if not tags: - tags = getNewswireTags(title + ' ' + description) + tags = getNewswireTags(allText) newswireItemBlocked = False if tags: for tag in tags: @@ -102,7 +107,8 @@ def addNewswireDictEntry(baseDir: str, newswire: {}, dateStr: str, ] -def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, +def xml2StrToDict(baseDir: str, domain: str, + xmlStr: str, moderated: bool, maxPostsPerSource: int) -> {}: """Converts an xml 2.0 string to a dictionary """ @@ -147,7 +153,8 @@ def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %z") postFilename = '' votesStatus = [] - addNewswireDictEntry(baseDir, result, str(publishedDate), + addNewswireDictEntry(baseDir, domain, + result, str(publishedDate), title, link, votesStatus, postFilename, description, moderated) @@ -163,7 +170,8 @@ def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT") postFilename = '' votesStatus = [] - addNewswireDictEntry(baseDir, result, + addNewswireDictEntry(baseDir, domain, + result, str(publishedDate) + '+00:00', title, link, votesStatus, postFilename, @@ -178,7 +186,8 @@ def xml2StrToDict(baseDir: str, xmlStr: str, moderated: bool, return result -def atomFeedToDict(baseDir: str, xmlStr: str, moderated: bool, +def atomFeedToDict(baseDir: str, domain: str, + xmlStr: str, moderated: bool, maxPostsPerSource: int) -> {}: """Converts an atom feed string to a dictionary """ @@ -223,7 +232,8 @@ def atomFeedToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%Y-%m-%dT%H:%M:%SZ") postFilename = '' votesStatus = [] - addNewswireDictEntry(baseDir, result, str(publishedDate), + addNewswireDictEntry(baseDir, domain, + result, str(publishedDate), title, link, votesStatus, postFilename, description, moderated) @@ -239,7 +249,7 @@ def atomFeedToDict(baseDir: str, xmlStr: str, moderated: bool, datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S UT") postFilename = '' votesStatus = [] - addNewswireDictEntry(baseDir, result, + addNewswireDictEntry(baseDir, domain, result, str(publishedDate) + '+00:00', title, link, votesStatus, postFilename, @@ -254,18 +264,22 @@ def atomFeedToDict(baseDir: str, xmlStr: str, moderated: bool, return result -def xmlStrToDict(baseDir: str, xmlStr: str, moderated: bool, +def xmlStrToDict(baseDir: str, domain: str, + xmlStr: str, moderated: bool, maxPostsPerSource: int) -> {}: """Converts an xml string to a dictionary """ if 'rss version="2.0"' in xmlStr: - return xml2StrToDict(baseDir, xmlStr, moderated, maxPostsPerSource) + return xml2StrToDict(baseDir, domain, + xmlStr, moderated, maxPostsPerSource) elif 'xmlns="http://www.w3.org/2005/Atom"' in xmlStr: - return atomFeedToDict(baseDir, xmlStr, moderated, maxPostsPerSource) + return atomFeedToDict(baseDir, domain, + xmlStr, moderated, maxPostsPerSource) return {} -def getRSS(baseDir: str, session, url: str, moderated: bool, +def getRSS(baseDir: str, domain: str, + session, url: str, moderated: bool, maxPostsPerSource: int, maxFeedSizeKb: int) -> {}: """Returns an RSS url as a dict @@ -293,7 +307,8 @@ def getRSS(baseDir: str, session, url: str, moderated: bool, if result: if int(len(result.text) / 1024) < maxFeedSizeKb and \ not containsInvalidChars(result.text): - return xmlStrToDict(baseDir, result.text, moderated, + return xmlStrToDict(baseDir, domain, + result.text, moderated, maxPostsPerSource) else: print('WARN: feed is too large: ' + url) @@ -443,7 +458,8 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, if os.path.isfile(fullPostFilename + '.votes'): votes = loadJson(fullPostFilename + '.votes') description = '' - addNewswireDictEntry(baseDir, newswire, published, + addNewswireDictEntry(baseDir, domain, + newswire, published, postJsonObject['object']['summary'], postJsonObject['object']['url'], votes, fullPostFilename, @@ -455,7 +471,7 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str, break -def addBlogsToNewswire(baseDir: str, newswire: {}, +def addBlogsToNewswire(baseDir: str, domain: str, newswire: {}, maxBlogsPerAccount: int) -> None: """Adds blogs from each user account into the newswire """ @@ -501,7 +517,7 @@ def addBlogsToNewswire(baseDir: str, newswire: {}, os.remove(newswireModerationFilename) -def getDictFromNewswire(session, baseDir: str, +def getDictFromNewswire(session, baseDir: str, domain: str, maxPostsPerSource: int, maxFeedSizeKb: int) -> {}: """Gets rss feeds as a dictionary from newswire file """ @@ -533,13 +549,14 @@ def getDictFromNewswire(session, baseDir: str, moderated = True url = url.replace('*', '').strip() - itemsList = getRSS(baseDir, session, url, moderated, + itemsList = getRSS(baseDir, domain, + session, url, moderated, maxPostsPerSource, maxFeedSizeKb) for dateStr, item in itemsList.items(): result[dateStr] = item # add blogs from each user account - addBlogsToNewswire(baseDir, result, maxPostsPerSource) + addBlogsToNewswire(baseDir, domain, result, maxPostsPerSource) # sort into chronological order, latest first sortedResult = OrderedDict(sorted(result.items(), reverse=True)) From 30f4a03b5c0bf5d4adedc8b1dbb1e66742faa1ed Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 17:24:47 +0100 Subject: [PATCH 017/351] Blocking news posts with hashtag interpreter --- newsdaemon.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/newsdaemon.py b/newsdaemon.py index c829cb5f3..506ea03aa 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -261,6 +261,10 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, del postJsonObject['object']['tag'][htId] # actionOccurred = True + # Block this item + if actionStr.startswith('block') or actionStr.startswith('drop'): + return False + # TODO # If routing to another instance # sendSignedJson(postJsonObject: {}, session, baseDir: str, From aa1017b3a842e35a9532d1db54712921fc609f1b Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 17:46:21 +0100 Subject: [PATCH 018/351] Typo --- newsdaemon.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index 506ea03aa..d67732dc4 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -73,7 +73,7 @@ def removeControlCharacters(content: str) -> str: return content -def hasttagRuleResolve(tree: [], hashtags: []) -> bool: +def hashtagRuleResolve(tree: [], hashtags: []) -> bool: """Returns whether the tree for a hashtag rule evaluates to true or false """ if not tree: @@ -84,7 +84,7 @@ def hasttagRuleResolve(tree: [], hashtags: []) -> bool: if isinstance(tree[1], str): return tree[1] not in hashtags elif isinstance(tree[1], list): - return not hasttagRuleResolve(tree[1], hashtags) + return not hashtagRuleResolve(tree[1], hashtags) elif tree[0] == 'and': if len(tree) == 3: @@ -92,13 +92,13 @@ def hasttagRuleResolve(tree: [], hashtags: []) -> bool: if isinstance(tree[1], str): firstArg = (tree[1] in hashtags) elif isinstance(tree[1], list): - firstArg = (hasttagRuleResolve(tree[1], hashtags)) + firstArg = (hashtagRuleResolve(tree[1], hashtags)) secondArg = False if isinstance(tree[2], str): secondArg = (tree[2] in hashtags) elif isinstance(tree[2], list): - secondArg = (hasttagRuleResolve(tree[2], hashtags)) + secondArg = (hashtagRuleResolve(tree[2], hashtags)) return firstArg and secondArg elif tree[0] == 'or': if len(tree) == 3: @@ -107,13 +107,13 @@ def hasttagRuleResolve(tree: [], hashtags: []) -> bool: if isinstance(tree[1], str): firstArg = (tree[1] in hashtags) elif isinstance(tree[1], list): - firstArg = (hasttagRuleResolve(tree[1], hashtags)) + firstArg = (hashtagRuleResolve(tree[1], hashtags)) secondArg = False if isinstance(tree[2], str): secondArg = (tree[2] in hashtags) elif isinstance(tree[2], list): - secondArg = (hasttagRuleResolve(tree[2], hashtags)) + secondArg = (hashtagRuleResolve(tree[2], hashtags)) return firstArg or secondArg elif tree[0].startswith('#') and len(tree) == 1: return tree[0] in hashtags @@ -165,7 +165,7 @@ def hashtagRuleTree(operators: [], def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, - hashtags: str, httpPrefix: str, + hashtags: [], httpPrefix: str, domain: str, port: int, personCache: {}, cachedWebfingers: {}, @@ -203,7 +203,7 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, # does the rule contain any hashtags? if not tagsInConditions: continue - if not hasttagRuleResolve(tree, hashtags): + if not hashtagRuleResolve(tree, hashtags): continue # the condition matches, so do something actionStr = ruleStr.split(' then ')[1].strip() From f934f54528d098b3c337ab31c7b9442d69be5bfb Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 18:36:10 +0100 Subject: [PATCH 019/351] Test for moderated feed --- newsdaemon.py | 37 ++++++++++++++++++----------- tests.py | 64 ++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 71 insertions(+), 30 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index d67732dc4..a36f22c3d 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -73,7 +73,7 @@ def removeControlCharacters(content: str) -> str: return content -def hashtagRuleResolve(tree: [], hashtags: []) -> bool: +def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool) -> bool: """Returns whether the tree for a hashtag rule evaluates to true or false """ if not tree: @@ -84,7 +84,7 @@ def hashtagRuleResolve(tree: [], hashtags: []) -> bool: if isinstance(tree[1], str): return tree[1] not in hashtags elif isinstance(tree[1], list): - return not hashtagRuleResolve(tree[1], hashtags) + return not hashtagRuleResolve(tree[1], hashtags, moderated) elif tree[0] == 'and': if len(tree) == 3: @@ -92,13 +92,13 @@ def hashtagRuleResolve(tree: [], hashtags: []) -> bool: if isinstance(tree[1], str): firstArg = (tree[1] in hashtags) elif isinstance(tree[1], list): - firstArg = (hashtagRuleResolve(tree[1], hashtags)) + firstArg = (hashtagRuleResolve(tree[1], hashtags, moderated)) secondArg = False if isinstance(tree[2], str): secondArg = (tree[2] in hashtags) elif isinstance(tree[2], list): - secondArg = (hashtagRuleResolve(tree[2], hashtags)) + secondArg = (hashtagRuleResolve(tree[2], hashtags, moderated)) return firstArg and secondArg elif tree[0] == 'or': if len(tree) == 3: @@ -107,28 +107,33 @@ def hashtagRuleResolve(tree: [], hashtags: []) -> bool: if isinstance(tree[1], str): firstArg = (tree[1] in hashtags) elif isinstance(tree[1], list): - firstArg = (hashtagRuleResolve(tree[1], hashtags)) + firstArg = (hashtagRuleResolve(tree[1], hashtags, moderated)) secondArg = False if isinstance(tree[2], str): secondArg = (tree[2] in hashtags) elif isinstance(tree[2], list): - secondArg = (hashtagRuleResolve(tree[2], hashtags)) + secondArg = (hashtagRuleResolve(tree[2], hashtags, moderated)) return firstArg or secondArg elif tree[0].startswith('#') and len(tree) == 1: return tree[0] in hashtags + elif tree[0].startswith('moderated'): + return moderated return False def hashtagRuleTree(operators: [], conditionsStr: str, - tagsInConditions: []) -> []: + tagsInConditions: [], + moderated: bool) -> []: """Walks the tree """ if not operators and conditionsStr: conditionsStr = conditionsStr.strip() - if conditionsStr.startswith('#') or conditionsStr in operators: + if conditionsStr.startswith('#') or \ + conditionsStr in operators or \ + conditionsStr == 'moderated': if conditionsStr.startswith('#'): if conditionsStr not in tagsInConditions: if ' ' not in conditionsStr: @@ -140,7 +145,9 @@ def hashtagRuleTree(operators: [], return None tree = None conditionsStr = conditionsStr.strip() - if conditionsStr.startswith('#') or conditionsStr in operators: + if conditionsStr.startswith('#') or \ + conditionsStr in operators or \ + conditionsStr == 'moderated': if conditionsStr.startswith('#'): if conditionsStr not in tagsInConditions: if ' ' not in conditionsStr: @@ -157,7 +164,7 @@ def hashtagRuleTree(operators: [], sections = conditionsStr.split(op) for subConditionStr in sections: result = hashtagRuleTree(operators[ctr + 1:], subConditionStr, - tagsInConditions) + tagsInConditions, moderated) if result: tree.append(result) break @@ -170,7 +177,8 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, personCache: {}, cachedWebfingers: {}, federationList: [], - sendThreads: [], postLog: []) -> bool: + sendThreads: [], postLog: [], + moderated: bool) -> bool: """Applies hashtag rules to a news post. Returns true if the post should be saved to the news timeline of this instance @@ -199,11 +207,12 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, conditionsStr = ruleStr.split('if ', 1)[1] conditionsStr = conditionsStr.split(' then ')[0] tagsInConditions = [] - tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) # does the rule contain any hashtags? if not tagsInConditions: continue - if not hashtagRuleResolve(tree, hashtags): + if not hashtagRuleResolve(tree, hashtags, moderated): continue # the condition matches, so do something actionStr = ruleStr.split(' then ')[1].strip() @@ -413,7 +422,7 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str, httpPrefix, domain, port, personCache, cachedWebfingers, federationList, - sendThreads, postLog) + sendThreads, postLog, moderated) # save the post and update the index if savePost: diff --git a/tests.py b/tests.py index 956d9efe5..e433fd837 100644 --- a/tests.py +++ b/tests.py @@ -83,7 +83,7 @@ from theme import setCSSparam from jsonldsig import testSignJsonld from jsonldsig import jsonldVerify from newsdaemon import hashtagRuleTree -from newsdaemon import hasttagRuleResolve +from newsdaemon import hashtagRuleResolve testServerAliceRunning = False testServerBobRunning = False @@ -2179,55 +2179,87 @@ def testHashtagRuleTree(): print('testHashtagRuleTree') operators = ('not', 'and', 'or') + moderated = True conditionsStr = '#foo or #bar' tagsInConditions = [] - tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) assert str(tree) == str(['or', ['#foo'], ['#bar']]) assert str(tagsInConditions) == str(['#foo', '#bar']) hashtags = ['#foo'] - assert hasttagRuleResolve(tree, hashtags) + assert hashtagRuleResolve(tree, hashtags, moderated) hashtags = ['#carrot', '#stick'] - assert not hasttagRuleResolve(tree, hashtags) + assert not hashtagRuleResolve(tree, hashtags, moderated) + + moderated = False + conditionsStr = 'not moderated and #foo or #bar' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) + assert str(tree) == \ + str(['not', ['and', ['moderated'], ['or', ['#foo'], ['#bar']]]]) + assert str(tagsInConditions) == str(['#foo', '#bar']) + hashtags = ['#foo'] + assert hashtagRuleResolve(tree, hashtags, moderated) + hashtags = ['#carrot', '#stick'] + assert hashtagRuleResolve(tree, hashtags, moderated) + + moderated = True + conditionsStr = 'moderated and #foo or #bar' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) + assert str(tree) == \ + str(['and', ['moderated'], ['or', ['#foo'], ['#bar']]]) + assert str(tagsInConditions) == str(['#foo', '#bar']) + hashtags = ['#foo'] + assert hashtagRuleResolve(tree, hashtags, moderated) + hashtags = ['#carrot', '#stick'] + assert not hashtagRuleResolve(tree, hashtags, moderated) conditionsStr = 'x' tagsInConditions = [] - tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) assert tree is None assert tagsInConditions == [] hashtags = ['#foo'] - assert not hasttagRuleResolve(tree, hashtags) + assert not hashtagRuleResolve(tree, hashtags, moderated) conditionsStr = '#x' tagsInConditions = [] - tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) assert str(tree) == str(['#x']) assert str(tagsInConditions) == str(['#x']) hashtags = ['#x'] - assert hasttagRuleResolve(tree, hashtags) + assert hashtagRuleResolve(tree, hashtags, moderated) hashtags = ['#y', '#z'] - assert not hasttagRuleResolve(tree, hashtags) + assert not hashtagRuleResolve(tree, hashtags, moderated) conditionsStr = 'not #b' tagsInConditions = [] - tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) assert str(tree) == str(['not', ['#b']]) assert str(tagsInConditions) == str(['#b']) hashtags = ['#y', '#z'] - assert hasttagRuleResolve(tree, hashtags) + assert hashtagRuleResolve(tree, hashtags, moderated) hashtags = ['#a', '#b', '#c'] - assert not hasttagRuleResolve(tree, hashtags) + assert not hashtagRuleResolve(tree, hashtags, moderated) conditionsStr = '#foo or #bar and #a' tagsInConditions = [] - tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions) + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) assert str(tree) == str(['and', ['or', ['#foo'], ['#bar']], ['#a']]) assert str(tagsInConditions) == str(['#foo', '#bar', '#a']) hashtags = ['#bar', '#a'] - assert hasttagRuleResolve(tree, hashtags) + assert hashtagRuleResolve(tree, hashtags, moderated) hashtags = ['#foo', '#a'] - assert hasttagRuleResolve(tree, hashtags) + assert hashtagRuleResolve(tree, hashtags, moderated) hashtags = ['#x', '#a'] - assert not hasttagRuleResolve(tree, hashtags) + assert not hashtagRuleResolve(tree, hashtags, moderated) def runAllTests(): From dd580be52f33f14266447cf7133c6ce3bbd87ebd Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 19:49:43 +0100 Subject: [PATCH 020/351] Hashtag logic can test for post content --- newsdaemon.py | 63 +++++++++++++++++++++++++++++++++++++++------------ tests.py | 46 +++++++++++++++++++++++++------------ 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index a36f22c3d..47018090e 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -73,7 +73,8 @@ def removeControlCharacters(content: str) -> str: return content -def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool) -> bool: +def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool, + content: str) -> bool: """Returns whether the tree for a hashtag rule evaluates to true or false """ if not tree: @@ -84,7 +85,22 @@ def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool) -> bool: if isinstance(tree[1], str): return tree[1] not in hashtags elif isinstance(tree[1], list): - return not hashtagRuleResolve(tree[1], hashtags, moderated) + return not hashtagRuleResolve(tree[1], hashtags, moderated, + content) + elif tree[0] == 'contains': + if len(tree) == 2: + if isinstance(tree[1], str): + matchStr = tree[1] + if matchStr.startswith('"') and matchStr.endswith('"'): + matchStr = matchStr[1:] + matchStr = matchStr[:len(matchStr) - 1] + return matchStr in content + elif isinstance(tree[1], list): + matchStr = tree[1][0] + if matchStr.startswith('"') and matchStr.endswith('"'): + matchStr = matchStr[1:] + matchStr = matchStr[:len(matchStr) - 1] + return matchStr in content elif tree[0] == 'and': if len(tree) == 3: @@ -92,13 +108,15 @@ def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool) -> bool: if isinstance(tree[1], str): firstArg = (tree[1] in hashtags) elif isinstance(tree[1], list): - firstArg = (hashtagRuleResolve(tree[1], hashtags, moderated)) + firstArg = (hashtagRuleResolve(tree[1], hashtags, moderated, + content)) secondArg = False if isinstance(tree[2], str): secondArg = (tree[2] in hashtags) elif isinstance(tree[2], list): - secondArg = (hashtagRuleResolve(tree[2], hashtags, moderated)) + secondArg = (hashtagRuleResolve(tree[2], hashtags, moderated, + content)) return firstArg and secondArg elif tree[0] == 'or': if len(tree) == 3: @@ -107,18 +125,22 @@ def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool) -> bool: if isinstance(tree[1], str): firstArg = (tree[1] in hashtags) elif isinstance(tree[1], list): - firstArg = (hashtagRuleResolve(tree[1], hashtags, moderated)) + firstArg = (hashtagRuleResolve(tree[1], hashtags, moderated, + content)) secondArg = False if isinstance(tree[2], str): secondArg = (tree[2] in hashtags) elif isinstance(tree[2], list): - secondArg = (hashtagRuleResolve(tree[2], hashtags, moderated)) + secondArg = (hashtagRuleResolve(tree[2], hashtags, moderated, + content)) return firstArg or secondArg elif tree[0].startswith('#') and len(tree) == 1: return tree[0] in hashtags elif tree[0].startswith('moderated'): return moderated + elif tree[0].startswith('"') and tree[0].endswith('"'): + return True return False @@ -131,12 +153,15 @@ def hashtagRuleTree(operators: [], """ if not operators and conditionsStr: conditionsStr = conditionsStr.strip() - if conditionsStr.startswith('#') or \ + isStr = conditionsStr.startswith('"') and conditionsStr.endswith('"') + if conditionsStr.startswith('#') or isStr or \ conditionsStr in operators or \ - conditionsStr == 'moderated': + conditionsStr == 'moderated' or \ + conditionsStr == 'contains': if conditionsStr.startswith('#'): if conditionsStr not in tagsInConditions: - if ' ' not in conditionsStr: + if ' ' not in conditionsStr or \ + conditionsStr.startswith('"'): tagsInConditions.append(conditionsStr) return [conditionsStr.strip()] else: @@ -145,12 +170,15 @@ def hashtagRuleTree(operators: [], return None tree = None conditionsStr = conditionsStr.strip() - if conditionsStr.startswith('#') or \ + isStr = conditionsStr.startswith('"') and conditionsStr.endswith('"') + if conditionsStr.startswith('#') or isStr or \ conditionsStr in operators or \ - conditionsStr == 'moderated': + conditionsStr == 'moderated' or \ + conditionsStr == 'contains': if conditionsStr.startswith('#'): if conditionsStr not in tagsInConditions: - if ' ' not in conditionsStr: + if ' ' not in conditionsStr or \ + conditionsStr.startswith('"'): tagsInConditions.append(conditionsStr) tree = [conditionsStr.strip()] ctr = 0 @@ -195,8 +223,15 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, if port != 80 and port != 443: domainFull = domain + ':' + str(port) + # get the full text content of the post + content = '' + if postJsonObject['object'].get('content'): + content += postJsonObject['object']['content'] + if postJsonObject['object'].get('summary'): + content += ' ' + postJsonObject['object']['summary'] + # actionOccurred = False - operators = ('not', 'and', 'or') + operators = ('not', 'and', 'or', 'contains') for ruleStr in rules: if not ruleStr: continue @@ -212,7 +247,7 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, # does the rule contain any hashtags? if not tagsInConditions: continue - if not hashtagRuleResolve(tree, hashtags, moderated): + if not hashtagRuleResolve(tree, hashtags, moderated, content): continue # the condition matches, so do something actionStr = ruleStr.split(' then ')[1].strip() diff --git a/tests.py b/tests.py index e433fd837..9daf3fb28 100644 --- a/tests.py +++ b/tests.py @@ -2177,8 +2177,9 @@ def testRemoveHtmlTag(): def testHashtagRuleTree(): print('testHashtagRuleTree') - operators = ('not', 'and', 'or') + operators = ('not', 'and', 'or', 'contains') + content = 'This is a test' moderated = True conditionsStr = '#foo or #bar' tagsInConditions = [] @@ -2187,9 +2188,24 @@ def testHashtagRuleTree(): assert str(tree) == str(['or', ['#foo'], ['#bar']]) assert str(tagsInConditions) == str(['#foo', '#bar']) hashtags = ['#foo'] - assert hashtagRuleResolve(tree, hashtags, moderated) + assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#carrot', '#stick'] - assert not hashtagRuleResolve(tree, hashtags, moderated) + assert not hashtagRuleResolve(tree, hashtags, moderated, content) + + content = 'This is a test' + moderated = True + conditionsStr = 'contains "is a" and #foo or #bar' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) + assert str(tree) == \ + str(['and', ['contains', ['"is a"']], + ['or', ['#foo'], ['#bar']]]) + assert str(tagsInConditions) == str(['#foo', '#bar']) + hashtags = ['#foo'] + assert hashtagRuleResolve(tree, hashtags, moderated, content) + hashtags = ['#carrot', '#stick'] + assert not hashtagRuleResolve(tree, hashtags, moderated, content) moderated = False conditionsStr = 'not moderated and #foo or #bar' @@ -2200,9 +2216,9 @@ def testHashtagRuleTree(): str(['not', ['and', ['moderated'], ['or', ['#foo'], ['#bar']]]]) assert str(tagsInConditions) == str(['#foo', '#bar']) hashtags = ['#foo'] - assert hashtagRuleResolve(tree, hashtags, moderated) + assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#carrot', '#stick'] - assert hashtagRuleResolve(tree, hashtags, moderated) + assert hashtagRuleResolve(tree, hashtags, moderated, content) moderated = True conditionsStr = 'moderated and #foo or #bar' @@ -2213,9 +2229,9 @@ def testHashtagRuleTree(): str(['and', ['moderated'], ['or', ['#foo'], ['#bar']]]) assert str(tagsInConditions) == str(['#foo', '#bar']) hashtags = ['#foo'] - assert hashtagRuleResolve(tree, hashtags, moderated) + assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#carrot', '#stick'] - assert not hashtagRuleResolve(tree, hashtags, moderated) + assert not hashtagRuleResolve(tree, hashtags, moderated, content) conditionsStr = 'x' tagsInConditions = [] @@ -2224,7 +2240,7 @@ def testHashtagRuleTree(): assert tree is None assert tagsInConditions == [] hashtags = ['#foo'] - assert not hashtagRuleResolve(tree, hashtags, moderated) + assert not hashtagRuleResolve(tree, hashtags, moderated, content) conditionsStr = '#x' tagsInConditions = [] @@ -2233,9 +2249,9 @@ def testHashtagRuleTree(): assert str(tree) == str(['#x']) assert str(tagsInConditions) == str(['#x']) hashtags = ['#x'] - assert hashtagRuleResolve(tree, hashtags, moderated) + assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#y', '#z'] - assert not hashtagRuleResolve(tree, hashtags, moderated) + assert not hashtagRuleResolve(tree, hashtags, moderated, content) conditionsStr = 'not #b' tagsInConditions = [] @@ -2244,9 +2260,9 @@ def testHashtagRuleTree(): assert str(tree) == str(['not', ['#b']]) assert str(tagsInConditions) == str(['#b']) hashtags = ['#y', '#z'] - assert hashtagRuleResolve(tree, hashtags, moderated) + assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#a', '#b', '#c'] - assert not hashtagRuleResolve(tree, hashtags, moderated) + assert not hashtagRuleResolve(tree, hashtags, moderated, content) conditionsStr = '#foo or #bar and #a' tagsInConditions = [] @@ -2255,11 +2271,11 @@ def testHashtagRuleTree(): assert str(tree) == str(['and', ['or', ['#foo'], ['#bar']], ['#a']]) assert str(tagsInConditions) == str(['#foo', '#bar', '#a']) hashtags = ['#bar', '#a'] - assert hashtagRuleResolve(tree, hashtags, moderated) + assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#foo', '#a'] - assert hashtagRuleResolve(tree, hashtags, moderated) + assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#x', '#a'] - assert not hashtagRuleResolve(tree, hashtags, moderated) + assert not hashtagRuleResolve(tree, hashtags, moderated, content) def runAllTests(): From da9810e1867e1363f526cb14190e41122074c94e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 19:53:08 +0100 Subject: [PATCH 021/351] Example logic --- newsdaemon.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/newsdaemon.py b/newsdaemon.py index 47018090e..1f016e918 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -6,6 +6,11 @@ __maintainer__ = "Bob Mottram" __email__ = "bob@freedombone.net" __status__ = "Production" +# Example hashtag logic: +# +# if moderated and not #imcoxford then block +# if #pol and contains "westminster" then add #britpol + import os import time import datetime From 9cb5df52d54c15923aa0e46873afcbc737a51327 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 20:04:39 +0100 Subject: [PATCH 022/351] Case insensitive matching for hashtag rules --- newsdaemon.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index 1f016e918..9139064cd 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -99,13 +99,13 @@ def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool, if matchStr.startswith('"') and matchStr.endswith('"'): matchStr = matchStr[1:] matchStr = matchStr[:len(matchStr) - 1] - return matchStr in content + return matchStr.lower() in content elif isinstance(tree[1], list): matchStr = tree[1][0] if matchStr.startswith('"') and matchStr.endswith('"'): matchStr = matchStr[1:] matchStr = matchStr[:len(matchStr) - 1] - return matchStr in content + return matchStr.lower() in content elif tree[0] == 'and': if len(tree) == 3: @@ -234,6 +234,7 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, content += postJsonObject['object']['content'] if postJsonObject['object'].get('summary'): content += ' ' + postJsonObject['object']['summary'] + content = content.lower() # actionOccurred = False operators = ('not', 'and', 'or', 'contains') From 651f2b41e40eeba8317c6dc7ae1c032d6e296a91 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 20:06:56 +0100 Subject: [PATCH 023/351] Example hashtag rule --- newsdaemon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/newsdaemon.py b/newsdaemon.py index 9139064cd..abc846903 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -10,6 +10,7 @@ __status__ = "Production" # # if moderated and not #imcoxford then block # if #pol and contains "westminster" then add #britpol +# if #unwantedtag then block import os import time From 996499a2231108ffe016f95262d6078ff36c3d94 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 21:12:49 +0100 Subject: [PATCH 024/351] Edit filtered words for newswire --- daemon.py | 11 +++++++++++ webinterface.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/daemon.py b/daemon.py index 86cb9be8f..69a154813 100644 --- a/daemon.py +++ b/daemon.py @@ -2973,6 +2973,17 @@ class PubServer(BaseHTTPRequestHandler): if os.path.isfile(newswireFilename): os.remove(newswireFilename) + # save filtered words list for the newswire + filterNewswireFilename = \ + baseDir + '/accounts/' + \ + 'news@' + domain + '/filters.txt' + if fields.get('filteredWordsNewswire'): + with open(filterNewswireFilename, 'w+') as filterfile: + filterfile.write(fields['filteredWordsNewswire']) + else: + if os.path.isfile(filterNewswireFilename): + os.remove(filterNewswireFilename) + newswireTrustedFilename = baseDir + '/accounts/newswiretrusted.txt' if fields.get('trustedNewswire'): newswireTrusted = fields['trustedNewswire'] diff --git a/webinterface.py b/webinterface.py index 14b4aa121..112124607 100644 --- a/webinterface.py +++ b/webinterface.py @@ -1345,6 +1345,22 @@ def htmlEditNewswire(translate: {}, baseDir: str, path: str, ' ' + filterStr = '' + filterFilename = \ + baseDir + '/accounts/news@' + domain + '/filters.txt' + if os.path.isfile(filterFilename): + with open(filterFilename, 'r') as filterfile: + filterStr = filterfile.read() + + editNewswireForm += \ + '
\n' + editNewswireForm += '
\n' + editNewswireForm += ' \n' + editNewswireForm += \ '' From b0a80dbb612953bff05a6a9004bb77756e77d6ce Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 21:53:36 +0100 Subject: [PATCH 025/351] Conflicting domains --- newswire.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/newswire.py b/newswire.py index 089391be5..a6773f3f2 100644 --- a/newswire.py +++ b/newswire.py @@ -140,10 +140,10 @@ def xml2StrToDict(baseDir: str, domain: str, link = link.split('')[0] if '://' not in link: continue - domain = link.split('://')[1] - if '/' in domain: - domain = domain.split('/')[0] - if isBlockedDomain(baseDir, domain): + itemDomain = link.split('://')[1] + if '/' in itemDomain: + itemDomain = itemDomain.split('/')[0] + if isBlockedDomain(baseDir, itemDomain): continue pubDate = rssItem.split('')[1] pubDate = pubDate.split('')[0] @@ -219,10 +219,10 @@ def atomFeedToDict(baseDir: str, domain: str, link = link.split('')[0] if '://' not in link: continue - domain = link.split('://')[1] - if '/' in domain: - domain = domain.split('/')[0] - if isBlockedDomain(baseDir, domain): + itemDomain = link.split('://')[1] + if '/' in itemDomain: + itemDomain = itemDomain.split('/')[0] + if isBlockedDomain(baseDir, itemDomain): continue pubDate = rssItem.split('')[1] pubDate = pubDate.split('')[0] From b42d6f54a1fc757518498744acb7cd77657e1663 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 22:23:46 +0100 Subject: [PATCH 026/351] Enter news processing rules from the newswire edit screen --- daemon.py | 10 ++++++++++ translations/ar.json | 4 +++- translations/ca.json | 4 +++- translations/cy.json | 4 +++- translations/de.json | 4 +++- translations/en.json | 4 +++- translations/es.json | 4 +++- translations/fr.json | 4 +++- translations/ga.json | 4 +++- translations/hi.json | 4 +++- translations/it.json | 4 +++- translations/ja.json | 4 +++- translations/oc.json | 4 +++- translations/pt.json | 4 +++- translations/ru.json | 4 +++- translations/zh.json | 4 +++- webinterface.py | 22 +++++++++++++++++++++- 17 files changed, 76 insertions(+), 16 deletions(-) diff --git a/daemon.py b/daemon.py index 69a154813..cd18ef8cb 100644 --- a/daemon.py +++ b/daemon.py @@ -2984,6 +2984,16 @@ class PubServer(BaseHTTPRequestHandler): if os.path.isfile(filterNewswireFilename): os.remove(filterNewswireFilename) + # save news tagging rules + hashtagRulesFilename = \ + baseDir + '/accounts/hashtagrules.txt' + if fields.get('hashtagRulesList'): + with open(hashtagRulesFilename, 'w+') as rulesfile: + rulesfile.write(fields['hashtagRulesList']) + else: + if os.path.isfile(hashtagRulesFilename): + os.remove(hashtagRulesFilename) + newswireTrustedFilename = baseDir + '/accounts/newswiretrusted.txt' if fields.get('trustedNewswire'): newswireTrusted = fields['trustedNewswire'] diff --git a/translations/ar.json b/translations/ar.json index de49c1547..4b89fe1a1 100644 --- a/translations/ar.json +++ b/translations/ar.json @@ -311,5 +311,7 @@ "Site Editors": "محررو الموقع", "Allow news posts": "السماح بنشر الأخبار", "Publish": "ينشر", - "Publish a news article": "انشر مقالة إخبارية" + "Publish a news article": "انشر مقالة إخبارية", + "News tagging rules": "قواعد وسم الأخبار", + "See instructions": "انظر التعليمات" } diff --git a/translations/ca.json b/translations/ca.json index 3499c9de7..d007be902 100644 --- a/translations/ca.json +++ b/translations/ca.json @@ -311,5 +311,7 @@ "Site Editors": "Editors de llocs", "Allow news posts": "Permet publicacions de notícies", "Publish": "Publica", - "Publish a news article": "Publicar un article de notícies" + "Publish a news article": "Publicar un article de notícies", + "News tagging rules": "Regles d'etiquetatge de notícies", + "See instructions": "Consulteu les instruccions" } diff --git a/translations/cy.json b/translations/cy.json index 2ab1795cc..555b01d0a 100644 --- a/translations/cy.json +++ b/translations/cy.json @@ -311,5 +311,7 @@ "Site Editors": "Golygyddion Safle", "Allow news posts": "Caniatáu swyddi newyddion", "Publish": "Cyhoeddi", - "Publish a news article": "Cyhoeddi erthygl newyddion" + "Publish a news article": "Cyhoeddi erthygl newyddion", + "News tagging rules": "Rheolau tagio newyddion", + "See instructions": "Gweler y cyfarwyddiadau" } diff --git a/translations/de.json b/translations/de.json index ceb6179e6..f1c88a27a 100644 --- a/translations/de.json +++ b/translations/de.json @@ -311,5 +311,7 @@ "Site Editors": "Site-Editoren", "Allow news posts": "Nachrichtenbeiträge zulassen", "Publish": "Veröffentlichen", - "Publish a news article": "Veröffentlichen Sie einen Nachrichtenartikel" + "Publish a news article": "Veröffentlichen Sie einen Nachrichtenartikel", + "News tagging rules": "Regeln für das Markieren von Nachrichten", + "See instructions": "Siehe Anweisungen" } diff --git a/translations/en.json b/translations/en.json index e5a65517c..ca15cb68b 100644 --- a/translations/en.json +++ b/translations/en.json @@ -311,5 +311,7 @@ "Site Editors": "Site Editors", "Allow news posts": "Allow news posts", "Publish": "Publish", - "Publish a news article": "Publish a news article" + "Publish a news article": "Publish a news article", + "News tagging rules": "News tagging rules", + "See instructions": "See instructions" } diff --git a/translations/es.json b/translations/es.json index 32c7ac5f8..453f97bf2 100644 --- a/translations/es.json +++ b/translations/es.json @@ -311,5 +311,7 @@ "Site Editors": "Editores del sitio", "Allow news posts": "Permitir publicaciones de noticias", "Publish": "Publicar", - "Publish a news article": "Publica un artículo de noticias" + "Publish a news article": "Publica un artículo de noticias", + "News tagging rules": "Reglas de etiquetado de noticias", + "See instructions": "Vea las instrucciones" } diff --git a/translations/fr.json b/translations/fr.json index 068971bde..a2774c54d 100644 --- a/translations/fr.json +++ b/translations/fr.json @@ -311,5 +311,7 @@ "Site Editors": "Éditeurs du site", "Allow news posts": "Autoriser les articles d'actualité", "Publish": "Publier", - "Publish a news article": "Publier un article de presse" + "Publish a news article": "Publier un article de presse", + "News tagging rules": "Règles de marquage des actualités", + "See instructions": "Voir les instructions" } diff --git a/translations/ga.json b/translations/ga.json index 547f6b56a..927c5d32a 100644 --- a/translations/ga.json +++ b/translations/ga.json @@ -311,5 +311,7 @@ "Site Editors": "Eagarthóirí Suímh", "Allow news posts": "Ceadaigh poist nuachta", "Publish": "Fhoilsiú", - "Publish a news article": "Foilsigh alt nuachta" + "Publish a news article": "Foilsigh alt nuachta", + "News tagging rules": "Rialacha clibeála nuachta", + "See instructions": "Féach na treoracha" } diff --git a/translations/hi.json b/translations/hi.json index 758a933ab..f3f64de1b 100644 --- a/translations/hi.json +++ b/translations/hi.json @@ -311,5 +311,7 @@ "Site Editors": "साइट संपादकों", "Allow news posts": "समाचार पोस्ट की अनुमति दें", "Publish": "प्रकाशित करना", - "Publish a news article": "एक समाचार लेख प्रकाशित करें" + "Publish a news article": "एक समाचार लेख प्रकाशित करें", + "News tagging rules": "समाचार टैगिंग नियम", + "See instructions": "निर्देश देखें" } diff --git a/translations/it.json b/translations/it.json index e103f50c9..d6d636fe6 100644 --- a/translations/it.json +++ b/translations/it.json @@ -311,5 +311,7 @@ "Site Editors": "Editori del sito", "Allow news posts": "Consenti post di notizie", "Publish": "Pubblicare", - "Publish a news article": "Pubblica un articolo di notizie" + "Publish a news article": "Pubblica un articolo di notizie", + "News tagging rules": "Regole di tagging delle notizie", + "See instructions": "Vedere le istruzioni" } diff --git a/translations/ja.json b/translations/ja.json index ac44cd3e0..a2bd44210 100644 --- a/translations/ja.json +++ b/translations/ja.json @@ -311,5 +311,7 @@ "Site Editors": "サイト編集者", "Allow news posts": "ニュース投稿を許可する", "Publish": "公開する", - "Publish a news article": "ニュース記事を公開する" + "Publish a news article": "ニュース記事を公開する", + "News tagging rules": "ニュースのタグ付けルール", + "See instructions": "手順を参照してください" } diff --git a/translations/oc.json b/translations/oc.json index 3e7c8007c..a8fb2e273 100644 --- a/translations/oc.json +++ b/translations/oc.json @@ -307,5 +307,7 @@ "Site Editors": "Site Editors", "Allow news posts": "Allow news posts", "Publish": "Publish", - "Publish a news article": "Publish a news article" + "Publish a news article": "Publish a news article", + "News tagging rules": "News tagging rules", + "See instructions": "See instructions" } diff --git a/translations/pt.json b/translations/pt.json index a894c6c02..8bdaec28a 100644 --- a/translations/pt.json +++ b/translations/pt.json @@ -311,5 +311,7 @@ "Site Editors": "Editores do site", "Allow news posts": "Permitir postagens de notícias", "Publish": "Publicar", - "Publish a news article": "Publique um artigo de notícias" + "Publish a news article": "Publique um artigo de notícias", + "News tagging rules": "Regras de marcação de notícias", + "See instructions": "Veja as instruções" } diff --git a/translations/ru.json b/translations/ru.json index 6ae4b03f5..e44288f5d 100644 --- a/translations/ru.json +++ b/translations/ru.json @@ -311,5 +311,7 @@ "Site Editors": "Редакторы сайта", "Allow news posts": "Разрешить публикации новостей", "Publish": "Публиковать", - "Publish a news article": "Опубликовать новостную статью" + "Publish a news article": "Опубликовать новостную статью", + "News tagging rules": "Правила тегирования новостей", + "See instructions": "См. Инструкции" } diff --git a/translations/zh.json b/translations/zh.json index d2a33c46b..0d937a752 100644 --- a/translations/zh.json +++ b/translations/zh.json @@ -311,5 +311,7 @@ "Site Editors": "网站编辑", "Allow news posts": "允许新闻发布", "Publish": "发布", - "Publish a news article": "发布新闻文章" + "Publish a news article": "发布新闻文章", + "News tagging rules": "新闻标记规则", + "See instructions": "见说明" } diff --git a/webinterface.py b/webinterface.py index 112124607..06c0a1091 100644 --- a/webinterface.py +++ b/webinterface.py @@ -1356,11 +1356,31 @@ def htmlEditNewswire(translate: {}, baseDir: str, path: str, '
\n' editNewswireForm += '
\n' + translate['One per line'] + '' editNewswireForm += ' \n' + hashtagRulesStr = '' + hashtagRulesFilename = \ + baseDir + '/accounts/hashtagrules.txt' + if os.path.isfile(hashtagRulesFilename): + with open(hashtagRulesFilename, 'r') as rulesfile: + hashtagRulesStr = rulesfile.read() + + editNewswireForm += \ + '
\n' + editNewswireForm += '
\n' + editNewswireForm += \ + ' ' + translate['See instructions'] + '\n' + editNewswireForm += ' \n' + editNewswireForm += \ '' From 086826b9a159f07ad140f107981b024228d6ab33 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 17 Oct 2020 22:40:03 +0100 Subject: [PATCH 027/351] Hashtag rules documentation --- hashtagrules.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 hashtagrules.txt diff --git a/hashtagrules.txt b/hashtagrules.txt new file mode 100644 index 000000000..211b25452 --- /dev/null +++ b/hashtagrules.txt @@ -0,0 +1,26 @@ +Epicyon news rules processing +============================= + +As news arrives via RSS or Atom feeds it can be processed to add or remove hashtags, in accordance to some rules which you can define. + +On the newswire edit screen, available to moderators, you can define the news processing rules. There is one rule per line. + +A simple example is: + + if moderated and not #oxfordimc then block + +For moderated feeds this will only allow items through if they have the #oxfordimc hashtag. + +If you want to add hashtags an example is: + + if contains "garden" then add #gardening + +So if incoming news contains the word "garden" either in its title or description then it will automatically be assigned the hashtag #gardening. You can also add hashtags based upon other hashtags. + + if #garden or #lawn then add #gardening + +You can also remove hashtags. + + if #garden or #lawn then remove #gardening + +Which will remove #gardening if it exists as a hashtag within the news post. From 10d4010cd8ce7fd2e977ae3fab723d0ef52bf232 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 10:19:38 +0100 Subject: [PATCH 028/351] Adding and removing tags --- newsdaemon.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index abc846903..d6166c5d9 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -269,11 +269,23 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, if validHashTag(htId): hashtagUrl = \ httpPrefix + "://" + domainFull + "/tags/" + htId - postJsonObject['object']['tag'][htId] = { + newTag = { 'href': hashtagUrl, 'name': addHashtag, 'type': 'Hashtag' } + # does the tag already exist? + addTagObject = None + for t in postJsonObject['object']['tag']: + if t.get('type') and t.get('name'): + if t['type'] == 'Hashtag' and \ + t['name'] == addHashtag: + addTagObject = t + break + # append the tag if it wasn't found + if not addTagObject: + postJsonObject['object']['tag'].append(newTag) + # add corresponding html to the post content hashtagHtml = \ " Date: Sun, 18 Oct 2020 10:28:43 +0100 Subject: [PATCH 029/351] Getting tags from posts --- newsdaemon.py | 3 ++- newswire.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index d6166c5d9..e6dc0c26a 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -464,11 +464,12 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str, htId = tagName.replace('#', '') hashtagUrl = \ httpPrefix + "://" + domainFull + "/tags/" + htId - blog['object']['tag'][htId] = { + newTag = { 'href': hashtagUrl, 'name': tagName, 'type': 'Hashtag' } + blog['object']['tag'].append(newTag) if tagName in blog['object']['content']: hashtagHtml = \ " []: return [] if not postJsonObject['object'].get('tag'): return [] - if not isinstance(postJsonObject['object']['tag'], dict): + if not isinstance(postJsonObject['object']['tag'], list): return [] tags = [] - for tg in postJsonObject['object']['tag'].items(): + for tg in postJsonObject['object']['tag']: if not isinstance(tg, dict): continue if not tg.get('name'): From 43096d087a7e8d2f5b156734c5ec14c8cfdd76ca Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 11:35:25 +0100 Subject: [PATCH 030/351] Remove condition --- newsdaemon.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index e6dc0c26a..f3d8c08b8 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -251,9 +251,6 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, tagsInConditions = [] tree = hashtagRuleTree(operators, conditionsStr, tagsInConditions, moderated) - # does the rule contain any hashtags? - if not tagsInConditions: - continue if not hashtagRuleResolve(tree, hashtags, moderated, content): continue # the condition matches, so do something From 574dc9d97571abc87c2c567ba337fa93bf3648e0 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 13:53:23 +0100 Subject: [PATCH 031/351] Indentation --- newsdaemon.py | 115 +++++++++++++++++++++++++------------------------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index f3d8c08b8..9fbf54a63 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -262,39 +262,40 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, if addHashtag.startswith('#'): if addHashtag not in hashtags: hashtags.append(addHashtag) - htId = addHashtag.replace('#', '') - if validHashTag(htId): - hashtagUrl = \ - httpPrefix + "://" + domainFull + "/tags/" + htId - newTag = { - 'href': hashtagUrl, - 'name': addHashtag, - 'type': 'Hashtag' - } - # does the tag already exist? - addTagObject = None - for t in postJsonObject['object']['tag']: - if t.get('type') and t.get('name'): - if t['type'] == 'Hashtag' and \ - t['name'] == addHashtag: - addTagObject = t - break - # append the tag if it wasn't found - if not addTagObject: - postJsonObject['object']['tag'].append(newTag) - # add corresponding html to the post content - hashtagHtml = \ - " #" + \ - htId + "" - content = postJsonObject['object']['content'] - if content.endswith('

'): - content = \ - content[:len(content) - len('

')] + \ - hashtagHtml + '

' - else: - content += hashtagHtml + htId = addHashtag.replace('#', '') + if validHashTag(htId): + hashtagUrl = \ + httpPrefix + "://" + domainFull + "/tags/" + htId + newTag = { + 'href': hashtagUrl, + 'name': addHashtag, + 'type': 'Hashtag' + } + # does the tag already exist? + addTagObject = None + for t in postJsonObject['object']['tag']: + if t.get('type') and t.get('name'): + if t['type'] == 'Hashtag' and \ + t['name'] == addHashtag: + addTagObject = t + break + # append the tag if it wasn't found + if not addTagObject: + postJsonObject['object']['tag'].append(newTag) + # add corresponding html to the post content + hashtagHtml = \ + " #" + \ + htId + "" + content = postJsonObject['object']['content'] + if hashtagHtml not in content: + if content.endswith('

'): + content = \ + content[:len(content) - len('

')] + \ + hashtagHtml + '

' + else: + content += hashtagHtml postJsonObject['object']['content'] = content storeHashTags(baseDir, 'news', postJsonObject) # actionOccurred = True @@ -305,30 +306,30 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, if rmHashtag.startswith('#'): if rmHashtag in hashtags: hashtags.remove(rmHashtag) - htId = rmHashtag.replace('#', '') - hashtagUrl = \ - httpPrefix + "://" + domainFull + "/tags/" + htId - # remove tag html from the post content - hashtagHtml = \ - "#" + \ - htId + "" - content = postJsonObject['object']['content'] - if hashtagHtml in content: - content = \ - content.replace(hashtagHtml, '').replace(' ', ' ') - postJsonObject['object']['content'] = content - rmTagObject = None - for t in postJsonObject['object']['tag']: - if t.get('type') and t.get('name'): - if t['type'] == 'Hashtag' and \ - t['name'] == rmHashtag: - rmTagObject = t - break - if rmTagObject: - postJsonObject['object']['tag'].remove(rmTagObject) - # actionOccurred = True + htId = rmHashtag.replace('#', '') + hashtagUrl = \ + httpPrefix + "://" + domainFull + "/tags/" + htId + # remove tag html from the post content + hashtagHtml = \ + "#" + \ + htId + "" + content = postJsonObject['object']['content'] + if hashtagHtml in content: + content = \ + content.replace(hashtagHtml, '').replace(' ', ' ') + postJsonObject['object']['content'] = content + rmTagObject = None + for t in postJsonObject['object']['tag']: + if t.get('type') and t.get('name'): + if t['type'] == 'Hashtag' and \ + t['name'] == rmHashtag: + rmTagObject = t + break + if rmTagObject: + postJsonObject['object']['tag'].remove(rmTagObject) + # actionOccurred = True # Block this item if actionStr.startswith('block') or actionStr.startswith('drop'): From cd7221ddf2ffcf0d01d4124511e3d4d76126c7c3 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 13:56:22 +0100 Subject: [PATCH 032/351] Indentation --- newsdaemon.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index 9fbf54a63..66a398c79 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -296,9 +296,9 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, hashtagHtml + '

' else: content += hashtagHtml - postJsonObject['object']['content'] = content - storeHashTags(baseDir, 'news', postJsonObject) - # actionOccurred = True + postJsonObject['object']['content'] = content + storeHashTags(baseDir, 'news', postJsonObject) + # actionOccurred = True # remove a hashtag if actionStr.startswith('remove '): From b5541563eb800420fb318c665647398d926d929b Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 14:31:50 +0100 Subject: [PATCH 033/351] Indentation --- newsdaemon.py | 56 +++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index 66a398c79..bc75e7bc8 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -271,34 +271,34 @@ def newswireHashtagProcessing(session, baseDir: str, postJsonObject: {}, 'name': addHashtag, 'type': 'Hashtag' } - # does the tag already exist? - addTagObject = None - for t in postJsonObject['object']['tag']: - if t.get('type') and t.get('name'): - if t['type'] == 'Hashtag' and \ - t['name'] == addHashtag: - addTagObject = t - break - # append the tag if it wasn't found - if not addTagObject: - postJsonObject['object']['tag'].append(newTag) - # add corresponding html to the post content - hashtagHtml = \ - " #" + \ - htId + "" - content = postJsonObject['object']['content'] - if hashtagHtml not in content: - if content.endswith('

'): - content = \ - content[:len(content) - len('

')] + \ - hashtagHtml + '

' - else: - content += hashtagHtml - postJsonObject['object']['content'] = content - storeHashTags(baseDir, 'news', postJsonObject) - # actionOccurred = True + # does the tag already exist? + addTagObject = None + for t in postJsonObject['object']['tag']: + if t.get('type') and t.get('name'): + if t['type'] == 'Hashtag' and \ + t['name'] == addHashtag: + addTagObject = t + break + # append the tag if it wasn't found + if not addTagObject: + postJsonObject['object']['tag'].append(newTag) + # add corresponding html to the post content + hashtagHtml = \ + " #" + \ + htId + "" + content = postJsonObject['object']['content'] + if hashtagHtml not in content: + if content.endswith('

'): + content = \ + content[:len(content) - len('

')] + \ + hashtagHtml + '

' + else: + content += hashtagHtml + postJsonObject['object']['content'] = content + storeHashTags(baseDir, 'news', postJsonObject) + # actionOccurred = True # remove a hashtag if actionStr.startswith('remove '): From 2132b01950e3f366875d578ab0bc22066b71f6cc Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 16:10:36 +0100 Subject: [PATCH 034/351] Fix tag resolver --- newsdaemon.py | 68 +++++++++++++++++++++++++-------------------------- tests.py | 14 +++++++++++ 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/newsdaemon.py b/newsdaemon.py index bc75e7bc8..e8c41da09 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -108,39 +108,31 @@ def hashtagRuleResolve(tree: [], hashtags: [], moderated: bool, matchStr = matchStr[:len(matchStr) - 1] return matchStr.lower() in content elif tree[0] == 'and': - if len(tree) == 3: - - firstArg = False - if isinstance(tree[1], str): - firstArg = (tree[1] in hashtags) - elif isinstance(tree[1], list): - firstArg = (hashtagRuleResolve(tree[1], hashtags, moderated, - content)) - - secondArg = False - if isinstance(tree[2], str): - secondArg = (tree[2] in hashtags) - elif isinstance(tree[2], list): - secondArg = (hashtagRuleResolve(tree[2], hashtags, moderated, - content)) - return firstArg and secondArg + if len(tree) >= 3: + for argIndex in range(1, len(tree)): + argValue = False + if isinstance(tree[argIndex], str): + argValue = (tree[argIndex] in hashtags) + elif isinstance(tree[argIndex], list): + argValue = hashtagRuleResolve(tree[argIndex], + hashtags, moderated, + content) + if not argValue: + return False + return True elif tree[0] == 'or': - if len(tree) == 3: - - firstArg = False - if isinstance(tree[1], str): - firstArg = (tree[1] in hashtags) - elif isinstance(tree[1], list): - firstArg = (hashtagRuleResolve(tree[1], hashtags, moderated, - content)) - - secondArg = False - if isinstance(tree[2], str): - secondArg = (tree[2] in hashtags) - elif isinstance(tree[2], list): - secondArg = (hashtagRuleResolve(tree[2], hashtags, moderated, - content)) - return firstArg or secondArg + if len(tree) >= 3: + for argIndex in range(1, len(tree)): + argValue = False + if isinstance(tree[argIndex], str): + argValue = (tree[argIndex] in hashtags) + elif isinstance(tree[argIndex], list): + argValue = hashtagRuleResolve(tree[argIndex], + hashtags, moderated, + content) + if argValue: + return True + return False elif tree[0].startswith('#') and len(tree) == 1: return tree[0] in hashtags elif tree[0].startswith('moderated'): @@ -190,14 +182,20 @@ def hashtagRuleTree(operators: [], ctr = 0 while ctr < len(operators): op = operators[ctr] - if op not in conditionsStr: + opMatch = ' ' + op + ' ' + if opMatch not in conditionsStr and \ + not conditionsStr.startswith(op + ' '): ctr += 1 continue else: tree = [op] - sections = conditionsStr.split(op) + if opMatch in conditionsStr: + sections = conditionsStr.split(opMatch) + else: + sections = conditionsStr.split(op + ' ', 1) for subConditionStr in sections: - result = hashtagRuleTree(operators[ctr + 1:], subConditionStr, + result = hashtagRuleTree(operators[ctr + 1:], + subConditionStr, tagsInConditions, moderated) if result: tree.append(result) diff --git a/tests.py b/tests.py index 9daf3fb28..1f9d5b6ef 100644 --- a/tests.py +++ b/tests.py @@ -2179,6 +2179,18 @@ def testHashtagRuleTree(): print('testHashtagRuleTree') operators = ('not', 'and', 'or', 'contains') + moderated = True + conditionsStr = \ + 'contains "Cat" or contains "Corvid" or ' + \ + 'contains "Dormouse" or contains "Buzzard"' + tagsInConditions = [] + tree = hashtagRuleTree(operators, conditionsStr, + tagsInConditions, moderated) + assert str(tree) == str(['or', ['contains', ['"Cat"']], + ['contains', ['"Corvid"']], + ['contains', ['"Dormouse"']], + ['contains', ['"Buzzard"']]]) + content = 'This is a test' moderated = True conditionsStr = '#foo or #bar' @@ -2270,6 +2282,8 @@ def testHashtagRuleTree(): tagsInConditions, moderated) assert str(tree) == str(['and', ['or', ['#foo'], ['#bar']], ['#a']]) assert str(tagsInConditions) == str(['#foo', '#bar', '#a']) + hashtags = ['#foo', '#bar', '#a'] + assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#bar', '#a'] assert hashtagRuleResolve(tree, hashtags, moderated, content) hashtags = ['#foo', '#a'] From 20dd5e069a65c3c905b3bd5c485889754d035702 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 17:19:28 +0100 Subject: [PATCH 035/351] Clear post from caches before updating --- newsdaemon.py | 2 ++ utils.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/newsdaemon.py b/newsdaemon.py index e8c41da09..b7a14d130 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -25,6 +25,7 @@ from content import validHashTag from utils import loadJson from utils import saveJson from utils import getStatusNumber +from utils import clearFromPostCaches from inbox import storeHashTags @@ -490,6 +491,7 @@ def convertRSStoActivityPub(baseDir: str, httpPrefix: str, storeHashTags(baseDir, 'news', blog) + clearFromPostCaches(baseDir, postId) if saveJson(blog, filename): updateFeedsOutboxIndex(baseDir, domain, postId + '.json') diff --git a/utils.py b/utils.py index 6ed6c1c62..d0ced0b32 100644 --- a/utils.py +++ b/utils.py @@ -587,6 +587,26 @@ def locateNewsArrival(baseDir: str, domain: str, return None +def clearFromPostCaches(baseDir: str, postId: str) -> None: + """Clears cached html for the given post, so that edits + to news will appear + """ + filename = postId + '.html' + for subdir, dirs, files in os.walk(baseDir + '/accounts'): + for acct in dirs: + if '@' not in acct: + continue + if 'inbox@' in acct: + continue + cacheDir = os.path.join(baseDir + '/accounts/postcache', acct) + postFilename = cacheDir + filename + if os.path.isfile(postFilename): + try: + os.remove(postFilename) + except BaseException: + pass + + def locatePost(baseDir: str, nickname: str, domain: str, postUrl: str, replies=False) -> str: """Returns the filename for the given status post url From 300692de194a2476c0e615822fe3f926b7d1c24e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 17:24:28 +0100 Subject: [PATCH 036/351] Clear post caches when editing news --- daemon.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/daemon.py b/daemon.py index cd18ef8cb..734cca576 100644 --- a/daemon.py +++ b/daemon.py @@ -164,6 +164,7 @@ from shares import getSharesFeedForPerson from shares import addShare from shares import removeShare from shares import expireShares +from utils import clearFromPostCaches from utils import containsInvalidChars from utils import isSystemAccount from utils import setConfigParam @@ -3124,13 +3125,6 @@ class PubServer(BaseHTTPRequestHandler): newsPostTitle postJsonObject['object']['content'] = \ newsPostContent - # remove the html from post cache - cachedPost = \ - baseDir + '/accounts/' + \ - nickname + '@' + domain + \ - '/postcache/' + newsPostUrl + '.html' - if os.path.isfile(cachedPost): - os.remove(cachedPost) # update newswire pubDate = postJsonObject['object']['published'] publishedDate = \ @@ -3149,6 +3143,12 @@ class PubServer(BaseHTTPRequestHandler): newswireStateFilename) except Exception as e: print('ERROR saving newswire state, ' + str(e)) + + # remove any previous cached news posts + newsId = \ + postJsonObject['object']['id'].replace('/', '#') + clearFromPostCaches(baseDir, newsId) + # save the news post saveJson(postJsonObject, postFilename) From 3f98e7eb75d5589ba124149909cdd9dfaefa6863 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Oct 2020 18:58:24 +0100 Subject: [PATCH 037/351] Update url --- webinterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webinterface.py b/webinterface.py index 06c0a1091..90202604e 100644 --- a/webinterface.py +++ b/webinterface.py @@ -1375,7 +1375,7 @@ def htmlEditNewswire(translate: {}, baseDir: str, path: str, translate['One per line'] + '.\n' editNewswireForm += \ ' ' + translate['See instructions'] + '\n' editNewswireForm += '