From 0132674ea32cad1da047fed1a2107f54c60e7544 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Fri, 12 Jun 2020 12:50:49 +0100
Subject: [PATCH] Label accusatory posts

---
 inbox.py             | 12 +++++---
 posts.py             |  9 ++++--
 semantic.py          | 67 ++++++++++++++++++++++++++++++++++++++++++++
 tests.py             | 10 +++++++
 translations/ar.json |  8 +++++-
 translations/ca.json |  8 +++++-
 translations/cy.json |  8 +++++-
 translations/de.json |  8 +++++-
 translations/en.json |  8 +++++-
 translations/es.json |  8 +++++-
 translations/fr.json |  8 +++++-
 translations/ga.json |  8 +++++-
 translations/hi.json |  8 +++++-
 translations/it.json |  8 +++++-
 translations/ja.json |  8 +++++-
 translations/oc.json |  8 +++++-
 translations/pt.json |  8 +++++-
 translations/ru.json |  8 +++++-
 translations/zh.json |  8 +++++-
 webinterface.py      |  2 +-
 20 files changed, 197 insertions(+), 23 deletions(-)
 create mode 100644 semantic.py

diff --git a/inbox.py b/inbox.py
index a439d8041..156d33f62 100644
--- a/inbox.py
+++ b/inbox.py
@@ -62,6 +62,7 @@ from question import questionUpdateVotes
 from media import replaceYouTube
 from git import isGitPatch
 from git import receiveGitPatch
+from semantic import labelAccusatoryPost
 
 
 def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
@@ -1316,7 +1317,7 @@ def receiveAnnounce(recentPostsCache: {},
                     onionDomain: str, port: int,
                     sendThreads: [], postLog: [], cachedWebfingers: {},
                     personCache: {}, messageJson: {}, federationList: [],
-                    debug: bool) -> bool:
+                    debug: bool, translate: {}) -> bool:
     """Receives an announce activity within the POST section of HTTPServer
     """
     if messageJson['type'] != 'Announce':
@@ -1398,7 +1399,7 @@ def receiveAnnounce(recentPostsCache: {},
               ' -> ' + messageJson['object'])
     postJsonObject = downloadAnnounce(session, baseDir, httpPrefix,
                                       nickname, domain, messageJson,
-                                      __version__)
+                                      __version__, translate)
     if postJsonObject:
         if debug:
             print('DEBUG: Announce post downloaded for ' +
@@ -2007,6 +2008,8 @@ def inboxAfterCapabilities(recentPostsCache: {}, maxRecentPosts: int,
             print('DEBUG: Undo bookmark accepted from ' + actor)
         return False
 
+    labelAccusatoryPost(messageJson, translate)
+
     if receiveAnnounce(recentPostsCache,
                        session, handle, isGroup,
                        baseDir, httpPrefix,
@@ -2016,7 +2019,7 @@ def inboxAfterCapabilities(recentPostsCache: {}, maxRecentPosts: int,
                        personCache,
                        messageJson,
                        federationList,
-                       debug):
+                       debug, translate):
         if debug:
             print('DEBUG: Announce accepted from ' + actor)
 
@@ -2171,7 +2174,8 @@ def inboxAfterCapabilities(recentPostsCache: {}, maxRecentPosts: int,
                                     '/users/' + nickname + '/tlreplies')
 
                 if isImageMedia(session, baseDir, httpPrefix,
-                                nickname, domain, postJsonObject):
+                                nickname, domain, postJsonObject,
+                                translate):
                     # media index will be updated
                     updateIndexList.append('tlmedia')
                 if isBlogPost(postJsonObject):
diff --git a/posts.py b/posts.py
index 994af9304..32f0b7b26 100644
--- a/posts.py
+++ b/posts.py
@@ -51,6 +51,7 @@ from config import getConfigParam
 from blocking import isBlocked
 from filters import isFiltered
 from git import convertPostToPatch
+from semantic import labelAccusatoryPost
 # try:
 #     from BeautifulSoup import BeautifulSoup
 # except ImportError:
@@ -2306,14 +2307,14 @@ def isDM(postJsonObject: {}) -> bool:
 
 def isImageMedia(session, baseDir: str, httpPrefix: str,
                  nickname: str, domain: str,
-                 postJsonObject: {}) -> bool:
+                 postJsonObject: {}, translate: {}) -> bool:
     """Returns true if the given post has attached image media
     """
     if postJsonObject['type'] == 'Announce':
         postJsonAnnounce = \
             downloadAnnounce(session, baseDir, httpPrefix,
                              nickname, domain, postJsonObject,
-                             __version__)
+                             __version__, translate)
         if postJsonAnnounce:
             postJsonObject = postJsonAnnounce
     if postJsonObject['type'] != 'Create':
@@ -2981,7 +2982,8 @@ def rejectAnnounce(announceFilename: str):
 
 def downloadAnnounce(session, baseDir: str, httpPrefix: str,
                      nickname: str, domain: str,
-                     postJsonObject: {}, projectVersion: str) -> {}:
+                     postJsonObject: {}, projectVersion: str,
+                     translate: {}) -> {}:
     """Download the post referenced by an announce
     """
     if not postJsonObject.get('object'):
@@ -3094,6 +3096,7 @@ def downloadAnnounce(session, baseDir: str, httpPrefix: str,
                 # pprint(announcedJson)
                 return None
 
+            labelAccusatoryPost(announcedJson, translate)
             # set the id to the original status
             announcedJson['id'] = postJsonObject['object']
             announcedJson['object']['id'] = postJsonObject['object']
diff --git a/semantic.py b/semantic.py
new file mode 100644
index 000000000..1388dd2cc
--- /dev/null
+++ b/semantic.py
@@ -0,0 +1,67 @@
+__filename__ = "semantic.py"
+__author__ = "Bob Mottram"
+__license__ = "AGPL3+"
+__version__ = "1.1.0"
+__maintainer__ = "Bob Mottram"
+__email__ = "bob@freedombone.net"
+__status__ = "Production"
+
+
+def isAccusatory(content: str, translate: {}, threshold=3) -> bool:
+    """Returns True if the content contains at least `threshold`
+    second person phrases ("you", "your", ...), each followed by a space.
+    If translate is given its values for those keys are matched instead
+    """
+    words = ('you', 'your', "you're", 'if you', 'you are')
+
+    if translate:
+        wordsTranslated = []
+        for wrd in words:
+            translated = translate[wrd]
+            if '|' not in translated:
+                if translated not in wordsTranslated:
+                    wordsTranslated.append(translated)
+            else:
+                # '|' separates gendered variants; add each one individually
+                words2 = translated.split('|')
+                for wrd2 in words2:
+                    if wrd2.strip() not in wordsTranslated:
+                        wordsTranslated.append(wrd2.strip())
+    else:
+        wordsTranslated = words
+
+    contentLower = content.lower()
+    ctr = 0
+    for wrd in wordsTranslated:
+        ctr += contentLower.count(wrd.lower() + ' ')
+    return ctr >= threshold
+
+
+def labelAccusatoryPost(postJsonObject: {}, translate: {}, threshold=3):
+    """Adds an 'accusatory' content warning (summary) and marks the post
+    as sensitive if it is accusatory, is not a reply and contains no '@'
+    mention. An existing summary is kept, with the warning prepended
+    """
+    if not postJsonObject.get('object'):
+        return
+    if not isinstance(postJsonObject['object'], dict):
+        return
+    if not postJsonObject['object'].get('content'):
+        return
+    if postJsonObject['object'].get('inReplyTo'):
+        return
+    if not isinstance(postJsonObject['object']['content'], str):
+        return
+    if '@' in postJsonObject['object']['content']:
+        return
+    if not isAccusatory(postJsonObject['object']['content'],
+                        translate, threshold):
+        return
+    cwStr = translate['Accusatory']
+    if postJsonObject['object'].get('summary'):
+        if cwStr not in postJsonObject['object']['summary']:
+            postJsonObject['object']['summary'] = \
+                cwStr + ', ' + postJsonObject['object']['summary']
+    else:
+        postJsonObject['object']['summary'] = cwStr
+    postJsonObject['object']['sensitive'] = True
diff --git a/tests.py b/tests.py
index 0b5c8c72a..56294f3eb 100644
--- a/tests.py
+++ b/tests.py
@@ -68,6 +68,7 @@ from content import addHtmlTags
 from content import removeLongWords
 from content import replaceContentDuplicates
 from theme import setCSSparam
+from semantic import isAccusatory
 
 testServerAliceRunning = False
 testServerBobRunning = False
@@ -1787,8 +1788,17 @@ def testRecentPostsCache():
     assert len(recentPostsCache['html'].items()) == maxRecentPosts
 
 
+def testAccusatory():
+    print('testAccusatory')
+    testStr = 'This is not an accusatory post'
+    assert(not isAccusatory(testStr, None, 3))
+    testStr = "If you x, and you're y then you are z"
+    assert(isAccusatory(testStr, None, 3))
+
+
 def runAllTests():
     print('Running tests...')
+    testAccusatory()
     testWebLinks()
     testRecentPostsCache()
     testTheme()
diff --git a/translations/ar.json b/translations/ar.json
index 059de1ed1..f7beee1b4 100644
--- a/translations/ar.json
+++ b/translations/ar.json
@@ -230,5 +230,11 @@
     "Zen": "زين",
     "Night": "ليل",
     "Starlight": "ضوء النجوم",
-    "Search banner image": "البحث عن صورة بانر"
+    "Search banner image": "البحث عن صورة بانر",
+    "Accusatory": "اتهام",
+    "you": "أنت",
+    "your": "الخاص بك",
+    "you're": "أنت على",
+    "if you": "اذا أنت",
+    "you are": "أنت"
 }
diff --git a/translations/ca.json b/translations/ca.json
index 5d1c27337..872df8587 100644
--- a/translations/ca.json
+++ b/translations/ca.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Nit",
     "Starlight": "Starlight",
-    "Search banner image": "Cerca imatge del banner"
+    "Search banner image": "Cerca imatge del banner",
+    "Accusatory": "Acusatori",
+    "you": "vostè",
+    "your": "la seva",
+    "you're": "estàs",
+    "if you": "si tu",
+    "you are": "tu ets"
 }
diff --git a/translations/cy.json b/translations/cy.json
index 78aff3238..5ec1e38bb 100644
--- a/translations/cy.json
+++ b/translations/cy.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Noson",
     "Starlight": "Starlight",
-    "Search banner image": "Chwilio delwedd baner"
+    "Search banner image": "Chwilio delwedd baner",
+    "Accusatory": "Cyhuddwr",
+    "you": "ti",
+    "your": "eich",
+    "you're": "ti",
+    "if you": "os ydych",
+    "you are": "yr ydych"
 }
diff --git a/translations/de.json b/translations/de.json
index 124a8d024..c6e214bed 100644
--- a/translations/de.json
+++ b/translations/de.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Nacht",
     "Starlight": "Sternenlicht",
-    "Search banner image": "Suche Banner Bild"
+    "Search banner image": "Suche Banner Bild",
+    "Accusatory": "Anklagend",
+    "you": "du",
+    "your": "ihre",
+    "you're": "du bist",
+    "if you": "wenn du",
+    "you are": "sie sind"
 }
diff --git a/translations/en.json b/translations/en.json
index 16c139c61..49fdb982e 100644
--- a/translations/en.json
+++ b/translations/en.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Night",
     "Starlight": "Starlight",
-    "Search banner image": "Search banner image"
+    "Search banner image": "Search banner image",
+    "Accusatory": "Accusatory",
+    "you": "you",
+    "your": "your",
+    "you're": "you're",
+    "if you": "if you",
+    "you are": "you are"
 }
diff --git a/translations/es.json b/translations/es.json
index 8a26afcd2..090eb177d 100644
--- a/translations/es.json
+++ b/translations/es.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Noche",
     "Starlight": "Luz de las estrellas",
-    "Search banner image": "Buscar imagen de banner"
+    "Search banner image": "Buscar imagen de banner",
+    "Accusatory": "Acusatoria",
+    "you": "tú",
+    "your": "tu",
+    "you're": "tu eres",
+    "if you": "si tu",
+    "you are": "usted está"
 }
diff --git a/translations/fr.json b/translations/fr.json
index 39c1f5a59..5776e8ee1 100644
--- a/translations/fr.json
+++ b/translations/fr.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Nuit",
     "Starlight": "Lumière des étoiles",
-    "Search banner image": "Image de bannière de recherche"
+    "Search banner image": "Image de bannière de recherche",
+    "Accusatory": "Accusatoire",
+    "you": "vous",
+    "your": "votre",
+    "you're": "tu es",
+    "if you": "si tu",
+    "you are": "tu es"
 }
diff --git a/translations/ga.json b/translations/ga.json
index 8af92c68b..a6c2ed521 100644
--- a/translations/ga.json
+++ b/translations/ga.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Oíche",
     "Starlight": "Starlight",
-    "Search banner image": "Cuardaigh íomhá meirge"
+    "Search banner image": "Cuardaigh íomhá meirge",
+    "Accusatory": "Cúisí",
+    "you": "tú",
+    "your": "do",
+    "you're": "tá tú",
+    "if you": "má tá tú",
+    "you are": "tá tú"
 }
diff --git a/translations/hi.json b/translations/hi.json
index b161847c7..680e2be3c 100644
--- a/translations/hi.json
+++ b/translations/hi.json
@@ -230,5 +230,11 @@
     "Zen": "जेन",
     "Night": "रात",
     "Starlight": "तारों का",
-    "Search banner image": "बैनर छवि खोजें"
+    "Search banner image": "बैनर छवि खोजें",
+    "Accusatory": "दोष लगानेवाला",
+    "you": "आप",
+    "your": "तुम्हारी",
+    "you're": "आप कर रहे हैं",
+    "if you": "अगर तुम",
+    "you are": "तुम हो"
 }
diff --git a/translations/it.json b/translations/it.json
index e7ce6e369..c222cfa26 100644
--- a/translations/it.json
+++ b/translations/it.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Notte",
     "Starlight": "luce stellare",
-    "Search banner image": "Cerca immagine banner"
+    "Search banner image": "Cerca immagine banner",
+    "Accusatory": "di accusa",
+    "you": "voi",
+    "your": "il tuo",
+    "you're": "sei",
+    "if you": "se tu",
+    "you are": "siete"
 }
diff --git a/translations/ja.json b/translations/ja.json
index a5567a64a..dc6ef6e2a 100644
--- a/translations/ja.json
+++ b/translations/ja.json
@@ -230,5 +230,11 @@
     "Zen": "禅",
     "Night": "夜",
     "Starlight": "スターライト",
-    "Search banner image": "バナー画像を検索"
+    "Search banner image": "バナー画像を検索",
+    "Accusatory": "非難",
+    "you": "君は",
+    "your": "君の",
+    "you're": "あなたは",
+    "if you": "もし、あんたが",
+    "you are": "あなたは"
 }
diff --git a/translations/oc.json b/translations/oc.json
index 8ff921591..e6a489796 100644
--- a/translations/oc.json
+++ b/translations/oc.json
@@ -226,5 +226,11 @@
     "Zen": "Zen",
     "Night": "Night",
     "Starlight": "Starlight",
-    "Search banner image": "Search banner image"
+    "Search banner image": "Search banner image",
+    "Accusatory": "Accusatory",
+    "you": "you",
+    "your": "your",
+    "you're": "you're",
+    "if you": "if you",
+    "you are": "you are"
 }
diff --git a/translations/pt.json b/translations/pt.json
index d95861eb9..50854cfb3 100644
--- a/translations/pt.json
+++ b/translations/pt.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Noite",
     "Starlight": "Luz das estrelas",
-    "Search banner image": "Pesquisar imagem do banner"
+    "Search banner image": "Pesquisar imagem do banner",
+    "Accusatory": "Acusatória",
+    "you": "você",
+    "your": "sua",
+    "you're": "você é",
+    "if you": "se vocês",
+    "you are": "tu es"
 }
diff --git a/translations/ru.json b/translations/ru.json
index db288ccfa..aefebc280 100644
--- a/translations/ru.json
+++ b/translations/ru.json
@@ -230,5 +230,11 @@
     "Zen": "Zen",
     "Night": "Ночь",
     "Starlight": "Звездный свет",
-    "Search banner image": "Поиск изображения баннера"
+    "Search banner image": "Поиск изображения баннера",
+    "Accusatory": "обличительный",
+    "you": "вы",
+    "your": "ваш",
+    "you're": "Вы",
+    "if you": "если ты",
+    "you are": "ты"
 }
diff --git a/translations/zh.json b/translations/zh.json
index 5b62066be..1989e126a 100644
--- a/translations/zh.json
+++ b/translations/zh.json
@@ -229,5 +229,11 @@
     "Zen": "禅",
     "Night": "晚",
     "Starlight": "星光",
-    "Search banner image": "搜索横幅图像"
+    "Search banner image": "搜索横幅图像",
+    "Accusatory": "指责的",
+    "you": "您",
+    "your": "您的",
+    "you're": "你是",
+    "if you": "如果你",
+    "you are": "你是"
 }
diff --git a/webinterface.py b/webinterface.py
index aaaa316bc..9e90ffa8e 100644
--- a/webinterface.py
+++ b/webinterface.py
@@ -3599,7 +3599,7 @@ def individualPostAsHtml(recentPostsCache: {}, maxRecentPosts: int,
         postJsonAnnounce = \
             downloadAnnounce(session, baseDir, httpPrefix,
                              nickname, domain, postJsonObject,
-                             projectVersion)
+                             projectVersion, translate)
         if not postJsonAnnounce:
             return ''
         postJsonObject = postJsonAnnounce