From f48b63a892cfc7d3ba2ab0ad49081ea06af026c3 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Sun, 18 Jul 2021 19:33:53 +0100
Subject: [PATCH] Check that language for incoming post is understood by the account

---
Reviewer notes (ignored when the patch is applied):
 * utils.py getActorLanguages: the loop iterated over the empty result
   string (languagesStr) instead of langList, so the function always
   returned ''. The added copy now iterates over langList. The copy being
   removed from person.py is left untouched so the hunk still matches.
 * inbox.py _understoodPostLanguage: acctDir() is assumed to return the
   account directory, so os.path.isfile() on it was always False and posts
   without a contentMap entry for the system language were always rejected.
   The actor filename is now the account directory plus '.json'.
 * Line counts per hunk are unchanged by these two fixes.

 daemon.py         |  4 +--
 inbox.py          | 32 ++++++++++++++++++++
 person.py         | 76 -----------------------------------------------
 utils.py          | 76 +++++++++++++++++++++++++++++++++++++++++++++++
 webapp_profile.py |  2 +-
 5 files changed, 111 insertions(+), 79 deletions(-)

diff --git a/daemon.py b/daemon.py
index 05296e7fa..a55dbbf86 100644
--- a/daemon.py
+++ b/daemon.py
@@ -50,8 +50,6 @@ from matrix import getMatrixAddress
 from matrix import setMatrixAddress
 from donate import getDonationUrl
 from donate import setDonationUrl
-from person import getActorLanguages
-from person import setActorLanguages
 from person import setPersonNotes
 from person import getDefaultPersonContext
 from person import savePersonQrcode
@@ -210,6 +208,8 @@ from shares import addShare
 from shares import removeShare
 from shares import expireShares
 from categories import setHashtagCategory
+from utils import getActorLanguages
+from utils import setActorLanguages
 from utils import getContentFromPost
 from utils import acctDir
 from utils import getImageExtensionFromMimeType
diff --git a/inbox.py b/inbox.py
index 23c4bdcff..10158c773 100644
--- a/inbox.py
+++ b/inbox.py
@@ -67,6 +67,7 @@ from utils import undoAnnounceCollectionEntry
 from utils import dangerousMarkup
 from utils import isDM
 from utils import isReply
+from utils import getActorLanguagesList
 from httpsig import messageContentDigest
 from posts import createDirectMessagePost
 from posts import validContentWarning
@@ -1596,6 +1597,33 @@ def _estimateNumberOfEmoji(content: str) -> int:
     return int(content.count(':') / 2)
 
 
+def _understoodPostLanguage(baseDir: str, nickname: str, domain: str,
+                            messageJson: {}, systemLanguage: str) -> bool:
+    """Returns true if the post is written in a language
+    understood by this account
+    """
+    if not messageJson['object'].get('contentMap'):
+        return True
+    if not isinstance(messageJson['object']['contentMap'], dict):
+        return True
+    if messageJson['object']['contentMap'].get(systemLanguage):
+        return True
+    actorFilename = acctDir(baseDir, nickname, domain) + '.json'
+    if not os.path.isfile(actorFilename):
+        return False
+    actorJson = loadJson(actorFilename)
+    if not actorJson:
+        print('WARN: unable to load actor to check languages ' + actorFilename)
+        return False
+    languagesUnderstood = getActorLanguagesList(actorJson)
+    if not languagesUnderstood:
+        return True
+    for lang in languagesUnderstood:
+        if messageJson['object']['contentMap'].get(lang):
+            return True
+    return False
+
+
 def _validPostContent(baseDir: str, nickname: str, domain: str,
                       messageJson: {}, maxMentions: int, maxEmoji: int,
                       allowLocalNetworkAccess: bool, debug: bool,
@@ -1667,6 +1695,10 @@ def _validPostContent(baseDir: str, nickname: str, domain: str,
                 print('REJECT: Too many tags in post - ' +
                       messageJson['object']['tag'])
                 return False
+    # check that the post is in a language suitable for this account
+    if not _understoodPostLanguage(baseDir, nickname, domain,
+                                   messageJson, systemLanguage):
+        return False
     # check for filtered content
     if isFiltered(baseDir, nickname, domain, contentStr):
         print('REJECT: content filtered')
diff --git a/person.py b/person.py
index ea19d40b9..1ee5180cb 100644
--- a/person.py
+++ b/person.py
@@ -1353,79 +1353,3 @@ def getPersonAvatarUrl(baseDir: str, personUrl: str, personCache: {},
         if personJson['icon'].get('url'):
             return personJson['icon']['url']
     return None
-
-
-def _getActorLanguagesList(actorJson: {}) -> []:
-    """Returns a list containing languages used by the given actor
-    """
-    for propertyValue in actorJson['attachment']:
-        if not propertyValue.get('name'):
-            continue
-        if not propertyValue['name'].lower().startswith('languages'):
-            continue
-        if not propertyValue.get('type'):
-            continue
-        if not propertyValue.get('value'):
-            continue
-        if not isinstance(propertyValue['value'], list):
-            continue
-        if propertyValue['type'] != 'PropertyValue':
-            continue
-        return propertyValue['value']
-    return []
-
-
-def getActorLanguages(actorJson: {}) -> str:
-    """Returns a string containing languages used by the given actor
-    """
-    langList = _getActorLanguagesList(actorJson)
-    if not langList:
-        return ''
-    languagesStr = ''
-    for lang in languagesStr:
-        if languagesStr:
-            languagesStr += ' / ' + lang
-        else:
-            languagesStr = lang
-    return languagesStr
-
-
-def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None:
-    """Sets the languages used by the given actor
-    """
-    separator = ','
-    if '/' in languagesStr:
-        separator = '/'
-    elif ';' in languagesStr:
-        separator = ';'
-    langList = languagesStr.lower().split(separator)
-    langList2 = []
-    for lang in langList:
-        lang = lang.strip()
-        languageFilename = baseDir + '/translations/' + lang + '.json'
-        if os.path.isfile(languageFilename):
-            langList2.append(lang)
-
-    # remove any existing value
-    propertyFound = None
-    for propertyValue in actorJson['attachment']:
-        if not propertyValue.get('name'):
-            continue
-        if not propertyValue.get('type'):
-            continue
-        if not propertyValue['name'].lower().startswith('languages'):
-            continue
-        propertyFound = propertyValue
-        break
-    if propertyFound:
-        actorJson['attachment'].remove(propertyFound)
-
-    if not langList2:
-        return
-
-    newLanguages = {
-        "name": "Languages",
-        "type": "PropertyValue",
-        "value": langList2
-    }
-    actorJson['attachment'].append(newLanguages)
diff --git a/utils.py b/utils.py
index 2f8c9f5d3..9d2dabce9 100644
--- a/utils.py
+++ b/utils.py
@@ -2562,3 +2562,79 @@ def validUrlPrefix(url: str) -> bool:
         if url.startswith(pre):
             return True
     return False
+
+
+def getActorLanguagesList(actorJson: {}) -> []:
+    """Returns a list containing languages used by the given actor
+    """
+    for propertyValue in actorJson['attachment']:
+        if not propertyValue.get('name'):
+            continue
+        if not propertyValue['name'].lower().startswith('languages'):
+            continue
+        if not propertyValue.get('type'):
+            continue
+        if not propertyValue.get('value'):
+            continue
+        if not isinstance(propertyValue['value'], list):
+            continue
+        if propertyValue['type'] != 'PropertyValue':
+            continue
+        return propertyValue['value']
+    return []
+
+
+def getActorLanguages(actorJson: {}) -> str:
+    """Returns a string containing languages used by the given actor
+    """
+    langList = getActorLanguagesList(actorJson)
+    if not langList:
+        return ''
+    languagesStr = ''
+    for lang in langList:
+        if languagesStr:
+            languagesStr += ' / ' + lang
+        else:
+            languagesStr = lang
+    return languagesStr
+
+
+def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None:
+    """Sets the languages used by the given actor
+    """
+    separator = ','
+    if '/' in languagesStr:
+        separator = '/'
+    elif ';' in languagesStr:
+        separator = ';'
+    langList = languagesStr.lower().split(separator)
+    langList2 = []
+    for lang in langList:
+        lang = lang.strip()
+        languageFilename = baseDir + '/translations/' + lang + '.json'
+        if os.path.isfile(languageFilename):
+            langList2.append(lang)
+
+    # remove any existing value
+    propertyFound = None
+    for propertyValue in actorJson['attachment']:
+        if not propertyValue.get('name'):
+            continue
+        if not propertyValue.get('type'):
+            continue
+        if not propertyValue['name'].lower().startswith('languages'):
+            continue
+        propertyFound = propertyValue
+        break
+    if propertyFound:
+        actorJson['attachment'].remove(propertyFound)
+
+    if not langList2:
+        return
+
+    newLanguages = {
+        "name": "Languages",
+        "type": "PropertyValue",
+        "value": langList2
+    }
+    actorJson['attachment'].append(newLanguages)
diff --git a/webapp_profile.py b/webapp_profile.py
index a7b5d12ef..cbb3d52e9 100644
--- a/webapp_profile.py
+++ b/webapp_profile.py
@@ -23,9 +23,9 @@ from utils import loadJson
 from utils import getConfigParam
 from utils import getImageFormats
 from utils import acctDir
+from utils import getActorLanguages
 from skills import getSkills
 from theme import getThemesList
-from person import getActorLanguages
 from person import personBoxJson
 from person import getActorJson
 from person import getPersonAvatarUrl