Check that language for incoming post is understood by the account

main
Bob Mottram 2021-07-18 19:33:53 +01:00
parent c0a3dd459b
commit f48b63a892
5 changed files with 111 additions and 79 deletions

View File

@ -50,8 +50,6 @@ from matrix import getMatrixAddress
from matrix import setMatrixAddress from matrix import setMatrixAddress
from donate import getDonationUrl from donate import getDonationUrl
from donate import setDonationUrl from donate import setDonationUrl
from person import getActorLanguages
from person import setActorLanguages
from person import setPersonNotes from person import setPersonNotes
from person import getDefaultPersonContext from person import getDefaultPersonContext
from person import savePersonQrcode from person import savePersonQrcode
@ -210,6 +208,8 @@ from shares import addShare
from shares import removeShare from shares import removeShare
from shares import expireShares from shares import expireShares
from categories import setHashtagCategory from categories import setHashtagCategory
from utils import getActorLanguages
from utils import setActorLanguages
from utils import getContentFromPost from utils import getContentFromPost
from utils import acctDir from utils import acctDir
from utils import getImageExtensionFromMimeType from utils import getImageExtensionFromMimeType

View File

@ -67,6 +67,7 @@ from utils import undoAnnounceCollectionEntry
from utils import dangerousMarkup from utils import dangerousMarkup
from utils import isDM from utils import isDM
from utils import isReply from utils import isReply
from utils import getActorLanguagesList
from httpsig import messageContentDigest from httpsig import messageContentDigest
from posts import createDirectMessagePost from posts import createDirectMessagePost
from posts import validContentWarning from posts import validContentWarning
@ -1596,6 +1597,33 @@ def _estimateNumberOfEmoji(content: str) -> int:
return int(content.count(':') / 2) return int(content.count(':') / 2)
def _understoodPostLanguage(baseDir: str, nickname: str, domain: str,
                            messageJson: {}, systemLanguage: str) -> bool:
    """Returns true if the post is written in a language
    understood by this account

    A post is accepted when it has no usable contentMap (there is
    nothing to check), when its contentMap has an entry for the
    system language, when the account's actor lists no languages,
    or when the contentMap has an entry for one of the languages
    listed on the actor. If the actor cannot be loaded then the
    post is rejected.
    """
    postObject = messageJson.get('object')
    if not isinstance(postObject, dict):
        # the object may be absent or just a url string, in which
        # case there is no contentMap to reject on
        return True
    contentMap = postObject.get('contentMap')
    if not contentMap or not isinstance(contentMap, dict):
        return True
    if contentMap.get(systemLanguage):
        return True
    # NOTE(review): assumes acctDir returns the account directory and
    # that the actor is stored next to it as <directory>.json, so the
    # extension has to be added before testing for a file.
    # Confirm against utils.acctDir
    actorFilename = acctDir(baseDir, nickname, domain) + '.json'
    if not os.path.isfile(actorFilename):
        return False
    actorJson = loadJson(actorFilename)
    if not actorJson:
        print('WARN: unable to load actor to check languages ' +
              actorFilename)
        return False
    languagesUnderstood = getActorLanguagesList(actorJson)
    if not languagesUnderstood:
        # the account has not stated any languages, so accept anything
        return True
    for lang in languagesUnderstood:
        if contentMap.get(lang):
            return True
    return False
def _validPostContent(baseDir: str, nickname: str, domain: str, def _validPostContent(baseDir: str, nickname: str, domain: str,
messageJson: {}, maxMentions: int, maxEmoji: int, messageJson: {}, maxMentions: int, maxEmoji: int,
allowLocalNetworkAccess: bool, debug: bool, allowLocalNetworkAccess: bool, debug: bool,
@ -1667,6 +1695,10 @@ def _validPostContent(baseDir: str, nickname: str, domain: str,
print('REJECT: Too many tags in post - ' + print('REJECT: Too many tags in post - ' +
messageJson['object']['tag']) messageJson['object']['tag'])
return False return False
# check that the post is in a language suitable for this account
if not _understoodPostLanguage(baseDir, nickname, domain,
messageJson, systemLanguage):
return False
# check for filtered content # check for filtered content
if isFiltered(baseDir, nickname, domain, contentStr): if isFiltered(baseDir, nickname, domain, contentStr):
print('REJECT: content filtered') print('REJECT: content filtered')

View File

@ -1353,79 +1353,3 @@ def getPersonAvatarUrl(baseDir: str, personUrl: str, personCache: {},
if personJson['icon'].get('url'): if personJson['icon'].get('url'):
return personJson['icon']['url'] return personJson['icon']['url']
return None return None
def _getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The actor's attachments are searched for the first PropertyValue
    whose name begins with "languages" (in any case) and whose value
    is a non-empty list. That list is returned, or an empty list if
    there is no such attachment.
    """
    for attached in actorJson['attachment']:
        attachedName = attached.get('name')
        if not attachedName:
            continue
        isLanguages = attachedName.lower().startswith('languages')
        langs = attached.get('value')
        if isLanguages and \
           attached.get('type') == 'PropertyValue' and \
           langs and isinstance(langs, list):
            return langs
    return []
def getActorLanguages(actorJson: {}) -> str:
    """Returns a string containing languages used by the given actor

    The languages are separated by ' / ', for example 'en / fr'.
    An empty string is returned if the actor lists no languages.
    """
    langList = _getActorLanguagesList(actorJson)
    if not langList:
        return ''
    # Join the entries of the list. Previously the loop iterated over
    # the empty result string, so an empty string was always returned.
    # Empty or non-string entries are skipped.
    return ' / '.join([lang for lang in langList
                       if lang and isinstance(lang, str)])
def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None:
    """Sets the languages used by the given actor

    languagesStr is a list of language codes separated by '/', ';'
    or ','. Only codes which have a matching file within the
    translations directory are kept. Any existing languages attachment
    on the actor is removed, and a new one is appended if at least one
    language was recognised.
    """
    # '/' takes priority over ';', with ',' as the fallback
    separator = ','
    for candidate in ('/', ';'):
        if candidate in languagesStr:
            separator = candidate
            break

    translationsDir = baseDir + '/translations/'
    cleaned = [code.strip()
               for code in languagesStr.lower().split(separator)]
    supported = [code for code in cleaned
                 if os.path.isfile(translationsDir + code + '.json')]

    # remove any existing value
    attachments = actorJson['attachment']
    existing = None
    for attached in attachments:
        if not (attached.get('name') and attached.get('type')):
            continue
        if attached['name'].lower().startswith('languages'):
            existing = attached
            break
    if existing:
        attachments.remove(existing)

    if supported:
        attachments.append({
            "name": "Languages",
            "type": "PropertyValue",
            "value": supported
        })

View File

@ -2562,3 +2562,79 @@ def validUrlPrefix(url: str) -> bool:
if url.startswith(pre): if url.startswith(pre):
return True return True
return False return False
def getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The actor's attachments are searched for the first PropertyValue
    whose name begins with "languages" (in any case) and whose value
    is a non-empty list. That list is returned, or an empty list if
    there is no such attachment.
    """
    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        # an actor without attachments has no stated languages
        return []
    for propertyValue in attachments:
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not name or not isinstance(name, str):
            continue
        if not name.lower().startswith('languages'):
            continue
        if propertyValue.get('type') != 'PropertyValue':
            continue
        value = propertyValue.get('value')
        if not value or not isinstance(value, list):
            continue
        return value
    return []
def getActorLanguages(actorJson: {}) -> str:
    """Returns a string containing languages used by the given actor

    The languages are separated by ' / ', for example 'en / fr'.
    An empty string is returned if the actor lists no languages.
    """
    langList = getActorLanguagesList(actorJson)
    if not langList:
        return ''
    # Join the entries of the list. Previously the loop iterated over
    # the empty result string, so an empty string was always returned.
    # Empty or non-string entries are skipped.
    return ' / '.join([lang for lang in langList
                       if lang and isinstance(lang, str)])
def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None:
    """Sets the languages used by the given actor

    languagesStr holds language codes separated by '/', ';' or ','
    (checked in that order of preference). A code is only kept if a
    file for it exists within the translations directory. The actor's
    existing languages attachment, if any, is removed and then a new
    one is appended when at least one code was kept.
    """
    lowered = languagesStr.lower()
    if '/' in languagesStr:
        parts = lowered.split('/')
    elif ';' in languagesStr:
        parts = lowered.split(';')
    else:
        parts = lowered.split(',')

    knownLanguages = []
    for part in parts:
        code = part.strip()
        if os.path.isfile(baseDir + '/translations/' + code + '.json'):
            knownLanguages.append(code)

    # remove any existing value
    attachments = actorJson['attachment']
    for attached in attachments:
        if attached.get('name') and attached.get('type') and \
           attached['name'].lower().startswith('languages'):
            # safe to remove here because the loop ends immediately
            attachments.remove(attached)
            break

    if not knownLanguages:
        return
    attachments.append({
        "name": "Languages",
        "type": "PropertyValue",
        "value": knownLanguages
    })

View File

@ -23,9 +23,9 @@ from utils import loadJson
from utils import getConfigParam from utils import getConfigParam
from utils import getImageFormats from utils import getImageFormats
from utils import acctDir from utils import acctDir
from utils import getActorLanguages
from skills import getSkills from skills import getSkills
from theme import getThemesList from theme import getThemesList
from person import getActorLanguages
from person import personBoxJson from person import personBoxJson
from person import getActorJson from person import getActorJson
from person import getPersonAvatarUrl from person import getPersonAvatarUrl