Use cached actors when checking understood languages

This improves speed by reducing disk reads
main
Bob Mottram 2021-07-19 09:46:21 +01:00
parent 93a30f1255
commit 33ce8fbfb5
9 changed files with 164 additions and 130 deletions

View File

@ -208,8 +208,8 @@ from shares import addShare
from shares import removeShare from shares import removeShare
from shares import expireShares from shares import expireShares
from categories import setHashtagCategory from categories import setHashtagCategory
from utils import getActorLanguages from languages import getActorLanguages
from utils import setActorLanguages from languages import setActorLanguages
from utils import getContentFromPost from utils import getContentFromPost
from utils import acctDir from utils import acctDir
from utils import getImageExtensionFromMimeType from utils import getImageExtensionFromMimeType

View File

@ -655,7 +655,8 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
pageNumber: int, index: int, boxJson: {}, pageNumber: int, index: int, boxJson: {},
systemLanguage: str, systemLanguage: str,
screenreader: str, espeak, screenreader: str, espeak,
translate: {}, yourActor: str) -> {}: translate: {}, yourActor: str,
domainFull: str, personCache: {}) -> {}:
"""Reads a post from the given timeline """Reads a post from the given timeline
Returns the post json Returns the post json
""" """
@ -691,7 +692,8 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
YTReplacementDomain, YTReplacementDomain,
allowLocalNetworkAccess, allowLocalNetworkAccess,
recentPostsCache, False, recentPostsCache, False,
systemLanguage) systemLanguage,
domainFull, personCache)
if postJsonObject2: if postJsonObject2:
if hasObjectDict(postJsonObject2): if hasObjectDict(postJsonObject2):
if postJsonObject2['object'].get('attributedTo') and \ if postJsonObject2['object'].get('attributedTo') and \
@ -1596,7 +1598,8 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
httpPrefix, baseDir, currTimeline, httpPrefix, baseDir, currTimeline,
pageNumber, postIndex, boxJson, pageNumber, postIndex, boxJson,
systemLanguage, screenreader, systemLanguage, screenreader,
espeak, translate, yourActor) espeak, translate, yourActor,
domainFull, personCache)
print('') print('')
sayStr = 'Press Enter to continue...' sayStr = 'Press Enter to continue...'
sayStr2 = _highlightText(sayStr) sayStr2 = _highlightText(sayStr)
@ -2325,7 +2328,8 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
YTReplacementDomain, YTReplacementDomain,
allowLocalNetworkAccess, allowLocalNetworkAccess,
recentPostsCache, False, recentPostsCache, False,
systemLanguage) systemLanguage,
domainFull, personCache)
if postJsonObject2: if postJsonObject2:
postJsonObject = postJsonObject2 postJsonObject = postJsonObject2
if postJsonObject: if postJsonObject:

View File

@ -13,7 +13,7 @@ import datetime
import time import time
import random import random
from linked_data_sig import verifyJsonSignature from linked_data_sig import verifyJsonSignature
from utils import understoodPostLanguage from languages import understoodPostLanguage
from utils import getContentFromPost from utils import getContentFromPost
from utils import acctDir from utils import acctDir
from utils import removeDomainPort from utils import removeDomainPort
@ -1367,6 +1367,7 @@ def _receiveAnnounce(recentPostsCache: {},
if debug: if debug:
print('DEBUG: Downloading announce post ' + messageJson['actor'] + print('DEBUG: Downloading announce post ' + messageJson['actor'] +
' -> ' + messageJson['object']) ' -> ' + messageJson['object'])
domainFull = getFullDomain(domain, port)
postJsonObject = downloadAnnounce(session, baseDir, postJsonObject = downloadAnnounce(session, baseDir,
httpPrefix, httpPrefix,
nickname, domain, nickname, domain,
@ -1375,7 +1376,8 @@ def _receiveAnnounce(recentPostsCache: {},
YTReplacementDomain, YTReplacementDomain,
allowLocalNetworkAccess, allowLocalNetworkAccess,
recentPostsCache, debug, recentPostsCache, debug,
systemLanguage) systemLanguage,
domainFull, personCache)
if not postJsonObject: if not postJsonObject:
notInOnion = True notInOnion = True
if onionDomain: if onionDomain:
@ -1600,7 +1602,9 @@ def _estimateNumberOfEmoji(content: str) -> int:
def _validPostContent(baseDir: str, nickname: str, domain: str, def _validPostContent(baseDir: str, nickname: str, domain: str,
messageJson: {}, maxMentions: int, maxEmoji: int, messageJson: {}, maxMentions: int, maxEmoji: int,
allowLocalNetworkAccess: bool, debug: bool, allowLocalNetworkAccess: bool, debug: bool,
systemLanguage: str) -> bool: systemLanguage: str,
httpPrefix: str, domainFull: str,
personCache: {}) -> bool:
"""Is the content of a received post valid? """Is the content of a received post valid?
Check for bad html Check for bad html
Check for hellthreads Check for hellthreads
@ -1670,7 +1674,9 @@ def _validPostContent(baseDir: str, nickname: str, domain: str,
return False return False
# check that the post is in a language suitable for this account # check that the post is in a language suitable for this account
if not understoodPostLanguage(baseDir, nickname, domain, if not understoodPostLanguage(baseDir, nickname, domain,
messageJson, systemLanguage): messageJson, systemLanguage,
httpPrefix, domainFull,
personCache):
return False return False
# check for filtered content # check for filtered content
if isFiltered(baseDir, nickname, domain, contentStr): if isFiltered(baseDir, nickname, domain, contentStr):
@ -2405,10 +2411,12 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
nickname = handle.split('@')[0] nickname = handle.split('@')[0]
jsonObj = None jsonObj = None
domainFull = getFullDomain(domain, port)
if _validPostContent(baseDir, nickname, domain, if _validPostContent(baseDir, nickname, domain,
postJsonObject, maxMentions, maxEmoji, postJsonObject, maxMentions, maxEmoji,
allowLocalNetworkAccess, debug, allowLocalNetworkAccess, debug,
systemLanguage): systemLanguage, httpPrefix,
domainFull, personCache):
if postJsonObject.get('object'): if postJsonObject.get('object'):
jsonObj = postJsonObject['object'] jsonObj = postJsonObject['object']
@ -2486,7 +2494,6 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
return False return False
# get the actor being replied to # get the actor being replied to
domainFull = getFullDomain(domain, port)
actor = httpPrefix + '://' + domainFull + '/users/' + nickname actor = httpPrefix + '://' + domainFull + '/users/' + nickname
# create a reply notification file if needed # create a reply notification file if needed
@ -2511,7 +2518,8 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
nickname, domain, postJsonObject, nickname, domain, postJsonObject,
translate, YTReplacementDomain, translate, YTReplacementDomain,
allowLocalNetworkAccess, allowLocalNetworkAccess,
recentPostsCache, debug, systemLanguage): recentPostsCache, debug, systemLanguage,
domainFull, personCache):
# media index will be updated # media index will be updated
updateIndexList.append('tlmedia') updateIndexList.append('tlmedia')
if isBlogPost(postJsonObject): if isBlogPost(postJsonObject):

124
languages.py 100644
View File

@ -0,0 +1,124 @@
__filename__ = "languages.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.2.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
__module_group__ = "Core"
import os
from utils import acctDir
from cache import getPersonFromCache
def _getActorLanguagesList(actorJson: {}) -> []:
    """Returns the list of languages declared on the given actor

    The actor's 'attachment' entries are scanned in order and the
    'value' of the first entry satisfying all of the following is
    returned as-is (not copied):
      - its 'name', lowercased, starts with 'languages'
      - its 'type' is 'PropertyValue'
      - its 'value' is a non-empty list
    An empty list is returned if there are no attachments or if
    no entry qualifies.
    """
    attachments = actorJson.get('attachment')
    if not attachments:
        return []
    for item in attachments:
        itemName = item.get('name')
        if not itemName or not itemName.lower().startswith('languages'):
            continue
        itemValue = item.get('value')
        # a missing, empty or non-list value disqualifies the entry
        if not itemValue or not isinstance(itemValue, list):
            continue
        if item.get('type') == 'PropertyValue':
            return itemValue
    return []
def getActorLanguages(actorJson: {}) -> str:
    """Returns the actor's languages as a single display string

    The entries obtained from _getActorLanguagesList are joined
    with ' / ' between them. An empty string is returned when the
    actor declares no languages.
    """
    languagesStr = ''
    for lang in _getActorLanguagesList(actorJson):
        # the separator is only added once something has been accumulated
        languagesStr = languagesStr + ' / ' + lang if languagesStr else lang
    return languagesStr
def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None:
    """Sets the languages used by the given actor

    languagesStr holds language codes separated by '/', ';' or ','
    (the first of these found in the string, tested in that order,
    is used as the separator). Each code is lowercased and stripped
    and is only accepted if baseDir/translations/<code>.json exists.
    Blank and repeated codes are ignored.

    The actor is modified in place: the first existing attachment whose
    name starts with 'languages' (and which has a type) is removed, then
    a new "Languages" PropertyValue is appended if any code was accepted.
    An actor without an 'attachment' list is handled: the list is only
    created when there is something to store in it.
    """
    separator = ','
    if '/' in languagesStr:
        separator = '/'
    elif ';' in languagesStr:
        separator = ';'

    langList2 = []
    for lang in languagesStr.lower().split(separator):
        lang = lang.strip()
        # skip blanks ("en,,fr") and repeats ("en, en")
        if not lang or lang in langList2:
            continue
        languageFilename = baseDir + '/translations/' + lang + '.json'
        if os.path.isfile(languageFilename):
            langList2.append(lang)

    # an actor with no attachments previously raised KeyError here
    if actorJson.get('attachment') is None:
        if not langList2:
            # nothing to remove and nothing to add
            return
        actorJson['attachment'] = []
    attachments = actorJson['attachment']

    # remove any existing value
    propertyFound = None
    for propertyValue in attachments:
        if not isinstance(propertyValue, dict):
            continue
        if not propertyValue.get('name'):
            continue
        if not propertyValue.get('type'):
            continue
        if not propertyValue['name'].lower().startswith('languages'):
            continue
        propertyFound = propertyValue
        break
    if propertyFound:
        attachments.remove(propertyFound)

    if not langList2:
        return

    newLanguages = {
        "name": "Languages",
        "type": "PropertyValue",
        "value": langList2
    }
    attachments.append(newLanguages)
def understoodPostLanguage(baseDir: str, nickname: str, domain: str,
                           messageJson: {}, systemLanguage: str,
                           httpPrefix: str, domainFull: str,
                           personCache: {}) -> bool:
    """Returns true if the post is written in a language
    understood by this account

    The post is messageJson['object'] when that is a non-empty dict,
    otherwise messageJson itself. A post without a dict 'contentMap',
    or whose contentMap has an entry for systemLanguage, is always
    understood. Otherwise the account's actor is obtained via
    getPersonFromCache and the post is understood if the actor declares
    no languages, or if contentMap has an entry for one of them.
    False is returned if the actor cannot be obtained.
    """
    innerObject = messageJson.get('object')
    if innerObject and isinstance(innerObject, dict):
        post = innerObject
    else:
        post = messageJson

    contentMap = post.get('contentMap')
    if not contentMap or not isinstance(contentMap, dict):
        # no per-language content, so there is nothing to filter on
        return True
    if contentMap.get(systemLanguage):
        return True

    # NOTE(review): acctDir is defined elsewhere; if it returns the
    # account directory rather than the actor's .json file then this
    # isfile test can never pass - confirm against utils.acctDir
    actorFilename = acctDir(baseDir, nickname, domain)
    if not os.path.isfile(actorFilename):
        return False

    personUrl = httpPrefix + '://' + domainFull + '/users/' + nickname
    actorJson = getPersonFromCache(baseDir, personUrl, personCache, False)
    if not actorJson:
        print('WARN: unable to load actor to check languages ' + actorFilename)
        return False

    languagesUnderstood = _getActorLanguagesList(actorJson)
    if not languagesUnderstood:
        # the account has expressed no language preference
        return True
    return any(contentMap.get(lang) for lang in languagesUnderstood)

View File

@ -391,7 +391,8 @@ def postMessageToOutbox(session, translate: {},
messageJson, messageJson,
translate, YTReplacementDomain, translate, YTReplacementDomain,
allowLocalNetworkAccess, allowLocalNetworkAccess,
recentPostsCache, debug, systemLanguage): recentPostsCache, debug, systemLanguage,
domainFull, personCache):
inboxUpdateIndex('tlmedia', baseDir, inboxUpdateIndex('tlmedia', baseDir,
postToNickname + '@' + domain, postToNickname + '@' + domain,
savedFilename, debug) savedFilename, debug)

View File

@ -31,7 +31,7 @@ from session import postImage
from webfinger import webfingerHandle from webfinger import webfingerHandle
from httpsig import createSignedHeader from httpsig import createSignedHeader
from siteactive import siteIsActive from siteactive import siteIsActive
from utils import understoodPostLanguage from languages import understoodPostLanguage
from utils import getContentFromPost from utils import getContentFromPost
from utils import removeDomainPort from utils import removeDomainPort
from utils import getPortFromDomain from utils import getPortFromDomain
@ -2935,7 +2935,8 @@ def isImageMedia(session, baseDir: str, httpPrefix: str,
YTReplacementDomain: str, YTReplacementDomain: str,
allowLocalNetworkAccess: bool, allowLocalNetworkAccess: bool,
recentPostsCache: {}, debug: bool, recentPostsCache: {}, debug: bool,
systemLanguage: str) -> bool: systemLanguage: str,
domainFull: str, personCache: {}) -> bool:
"""Returns true if the given post has attached image media """Returns true if the given post has attached image media
""" """
if postJsonObject['type'] == 'Announce': if postJsonObject['type'] == 'Announce':
@ -2946,7 +2947,8 @@ def isImageMedia(session, baseDir: str, httpPrefix: str,
YTReplacementDomain, YTReplacementDomain,
allowLocalNetworkAccess, allowLocalNetworkAccess,
recentPostsCache, debug, recentPostsCache, debug,
systemLanguage) systemLanguage,
domainFull, personCache)
if postJsonAnnounce: if postJsonAnnounce:
postJsonObject = postJsonAnnounce postJsonObject = postJsonAnnounce
if postJsonObject['type'] != 'Create': if postJsonObject['type'] != 'Create':
@ -3900,7 +3902,8 @@ def downloadAnnounce(session, baseDir: str, httpPrefix: str,
translate: {}, YTReplacementDomain: str, translate: {}, YTReplacementDomain: str,
allowLocalNetworkAccess: bool, allowLocalNetworkAccess: bool,
recentPostsCache: {}, debug: bool, recentPostsCache: {}, debug: bool,
systemLanguage: str) -> {}: systemLanguage: str,
domainFull: str, personCache: {}) -> {}:
"""Download the post referenced by an announce """Download the post referenced by an announce
""" """
if not postJsonObject.get('object'): if not postJsonObject.get('object'):
@ -4029,7 +4032,9 @@ def downloadAnnounce(session, baseDir: str, httpPrefix: str,
recentPostsCache) recentPostsCache)
return None return None
if not understoodPostLanguage(baseDir, nickname, domain, if not understoodPostLanguage(baseDir, nickname, domain,
announcedJson, systemLanguage): announcedJson, systemLanguage,
httpPrefix, domainFull,
personCache):
return None return None
# Check the content of the announce # Check the content of the announce
contentStr = announcedJson['content'] contentStr = announcedJson['content']

109
utils.py
View File

@ -2562,112 +2562,3 @@ def validUrlPrefix(url: str) -> bool:
if url.startswith(pre): if url.startswith(pre):
return True return True
return False return False
def _getActorLanguagesList(actorJson: {}) -> []:
    """Returns the list of languages declared on the given actor

    The actor's 'attachment' entries are scanned in order and the
    'value' of the first entry satisfying all of the following is
    returned as-is (not copied):
      - its 'name', lowercased, starts with 'languages'
      - its 'type' is 'PropertyValue'
      - its 'value' is a non-empty list
    An empty list is returned if there are no attachments or if
    no entry qualifies.
    """
    attachments = actorJson.get('attachment')
    if not attachments:
        return []
    for item in attachments:
        itemName = item.get('name')
        if not itemName or not itemName.lower().startswith('languages'):
            continue
        itemValue = item.get('value')
        # a missing, empty or non-list value disqualifies the entry
        if not itemValue or not isinstance(itemValue, list):
            continue
        if item.get('type') == 'PropertyValue':
            return itemValue
    return []
def getActorLanguages(actorJson: {}) -> str:
    """Returns the actor's languages as a single display string

    The entries obtained from _getActorLanguagesList are joined
    with ' / ' between them. An empty string is returned when the
    actor declares no languages.
    """
    languagesStr = ''
    for lang in _getActorLanguagesList(actorJson):
        # the separator is only added once something has been accumulated
        languagesStr = languagesStr + ' / ' + lang if languagesStr else lang
    return languagesStr
def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None:
    """Sets the languages used by the given actor

    languagesStr holds language codes separated by '/', ';' or ','
    (the first of these found in the string, tested in that order,
    is used as the separator). Each code is lowercased and stripped
    and is only accepted if baseDir/translations/<code>.json exists.
    Blank and repeated codes are ignored.

    The actor is modified in place: the first existing attachment whose
    name starts with 'languages' (and which has a type) is removed, then
    a new "Languages" PropertyValue is appended if any code was accepted.
    An actor without an 'attachment' list is handled: the list is only
    created when there is something to store in it.
    """
    separator = ','
    if '/' in languagesStr:
        separator = '/'
    elif ';' in languagesStr:
        separator = ';'

    langList2 = []
    for lang in languagesStr.lower().split(separator):
        lang = lang.strip()
        # skip blanks ("en,,fr") and repeats ("en, en")
        if not lang or lang in langList2:
            continue
        languageFilename = baseDir + '/translations/' + lang + '.json'
        if os.path.isfile(languageFilename):
            langList2.append(lang)

    # an actor with no attachments previously raised KeyError here
    if actorJson.get('attachment') is None:
        if not langList2:
            # nothing to remove and nothing to add
            return
        actorJson['attachment'] = []
    attachments = actorJson['attachment']

    # remove any existing value
    propertyFound = None
    for propertyValue in attachments:
        if not isinstance(propertyValue, dict):
            continue
        if not propertyValue.get('name'):
            continue
        if not propertyValue.get('type'):
            continue
        if not propertyValue['name'].lower().startswith('languages'):
            continue
        propertyFound = propertyValue
        break
    if propertyFound:
        attachments.remove(propertyFound)

    if not langList2:
        return

    newLanguages = {
        "name": "Languages",
        "type": "PropertyValue",
        "value": langList2
    }
    attachments.append(newLanguages)
def understoodPostLanguage(baseDir: str, nickname: str, domain: str,
                           messageJson: {}, systemLanguage: str) -> bool:
    """Returns true if the post is written in a language
    understood by this account

    The post is messageJson['object'] when that is a non-empty dict,
    otherwise messageJson itself. A post without a dict 'contentMap',
    or whose contentMap has an entry for systemLanguage, is always
    understood. Otherwise the account's actor is loaded with loadJson
    and the post is understood if the actor declares no languages, or
    if contentMap has an entry for one of them.
    False is returned if the actor cannot be loaded.
    """
    innerObject = messageJson.get('object')
    if innerObject and isinstance(innerObject, dict):
        post = innerObject
    else:
        post = messageJson

    contentMap = post.get('contentMap')
    if not contentMap or not isinstance(contentMap, dict):
        # no per-language content, so there is nothing to filter on
        return True
    if contentMap.get(systemLanguage):
        return True

    # NOTE(review): acctDir is defined elsewhere; if it returns the
    # account directory rather than the actor's .json file then this
    # isfile test can never pass - confirm against acctDir
    actorFilename = acctDir(baseDir, nickname, domain)
    if not os.path.isfile(actorFilename):
        return False
    actorJson = loadJson(actorFilename)
    if not actorJson:
        print('WARN: unable to load actor to check languages ' + actorFilename)
        return False

    languagesUnderstood = _getActorLanguagesList(actorJson)
    if not languagesUnderstood:
        # the account has expressed no language preference
        return True
    return any(contentMap.get(lang) for lang in languagesUnderstood)

View File

@ -1310,7 +1310,8 @@ def individualPostAsHtml(allowDownloads: bool,
YTReplacementDomain, YTReplacementDomain,
allowLocalNetworkAccess, allowLocalNetworkAccess,
recentPostsCache, False, recentPostsCache, False,
systemLanguage) systemLanguage,
domainFull, personCache)
if not postJsonAnnounce: if not postJsonAnnounce:
# if the announce could not be downloaded then mark it as rejected # if the announce could not be downloaded then mark it as rejected
rejectPostId(baseDir, nickname, domain, postJsonObject['id'], rejectPostId(baseDir, nickname, domain, postJsonObject['id'],

View File

@ -23,7 +23,7 @@ from utils import loadJson
from utils import getConfigParam from utils import getConfigParam
from utils import getImageFormats from utils import getImageFormats
from utils import acctDir from utils import acctDir
from utils import getActorLanguages from languages import getActorLanguages
from skills import getSkills from skills import getSkills
from theme import getThemesList from theme import getThemesList
from person import personBoxJson from person import personBoxJson