Use cached actors when checking understood languages

This improves speed by reducing disk reads
main
Bob Mottram 2021-07-19 09:46:21 +01:00
parent 93a30f1255
commit 33ce8fbfb5
9 changed files with 164 additions and 130 deletions

View File

@ -208,8 +208,8 @@ from shares import addShare
from shares import removeShare
from shares import expireShares
from categories import setHashtagCategory
from utils import getActorLanguages
from utils import setActorLanguages
from languages import getActorLanguages
from languages import setActorLanguages
from utils import getContentFromPost
from utils import acctDir
from utils import getImageExtensionFromMimeType

View File

@ -655,7 +655,8 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
pageNumber: int, index: int, boxJson: {},
systemLanguage: str,
screenreader: str, espeak,
translate: {}, yourActor: str) -> {}:
translate: {}, yourActor: str,
domainFull: str, personCache: {}) -> {}:
"""Reads a post from the given timeline
Returns the post json
"""
@ -691,7 +692,8 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
YTReplacementDomain,
allowLocalNetworkAccess,
recentPostsCache, False,
systemLanguage)
systemLanguage,
domainFull, personCache)
if postJsonObject2:
if hasObjectDict(postJsonObject2):
if postJsonObject2['object'].get('attributedTo') and \
@ -1596,7 +1598,8 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
httpPrefix, baseDir, currTimeline,
pageNumber, postIndex, boxJson,
systemLanguage, screenreader,
espeak, translate, yourActor)
espeak, translate, yourActor,
domainFull, personCache)
print('')
sayStr = 'Press Enter to continue...'
sayStr2 = _highlightText(sayStr)
@ -2325,7 +2328,8 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
YTReplacementDomain,
allowLocalNetworkAccess,
recentPostsCache, False,
systemLanguage)
systemLanguage,
domainFull, personCache)
if postJsonObject2:
postJsonObject = postJsonObject2
if postJsonObject:

View File

@ -13,7 +13,7 @@ import datetime
import time
import random
from linked_data_sig import verifyJsonSignature
from utils import understoodPostLanguage
from languages import understoodPostLanguage
from utils import getContentFromPost
from utils import acctDir
from utils import removeDomainPort
@ -1367,6 +1367,7 @@ def _receiveAnnounce(recentPostsCache: {},
if debug:
print('DEBUG: Downloading announce post ' + messageJson['actor'] +
' -> ' + messageJson['object'])
domainFull = getFullDomain(domain, port)
postJsonObject = downloadAnnounce(session, baseDir,
httpPrefix,
nickname, domain,
@ -1375,7 +1376,8 @@ def _receiveAnnounce(recentPostsCache: {},
YTReplacementDomain,
allowLocalNetworkAccess,
recentPostsCache, debug,
systemLanguage)
systemLanguage,
domainFull, personCache)
if not postJsonObject:
notInOnion = True
if onionDomain:
@ -1600,7 +1602,9 @@ def _estimateNumberOfEmoji(content: str) -> int:
def _validPostContent(baseDir: str, nickname: str, domain: str,
messageJson: {}, maxMentions: int, maxEmoji: int,
allowLocalNetworkAccess: bool, debug: bool,
systemLanguage: str) -> bool:
systemLanguage: str,
httpPrefix: str, domainFull: str,
personCache: {}) -> bool:
"""Is the content of a received post valid?
Check for bad html
Check for hellthreads
@ -1670,7 +1674,9 @@ def _validPostContent(baseDir: str, nickname: str, domain: str,
return False
# check that the post is in a language suitable for this account
if not understoodPostLanguage(baseDir, nickname, domain,
messageJson, systemLanguage):
messageJson, systemLanguage,
httpPrefix, domainFull,
personCache):
return False
# check for filtered content
if isFiltered(baseDir, nickname, domain, contentStr):
@ -2405,10 +2411,12 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
nickname = handle.split('@')[0]
jsonObj = None
domainFull = getFullDomain(domain, port)
if _validPostContent(baseDir, nickname, domain,
postJsonObject, maxMentions, maxEmoji,
allowLocalNetworkAccess, debug,
systemLanguage):
systemLanguage, httpPrefix,
domainFull, personCache):
if postJsonObject.get('object'):
jsonObj = postJsonObject['object']
@ -2486,7 +2494,6 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
return False
# get the actor being replied to
domainFull = getFullDomain(domain, port)
actor = httpPrefix + '://' + domainFull + '/users/' + nickname
# create a reply notification file if needed
@ -2511,7 +2518,8 @@ def _inboxAfterInitial(recentPostsCache: {}, maxRecentPosts: int,
nickname, domain, postJsonObject,
translate, YTReplacementDomain,
allowLocalNetworkAccess,
recentPostsCache, debug, systemLanguage):
recentPostsCache, debug, systemLanguage,
domainFull, personCache):
# media index will be updated
updateIndexList.append('tlmedia')
if isBlogPost(postJsonObject):

124
languages.py 100644
View File

@ -0,0 +1,124 @@
__filename__ = "languages.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.2.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
__module_group__ = "Core"
import os
from utils import acctDir
from cache import getPersonFromCache
def _getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The list is taken from the first entry within the actor's
    'attachment' list which is a dict having a 'name' beginning with
    "languages" (case insensitive), a 'type' of 'PropertyValue' and a
    non-empty list as its 'value'. An empty list is returned if no
    entry qualifies.

    Malformed data (an 'attachment' which is not a list, entries which
    are not dicts, names which are not strings) is skipped rather than
    raising an exception.
    """
    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        return []
    for propertyValue in attachments:
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not isinstance(name, str):
            continue
        if not name.lower().startswith('languages'):
            continue
        if propertyValue.get('type') != 'PropertyValue':
            continue
        value = propertyValue.get('value')
        # an empty or non-list value does not end the search, a later
        # attachment may still hold a usable list
        if not value or not isinstance(value, list):
            continue
        return value
    return []
def getActorLanguages(actorJson: {}) -> str:
    """Returns a string containing languages used by the given actor

    The language codes are joined with " / ", for example "en / fr".
    An empty string is returned if the actor has no languages.
    Entries which are not non-empty strings are left out, so that the
    result is always a string.
    """
    langList = _getActorLanguagesList(actorJson)
    return ' / '.join(lang for lang in langList
                      if lang and isinstance(lang, str))
def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None:
    """Sets the languages used by the given actor

    languagesStr contains language codes separated by commas, slashes
    or semicolons (any mix of them), for example "en / fr".
    Only codes for which baseDir/translations/<code>.json exists are
    kept, each of them once and in the order given.

    The first existing attachment whose name begins with "languages"
    is removed from the actor and, if any valid codes remain, a new
    "Languages" PropertyValue is appended. actorJson is altered in
    place and nothing is returned.
    """
    # treat every supported separator alike so that mixed input such
    # as "en, fr; de" does not lose languages
    normalised = languagesStr.lower().replace('/', ',').replace(';', ',')
    langList2 = []
    for lang in normalised.split(','):
        lang = lang.strip()
        if not lang or lang in langList2:
            continue
        languageFilename = baseDir + '/translations/' + lang + '.json'
        if os.path.isfile(languageFilename):
            langList2.append(lang)

    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        # a missing attachment list would otherwise raise KeyError.
        # A lone non-list attachment is kept by wrapping it in a list
        attachments = [attachments] if attachments else []

    # remove any existing value
    propertyFound = None
    for propertyValue in attachments:
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not name or not isinstance(name, str):
            continue
        if not propertyValue.get('type'):
            continue
        if not name.lower().startswith('languages'):
            continue
        propertyFound = propertyValue
        break
    if propertyFound is not None:
        attachments.remove(propertyFound)

    if langList2:
        attachments.append({
            "name": "Languages",
            "type": "PropertyValue",
            "value": langList2
        })
    elif propertyFound is None:
        # nothing was removed or added, so leave the actor untouched
        return
    actorJson['attachment'] = attachments
def understoodPostLanguage(baseDir: str, nickname: str, domain: str,
                           messageJson: {}, systemLanguage: str,
                           httpPrefix: str, domainFull: str,
                           personCache: {}) -> bool:
    """Returns true if the post is written in a language
    understood by this account

    A post having no 'contentMap' dict, or having an entry for
    systemLanguage within it, is always understood. Otherwise the
    account's actor is obtained from the person cache and the post is
    understood if the actor declares no languages or if contentMap
    has an entry for one of the declared languages.
    """
    # the content may be on the wrapped object rather than the activity
    post = messageJson
    wrapped = post.get('object')
    if wrapped and isinstance(wrapped, dict):
        post = wrapped

    contentMap = post.get('contentMap')
    if not contentMap or not isinstance(contentMap, dict):
        return True
    if contentMap.get(systemLanguage):
        return True

    # NOTE(review): the value returned by acctDir is tested as a file
    # here - confirm that it is the actor file and not a directory,
    # otherwise this check can never pass
    actorFilename = acctDir(baseDir, nickname, domain)
    if not os.path.isfile(actorFilename):
        return False

    personUrl = httpPrefix + '://' + domainFull + '/users/' + nickname
    actorJson = getPersonFromCache(baseDir, personUrl, personCache, False)
    if not actorJson:
        # NOTE(review): the lookup is by personUrl but the filename is
        # what gets printed - confirm which one is wanted in the log
        print('WARN: unable to load actor to check languages ' + actorFilename)
        return False

    languagesUnderstood = _getActorLanguagesList(actorJson)
    if not languagesUnderstood:
        return True
    return any(contentMap.get(lang) for lang in languagesUnderstood)

View File

@ -391,7 +391,8 @@ def postMessageToOutbox(session, translate: {},
messageJson,
translate, YTReplacementDomain,
allowLocalNetworkAccess,
recentPostsCache, debug, systemLanguage):
recentPostsCache, debug, systemLanguage,
domainFull, personCache):
inboxUpdateIndex('tlmedia', baseDir,
postToNickname + '@' + domain,
savedFilename, debug)

View File

@ -31,7 +31,7 @@ from session import postImage
from webfinger import webfingerHandle
from httpsig import createSignedHeader
from siteactive import siteIsActive
from utils import understoodPostLanguage
from languages import understoodPostLanguage
from utils import getContentFromPost
from utils import removeDomainPort
from utils import getPortFromDomain
@ -2935,7 +2935,8 @@ def isImageMedia(session, baseDir: str, httpPrefix: str,
YTReplacementDomain: str,
allowLocalNetworkAccess: bool,
recentPostsCache: {}, debug: bool,
systemLanguage: str) -> bool:
systemLanguage: str,
domainFull: str, personCache: {}) -> bool:
"""Returns true if the given post has attached image media
"""
if postJsonObject['type'] == 'Announce':
@ -2946,7 +2947,8 @@ def isImageMedia(session, baseDir: str, httpPrefix: str,
YTReplacementDomain,
allowLocalNetworkAccess,
recentPostsCache, debug,
systemLanguage)
systemLanguage,
domainFull, personCache)
if postJsonAnnounce:
postJsonObject = postJsonAnnounce
if postJsonObject['type'] != 'Create':
@ -3900,7 +3902,8 @@ def downloadAnnounce(session, baseDir: str, httpPrefix: str,
translate: {}, YTReplacementDomain: str,
allowLocalNetworkAccess: bool,
recentPostsCache: {}, debug: bool,
systemLanguage: str) -> {}:
systemLanguage: str,
domainFull: str, personCache: {}) -> {}:
"""Download the post referenced by an announce
"""
if not postJsonObject.get('object'):
@ -4029,7 +4032,9 @@ def downloadAnnounce(session, baseDir: str, httpPrefix: str,
recentPostsCache)
return None
if not understoodPostLanguage(baseDir, nickname, domain,
announcedJson, systemLanguage):
announcedJson, systemLanguage,
httpPrefix, domainFull,
personCache):
return None
# Check the content of the announce
contentStr = announcedJson['content']

109
utils.py
View File

@ -2562,112 +2562,3 @@ def validUrlPrefix(url: str) -> bool:
if url.startswith(pre):
return True
return False
def _getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The list is taken from the first entry within the actor's
    'attachment' list which is a dict having a 'name' beginning with
    "languages" (case insensitive), a 'type' of 'PropertyValue' and a
    non-empty list as its 'value'. An empty list is returned if no
    entry qualifies.

    Malformed data (an 'attachment' which is not a list, entries which
    are not dicts, names which are not strings) is skipped rather than
    raising an exception.
    """
    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        return []
    for propertyValue in attachments:
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not isinstance(name, str):
            continue
        if not name.lower().startswith('languages'):
            continue
        if propertyValue.get('type') != 'PropertyValue':
            continue
        value = propertyValue.get('value')
        # an empty or non-list value does not end the search, a later
        # attachment may still hold a usable list
        if not value or not isinstance(value, list):
            continue
        return value
    return []
def getActorLanguages(actorJson: {}) -> str:
    """Returns a string containing languages used by the given actor

    The language codes are joined with " / ", for example "en / fr".
    An empty string is returned if the actor has no languages.
    Entries which are not non-empty strings are left out, so that the
    result is always a string.
    """
    langList = _getActorLanguagesList(actorJson)
    return ' / '.join(lang for lang in langList
                      if lang and isinstance(lang, str))
def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None:
    """Sets the languages used by the given actor

    languagesStr contains language codes separated by commas, slashes
    or semicolons (any mix of them), for example "en / fr".
    Only codes for which baseDir/translations/<code>.json exists are
    kept, each of them once and in the order given.

    The first existing attachment whose name begins with "languages"
    is removed from the actor and, if any valid codes remain, a new
    "Languages" PropertyValue is appended. actorJson is altered in
    place and nothing is returned.
    """
    # treat every supported separator alike so that mixed input such
    # as "en, fr; de" does not lose languages
    normalised = languagesStr.lower().replace('/', ',').replace(';', ',')
    langList2 = []
    for lang in normalised.split(','):
        lang = lang.strip()
        if not lang or lang in langList2:
            continue
        languageFilename = baseDir + '/translations/' + lang + '.json'
        if os.path.isfile(languageFilename):
            langList2.append(lang)

    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        # a missing attachment list would otherwise raise KeyError.
        # A lone non-list attachment is kept by wrapping it in a list
        attachments = [attachments] if attachments else []

    # remove any existing value
    propertyFound = None
    for propertyValue in attachments:
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not name or not isinstance(name, str):
            continue
        if not propertyValue.get('type'):
            continue
        if not name.lower().startswith('languages'):
            continue
        propertyFound = propertyValue
        break
    if propertyFound is not None:
        attachments.remove(propertyFound)

    if langList2:
        attachments.append({
            "name": "Languages",
            "type": "PropertyValue",
            "value": langList2
        })
    elif propertyFound is None:
        # nothing was removed or added, so leave the actor untouched
        return
    actorJson['attachment'] = attachments
def understoodPostLanguage(baseDir: str, nickname: str, domain: str,
                           messageJson: {}, systemLanguage: str) -> bool:
    """Returns true if the post is written in a language
    understood by this account

    A post having no 'contentMap' dict, or having an entry for
    systemLanguage within it, is always understood. Otherwise the
    account's actor is loaded from file and the post is understood if
    the actor declares no languages or if contentMap has an entry for
    one of the declared languages.
    """
    # the content may be on the wrapped object rather than the activity
    post = messageJson
    wrapped = post.get('object')
    if wrapped and isinstance(wrapped, dict):
        post = wrapped

    contentMap = post.get('contentMap')
    if not contentMap or not isinstance(contentMap, dict):
        return True
    if contentMap.get(systemLanguage):
        return True

    # NOTE(review): the value returned by acctDir is tested as a file
    # here - confirm that it is the actor file and not a directory,
    # otherwise this check can never pass
    actorFilename = acctDir(baseDir, nickname, domain)
    if not os.path.isfile(actorFilename):
        return False

    actorJson = loadJson(actorFilename)
    if not actorJson:
        print('WARN: unable to load actor to check languages ' + actorFilename)
        return False

    languagesUnderstood = _getActorLanguagesList(actorJson)
    if not languagesUnderstood:
        return True
    return any(contentMap.get(lang) for lang in languagesUnderstood)

View File

@ -1310,7 +1310,8 @@ def individualPostAsHtml(allowDownloads: bool,
YTReplacementDomain,
allowLocalNetworkAccess,
recentPostsCache, False,
systemLanguage)
systemLanguage,
domainFull, personCache)
if not postJsonAnnounce:
# if the announce could not be downloaded then mark it as rejected
rejectPostId(baseDir, nickname, domain, postJsonObject['id'],

View File

@ -23,7 +23,7 @@ from utils import loadJson
from utils import getConfigParam
from utils import getImageFormats
from utils import acctDir
from utils import getActorLanguages
from languages import getActorLanguages
from skills import getSkills
from theme import getThemesList
from person import personBoxJson