Handling of understood languages prior to automatic translation

main
Bob Mottram 2021-07-20 14:33:27 +01:00
parent bb3dee7533
commit bb3de9e173
14 changed files with 116 additions and 78 deletions

40
blog.py
View File

@ -16,6 +16,8 @@ from webapp_utils import htmlHeaderWithBlogMarkup
from webapp_utils import htmlFooter from webapp_utils import htmlFooter
from webapp_utils import getPostAttachmentsAsHtml from webapp_utils import getPostAttachmentsAsHtml
from webapp_media import addEmbeddedElements from webapp_media import addEmbeddedElements
from utils import getActorLanguagesList
from utils import getBaseContentFromPost
from utils import getContentFromPost from utils import getContentFromPost
from utils import isAccountDir from utils import isAccountDir
from utils import removeHtml from utils import removeHtml
@ -32,6 +34,7 @@ from utils import acctDir
from posts import createBlogsTimeline from posts import createBlogsTimeline
from newswire import rss2Header from newswire import rss2Header
from newswire import rss2Footer from newswire import rss2Footer
from cache import getPersonFromCache
def _noOfBlogReplies(baseDir: str, httpPrefix: str, translate: {}, def _noOfBlogReplies(baseDir: str, httpPrefix: str, translate: {},
@ -166,6 +169,7 @@ def _htmlBlogPostContent(authorized: bool,
handle: str, restrictToDomain: bool, handle: str, restrictToDomain: bool,
peertubeInstances: [], peertubeInstances: [],
systemLanguage: str, systemLanguage: str,
personCache: {},
blogSeparator: str = '<hr>') -> str: blogSeparator: str = '<hr>') -> str:
"""Returns the content for a single blog post """Returns the content for a single blog post
""" """
@ -237,7 +241,15 @@ def _htmlBlogPostContent(authorized: bool,
if attachmentStr: if attachmentStr:
blogStr += '<br><center>' + attachmentStr + '</center>' blogStr += '<br><center>' + attachmentStr + '</center>'
jsonContent = getContentFromPost(postJsonObject, systemLanguage) personUrl = \
httpPrefix + '://' + domainFull + '/users/' + nickname
actorJson = \
getPersonFromCache(baseDir, personUrl, personCache, False)
languagesUnderstood = []
if actorJson:
languagesUnderstood = getActorLanguagesList(actorJson)
jsonContent = getContentFromPost(postJsonObject, systemLanguage,
languagesUnderstood)
if jsonContent: if jsonContent:
contentStr = addEmbeddedElements(translate, jsonContent, contentStr = addEmbeddedElements(translate, jsonContent,
peertubeInstances) peertubeInstances)
@ -330,7 +342,8 @@ def _htmlBlogPostRSS2(authorized: bool,
pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ") pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
titleStr = postJsonObject['object']['summary'] titleStr = postJsonObject['object']['summary']
rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT") rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
content = getContentFromPost(postJsonObject, systemLanguage) content = \
getBaseContentFromPost(postJsonObject, systemLanguage)
description = firstParagraphFromString(content) description = firstParagraphFromString(content)
rssStr = ' <item>' rssStr = ' <item>'
rssStr += ' <title>' + titleStr + '</title>' rssStr += ' <title>' + titleStr + '</title>'
@ -362,7 +375,8 @@ def _htmlBlogPostRSS3(authorized: bool,
pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ") pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
titleStr = postJsonObject['object']['summary'] titleStr = postJsonObject['object']['summary']
rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT") rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
content = getContentFromPost(postJsonObject, systemLanguage) content = \
getBaseContentFromPost(postJsonObject, systemLanguage)
description = firstParagraphFromString(content) description = firstParagraphFromString(content)
rssStr = 'title: ' + titleStr + '\n' rssStr = 'title: ' + titleStr + '\n'
rssStr += 'link: ' + messageLink + '\n' rssStr += 'link: ' + messageLink + '\n'
@ -386,7 +400,7 @@ def _htmlBlogRemoveCwButton(blogStr: str, translate: {}) -> str:
def _getSnippetFromBlogContent(postJsonObject: {}, systemLanguage: str) -> str: def _getSnippetFromBlogContent(postJsonObject: {}, systemLanguage: str) -> str:
"""Returns a snippet of text from the blog post as a preview """Returns a snippet of text from the blog post as a preview
""" """
content = getContentFromPost(postJsonObject, systemLanguage) content = getBaseContentFromPost(postJsonObject, systemLanguage)
if '<p>' in content: if '<p>' in content:
content = content.split('<p>', 1)[1] content = content.split('<p>', 1)[1]
if '</p>' in content: if '</p>' in content:
@ -404,7 +418,7 @@ def htmlBlogPost(authorized: bool,
nickname: str, domain: str, domainFull: str, nickname: str, domain: str, domainFull: str,
postJsonObject: {}, postJsonObject: {},
peertubeInstances: [], peertubeInstances: [],
systemLanguage: str) -> str: systemLanguage: str, personCache: {}) -> str:
"""Returns a html blog post """Returns a html blog post
""" """
blogStr = '' blogStr = ''
@ -428,7 +442,8 @@ def htmlBlogPost(authorized: bool,
nickname, domain, nickname, domain,
domainFull, postJsonObject, domainFull, postJsonObject,
None, False, None, False,
peertubeInstances, systemLanguage) peertubeInstances, systemLanguage,
personCache)
# show rss links # show rss links
blogStr += '<p class="rssfeed">' blogStr += '<p class="rssfeed">'
@ -456,7 +471,8 @@ def htmlBlogPage(authorized: bool, session,
baseDir: str, httpPrefix: str, translate: {}, baseDir: str, httpPrefix: str, translate: {},
nickname: str, domain: str, port: int, nickname: str, domain: str, port: int,
noOfItems: int, pageNumber: int, noOfItems: int, pageNumber: int,
peertubeInstances: [], systemLanguage: str) -> str: peertubeInstances: [], systemLanguage: str,
personCache: {}) -> str:
"""Returns a html blog page containing posts """Returns a html blog page containing posts
""" """
if ' ' in nickname or '@' in nickname or \ if ' ' in nickname or '@' in nickname or \
@ -519,7 +535,8 @@ def htmlBlogPage(authorized: bool, session,
domainFull, item, domainFull, item,
None, True, None, True,
peertubeInstances, peertubeInstances,
systemLanguage) systemLanguage,
personCache)
if len(timelineJson['orderedItems']) >= noOfItems: if len(timelineJson['orderedItems']) >= noOfItems:
blogStr += navigateStr blogStr += navigateStr
@ -677,7 +694,8 @@ def htmlBlogView(authorized: bool,
session, baseDir: str, httpPrefix: str, session, baseDir: str, httpPrefix: str,
translate: {}, domain: str, port: int, translate: {}, domain: str, port: int,
noOfItems: int, noOfItems: int,
peertubeInstances: [], systemLanguage: str) -> str: peertubeInstances: [], systemLanguage: str,
personCache: {}) -> str:
"""Show the blog main page """Show the blog main page
""" """
blogStr = '' blogStr = ''
@ -696,7 +714,7 @@ def htmlBlogView(authorized: bool,
baseDir, httpPrefix, translate, baseDir, httpPrefix, translate,
nickname, domain, port, nickname, domain, port,
noOfItems, 1, peertubeInstances, noOfItems, 1, peertubeInstances,
systemLanguage) systemLanguage, personCache)
domainFull = getFullDomain(domain, port) domainFull = getFullDomain(domain, port)
@ -840,7 +858,7 @@ def htmlEditBlog(mediaInstance: bool, translate: {},
placeholderMessage + '</label>' placeholderMessage + '</label>'
messageBoxHeight = 800 messageBoxHeight = 800
contentStr = getContentFromPost(postJsonObject, systemLanguage) contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
contentStr = contentStr.replace('<p>', '').replace('</p>', '\n') contentStr = contentStr.replace('<p>', '').replace('</p>', '\n')
editBlogForm += \ editBlogForm += \

View File

@ -210,7 +210,7 @@ from shares import expireShares
from categories import setHashtagCategory from categories import setHashtagCategory
from languages import getActorLanguages from languages import getActorLanguages
from languages import setActorLanguages from languages import setActorLanguages
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import acctDir from utils import acctDir
from utils import getImageExtensionFromMimeType from utils import getImageExtensionFromMimeType
from utils import getImageMimeType from utils import getImageMimeType
@ -9825,7 +9825,8 @@ class PubServer(BaseHTTPRequestHandler):
domain, port, domain, port,
maxPostsInBlogsFeed, pageNumber, maxPostsInBlogsFeed, pageNumber,
self.server.peertubeInstances, self.server.peertubeInstances,
self.server.systemLanguage) self.server.systemLanguage,
self.server.personCache)
if msg is not None: if msg is not None:
msg = msg.encode('utf-8') msg = msg.encode('utf-8')
msglen = len(msg) msglen = len(msg)
@ -10955,7 +10956,8 @@ class PubServer(BaseHTTPRequestHandler):
self.server.port, self.server.port,
maxPostsInBlogsFeed, maxPostsInBlogsFeed,
self.server.peertubeInstances, self.server.peertubeInstances,
self.server.systemLanguage) self.server.systemLanguage,
self.server.personCache)
if msg is not None: if msg is not None:
msg = msg.encode('utf-8') msg = msg.encode('utf-8')
msglen = len(msg) msglen = len(msg)
@ -11054,7 +11056,8 @@ class PubServer(BaseHTTPRequestHandler):
self.server.domainFull, self.server.domainFull,
postJsonObject, postJsonObject,
self.server.peertubeInstances, self.server.peertubeInstances,
self.server.systemLanguage) self.server.systemLanguage,
self.server.personCache)
if msg is not None: if msg is not None:
msg = msg.encode('utf-8') msg = msg.encode('utf-8')
msglen = len(msg) msglen = len(msg)
@ -13186,8 +13189,8 @@ class PubServer(BaseHTTPRequestHandler):
return 1 return 1
if pinToProfile: if pinToProfile:
contentStr = \ contentStr = \
getContentFromPost(messageJson, getBaseContentFromPost(messageJson,
self.server.systemLanguage) self.server.systemLanguage)
pinPost(self.server.baseDir, pinPost(self.server.baseDir,
nickname, self.server.domain, contentStr) nickname, self.server.domain, contentStr)
return 1 return 1

View File

@ -16,7 +16,7 @@ import webbrowser
import urllib.parse import urllib.parse
from pathlib import Path from pathlib import Path
from random import randint from random import randint
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import hasObjectDict from utils import hasObjectDict
from utils import getFullDomain from utils import getFullDomain
from utils import isDM from utils import isDM
@ -700,7 +700,7 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
postJsonObject2['object'].get('content'): postJsonObject2['object'].get('content'):
attributedTo = postJsonObject2['object']['attributedTo'] attributedTo = postJsonObject2['object']['attributedTo']
content = \ content = \
getContentFromPost(postJsonObject2, systemLanguage) getBaseContentFromPost(postJsonObject2, systemLanguage)
if isinstance(attributedTo, str) and content: if isinstance(attributedTo, str) and content:
actor = attributedTo actor = attributedTo
nameStr += ' ' + translate['announces'] + ' ' + \ nameStr += ' ' + translate['announces'] + ' ' + \
@ -725,7 +725,7 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
attributedTo = postJsonObject['object']['attributedTo'] attributedTo = postJsonObject['object']['attributedTo']
if not attributedTo: if not attributedTo:
return {} return {}
content = getContentFromPost(postJsonObject, systemLanguage) content = getBaseContentFromPost(postJsonObject, systemLanguage)
if not isinstance(attributedTo, str) or \ if not isinstance(attributedTo, str) or \
not isinstance(content, str): not isinstance(content, str):
return {} return {}
@ -1048,7 +1048,7 @@ def _desktopShowBox(indent: str,
published = _formatPublished(postJsonObject['published']) published = _formatPublished(postJsonObject['published'])
contentStr = getContentFromPost(postJsonObject, systemLanguage) contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
content = _textOnlyContent(contentStr) content = _textOnlyContent(contentStr)
if boxName != 'dm': if boxName != 'dm':
if isDM(postJsonObject): if isDM(postJsonObject):
@ -2334,7 +2334,7 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
postJsonObject = postJsonObject2 postJsonObject = postJsonObject2
if postJsonObject: if postJsonObject:
content = \ content = \
getContentFromPost(postJsonObject, systemLanguage) getBaseContentFromPost(postJsonObject, systemLanguage)
messageStr, detectedLinks = \ messageStr, detectedLinks = \
speakableText(baseDir, content, translate) speakableText(baseDir, content, translate)
linkOpened = False linkOpened = False
@ -2390,8 +2390,8 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
print('') print('')
if postJsonObject['object'].get('summary'): if postJsonObject['object'].get('summary'):
print(postJsonObject['object']['summary']) print(postJsonObject['object']['summary'])
contentStr = getContentFromPost(postJsonObject, contentStr = getBaseContentFromPost(postJsonObject,
systemLanguage) systemLanguage)
print(contentStr) print(contentStr)
print('') print('')
sayStr = 'Confirm delete, yes or no?' sayStr = 'Confirm delete, yes or no?'

View File

@ -14,7 +14,7 @@ import time
import random import random
from linked_data_sig import verifyJsonSignature from linked_data_sig import verifyJsonSignature
from languages import understoodPostLanguage from languages import understoodPostLanguage
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import acctDir from utils import acctDir
from utils import removeDomainPort from utils import removeDomainPort
from utils import getPortFromDomain from utils import getPortFromDomain
@ -353,7 +353,7 @@ def savePostToInboxQueue(baseDir: str, httpPrefix: str,
httpHeaders: {}, httpHeaders: {},
postPath: str, debug: bool, postPath: str, debug: bool,
blockedCache: [], systemLanguage: str) -> str: blockedCache: [], systemLanguage: str) -> str:
"""Saves the give json to the inbox queue for the person """Saves the given json to the inbox queue for the person
keyId specifies the actor sending the post keyId specifies the actor sending the post
""" """
if len(messageBytes) > 10240: if len(messageBytes) > 10240:
@ -416,7 +416,7 @@ def savePostToInboxQueue(baseDir: str, httpPrefix: str,
replyNickname + '@' + replyDomain) replyNickname + '@' + replyDomain)
return None return None
if postJsonObject['object'].get('content'): if postJsonObject['object'].get('content'):
contentStr = getContentFromPost(postJsonObject, systemLanguage) contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
if contentStr: if contentStr:
if isFiltered(baseDir, nickname, domain, contentStr): if isFiltered(baseDir, nickname, domain, contentStr):
if debug: if debug:
@ -1649,7 +1649,7 @@ def _validPostContent(baseDir: str, nickname: str, domain: str,
messageJson['object']['content']): messageJson['object']['content']):
return True return True
contentStr = getContentFromPost(messageJson, systemLanguage) contentStr = getBaseContentFromPost(messageJson, systemLanguage)
if dangerousMarkup(contentStr, allowLocalNetworkAccess): if dangerousMarkup(contentStr, allowLocalNetworkAccess):
if messageJson['object'].get('id'): if messageJson['object'].get('id'):
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id']) print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
@ -1951,7 +1951,7 @@ def _sendToGroupMembers(session, baseDir: str, handle: str, port: int,
sendingActorDomainFull = \ sendingActorDomainFull = \
getFullDomain(sendingActorDomain, sendingActorPort) getFullDomain(sendingActorDomain, sendingActorPort)
senderStr = '@' + sendingActorNickname + '@' + sendingActorDomainFull senderStr = '@' + sendingActorNickname + '@' + sendingActorDomainFull
contentStr = getContentFromPost(postJsonObject, systemLanguage) contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
if not contentStr.startswith(senderStr): if not contentStr.startswith(senderStr):
postJsonObject['object']['content'] = \ postJsonObject['object']['content'] = \
senderStr + ' ' + contentStr senderStr + ' ' + contentStr

View File

@ -10,41 +10,17 @@ __module_group__ = "Core"
import os import os
import json import json
from urllib import request, parse from urllib import request, parse
from utils import getActorLanguagesList
from utils import removeHtml from utils import removeHtml
from utils import acctDir
from utils import hasObjectDict from utils import hasObjectDict
from utils import getConfigParam from utils import getConfigParam
from cache import getPersonFromCache from cache import getPersonFromCache
def _getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The actor's 'attachment' entries are searched for the first one
    which has type 'PropertyValue', a name beginning with 'languages'
    (case insensitive) and a non-empty list as its value.

    The returned list is a sorted copy, so the actor json which was
    passed in is left unaltered. An empty list is returned if no
    suitable attachment exists.
    """
    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        return []
    for propertyValue in attachments:
        # skip malformed entries rather than raising an exception
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not isinstance(name, str):
            continue
        if not name.lower().startswith('languages'):
            continue
        if propertyValue.get('type') != 'PropertyValue':
            continue
        langList = propertyValue.get('value')
        if not langList or not isinstance(langList, list):
            continue
        # sorted() is used instead of an in-place sort so that the
        # caller's actor json is not modified as a side effect
        return sorted(langList)
    return []
def getActorLanguages(actorJson: {}) -> str: def getActorLanguages(actorJson: {}) -> str:
"""Returns a string containing languages used by the given actor """Returns a string containing languages used by the given actor
""" """
langList = _getActorLanguagesList(actorJson) langList = getActorLanguagesList(actorJson)
if not langList: if not langList:
return '' return ''
languagesStr = '' languagesStr = ''
@ -121,7 +97,7 @@ def understoodPostLanguage(baseDir: str, nickname: str, domain: str,
if not actorJson: if not actorJson:
print('WARN: unable to load actor to check languages ' + personUrl) print('WARN: unable to load actor to check languages ' + personUrl)
return False return False
languagesUnderstood = _getActorLanguagesList(actorJson) languagesUnderstood = getActorLanguagesList(actorJson)
if not languagesUnderstood: if not languagesUnderstood:
return True return True
for lang in languagesUnderstood: for lang in languagesUnderstood:

View File

@ -13,7 +13,7 @@ import subprocess
from random import randint from random import randint
from hashlib import sha1 from hashlib import sha1
from auth import createPassword from auth import createPassword
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import getFullDomain from utils import getFullDomain
from utils import getImageExtensions from utils import getImageExtensions
from utils import getVideoExtensions from utils import getVideoExtensions
@ -38,12 +38,13 @@ def replaceYouTube(postJsonObject: {}, replacementDomain: str,
return return
if not postJsonObject['object'].get('content'): if not postJsonObject['object'].get('content'):
return return
contentStr = getContentFromPost(postJsonObject, systemLanguage) contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
if 'www.youtube.com' not in contentStr: if 'www.youtube.com' not in contentStr:
return return
contentStr = contentStr.replace('www.youtube.com', replacementDomain) contentStr = contentStr.replace('www.youtube.com', replacementDomain)
postJsonObject['object']['content'] = contentStr postJsonObject['object']['content'] = contentStr
postJsonObject['object']['contentMap'][systemLanguage] = contentStr if postJsonObject['object'].get('contentMap'):
postJsonObject['object']['contentMap'][systemLanguage] = contentStr
def _removeMetaData(imageFilename: str, outputFilename: str) -> None: def _removeMetaData(imageFilename: str, outputFilename: str) -> None:

View File

@ -25,7 +25,7 @@ from newswire import getDictFromNewswire
from posts import createNewsPost from posts import createNewsPost
from posts import archivePostsForPerson from posts import archivePostsForPerson
from content import validHashTag from content import validHashTag
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import removeHtml from utils import removeHtml
from utils import getFullDomain from utils import getFullDomain
from utils import loadJson from utils import loadJson
@ -314,7 +314,7 @@ def _hashtagAdd(baseDir: str, httpPrefix: str, domainFull: str,
hashtagHtml = \ hashtagHtml = \
" <a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \ " <a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \
"rel=\"tag\">#<span>" + htId + "</span></a>" "rel=\"tag\">#<span>" + htId + "</span></a>"
content = getContentFromPost(postJsonObject, systemLanguage) content = getBaseContentFromPost(postJsonObject, systemLanguage)
if hashtagHtml in content: if hashtagHtml in content:
return return
@ -344,7 +344,7 @@ def _hashtagRemove(httpPrefix: str, domainFull: str, postJsonObject: {},
hashtagHtml = \ hashtagHtml = \
"<a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \ "<a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \
"rel=\"tag\">#<span>" + htId + "</span></a>" "rel=\"tag\">#<span>" + htId + "</span></a>"
content = getContentFromPost(postJsonObject, systemLanguage) content = getBaseContentFromPost(postJsonObject, systemLanguage)
if hashtagHtml in content: if hashtagHtml in content:
content = content.replace(hashtagHtml, '').replace(' ', ' ') content = content.replace(hashtagHtml, '').replace(' ', ' ')
postJsonObject['object']['content'] = content postJsonObject['object']['content'] = content
@ -385,7 +385,7 @@ def _newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
# get the full text content of the post # get the full text content of the post
content = '' content = ''
if postJsonObject['object'].get('content'): if postJsonObject['object'].get('content'):
content += getContentFromPost(postJsonObject, systemLanguage) content += getBaseContentFromPost(postJsonObject, systemLanguage)
if postJsonObject['object'].get('summary'): if postJsonObject['object'].get('summary'):
content += ' ' + postJsonObject['object']['summary'] content += ' ' + postJsonObject['object']['summary']
content = content.lower() content = content.lower()
@ -667,7 +667,7 @@ def _convertRSStoActivityPub(baseDir: str, httpPrefix: str,
"\" class=\"addedHashtag\" " + \ "\" class=\"addedHashtag\" " + \
"rel=\"tag\">#<span>" + \ "rel=\"tag\">#<span>" + \
htId + "</span></a>" htId + "</span></a>"
content = getContentFromPost(blog, systemLanguage) content = getBaseContentFromPost(blog, systemLanguage)
if hashtagHtml not in content: if hashtagHtml not in content:
if content.endswith('</p>'): if content.endswith('</p>'):
content = \ content = \

View File

@ -18,7 +18,7 @@ from datetime import timezone
from collections import OrderedDict from collections import OrderedDict
from utils import validPostDate from utils import validPostDate
from categories import setHashtagCategory from categories import setHashtagCategory
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import hasObjectDict from utils import hasObjectDict
from utils import firstParagraphFromString from utils import firstParagraphFromString
from utils import isPublicPost from utils import isPublicPost
@ -963,7 +963,7 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
if os.path.isfile(fullPostFilename + '.votes'): if os.path.isfile(fullPostFilename + '.votes'):
votes = loadJson(fullPostFilename + '.votes') votes = loadJson(fullPostFilename + '.votes')
content = \ content = \
getContentFromPost(postJsonObject, systemLanguage) getBaseContentFromPost(postJsonObject, systemLanguage)
description = firstParagraphFromString(content) description = firstParagraphFromString(content)
description = removeHtml(description) description = removeHtml(description)
tagsFromPost = _getHashtagsFromPost(postJsonObject) tagsFromPost = _getHashtagsFromPost(postJsonObject)

View File

@ -16,7 +16,7 @@ from posts import outboxMessageCreateWrap
from posts import savePostToBox from posts import savePostToBox
from posts import sendToFollowersThread from posts import sendToFollowersThread
from posts import sendToNamedAddresses from posts import sendToNamedAddresses
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import hasObjectDict from utils import hasObjectDict
from utils import getLocalNetworkAddresses from utils import getLocalNetworkAddresses
from utils import getFullDomain from utils import getFullDomain
@ -213,7 +213,7 @@ def postMessageToOutbox(session, translate: {},
# check that the outgoing post doesn't contain any markup # check that the outgoing post doesn't contain any markup
# which can be used to implement exploits # which can be used to implement exploits
if hasObjectDict(messageJson): if hasObjectDict(messageJson):
contentStr = getContentFromPost(messageJson, systemLanguage) contentStr = getBaseContentFromPost(messageJson, systemLanguage)
if contentStr: if contentStr:
if dangerousMarkup(contentStr, allowLocalNetworkAccess): if dangerousMarkup(contentStr, allowLocalNetworkAccess):
print('POST to outbox contains dangerous markup: ' + print('POST to outbox contains dangerous markup: ' +

View File

@ -32,7 +32,7 @@ from webfinger import webfingerHandle
from httpsig import createSignedHeader from httpsig import createSignedHeader
from siteactive import siteIsActive from siteactive import siteIsActive
from languages import understoodPostLanguage from languages import understoodPostLanguage
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import removeDomainPort from utils import removeDomainPort
from utils import getPortFromDomain from utils import getPortFromDomain
from utils import hasObjectDict from utils import hasObjectDict
@ -387,7 +387,7 @@ def _getPosts(session, outboxUrl: str, maxPosts: int,
if not isPublic: if not isPublic:
continue continue
content = getContentFromPost(item, systemLanguage) content = getBaseContentFromPost(item, systemLanguage)
content = content.replace('&apos;', "'") content = content.replace('&apos;', "'")
mentions = [] mentions = []
@ -565,7 +565,7 @@ def getPostDomains(session, outboxUrl: str, maxPosts: int,
break break
if not hasObjectDict(item): if not hasObjectDict(item):
continue continue
contentStr = getContentFromPost(item, systemLanguage) contentStr = getBaseContentFromPost(item, systemLanguage)
if contentStr: if contentStr:
_updateWordFrequency(contentStr, wordFrequency) _updateWordFrequency(contentStr, wordFrequency)
if item['object'].get('inReplyTo'): if item['object'].get('inReplyTo'):

View File

@ -28,7 +28,32 @@ invalidCharacters = (
) )
def getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The actor's 'attachment' entries are searched for the first one
    which has type 'PropertyValue', a name beginning with 'languages'
    (case insensitive) and a non-empty list as its value.

    The returned list is a sorted copy, so the actor json which was
    passed in is left unaltered. An empty list is returned if no
    suitable attachment exists.
    """
    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        return []
    for propertyValue in attachments:
        # skip malformed entries rather than raising an exception
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not isinstance(name, str):
            continue
        if not name.lower().startswith('languages'):
            continue
        if propertyValue.get('type') != 'PropertyValue':
            continue
        langList = propertyValue.get('value')
        if not langList or not isinstance(langList, list):
            continue
        # sorted() is used instead of an in-place sort so that the
        # caller's actor json is not modified as a side effect
        return sorted(langList)
    return []
def getContentFromPost(postJsonObject: {}, systemLanguage: str,
languagesUnderstood: []) -> str:
"""Returns the content from the post in the given language """Returns the content from the post in the given language
including searching for a matching entry within contentMap including searching for a matching entry within contentMap
""" """
@ -43,6 +68,12 @@ def getContentFromPost(postJsonObject: {}, systemLanguage: str) -> str:
if thisPostJson['contentMap'].get(systemLanguage): if thisPostJson['contentMap'].get(systemLanguage):
if isinstance(thisPostJson['contentMap'][systemLanguage], str): if isinstance(thisPostJson['contentMap'][systemLanguage], str):
return thisPostJson['contentMap'][systemLanguage] return thisPostJson['contentMap'][systemLanguage]
else:
# is there a contentMap entry for one of
# the understood languages?
for lang in languagesUnderstood:
if thisPostJson['contentMap'].get(lang):
return thisPostJson['contentMap'][lang]
else: else:
if isinstance(thisPostJson['content'], str): if isinstance(thisPostJson['content'], str):
content = thisPostJson['content'] content = thisPostJson['content']

View File

@ -11,7 +11,7 @@ import os
from datetime import datetime from datetime import datetime
from content import removeLongWords from content import removeLongWords
from content import limitRepeatedWords from content import limitRepeatedWords
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import removeHtml from utils import removeHtml
from utils import locatePost from utils import locatePost
from utils import loadJson from utils import loadJson
@ -698,7 +698,7 @@ def htmlEditNewsPost(cssCache: {}, translate: {}, baseDir: str, path: str,
' <input type="text" name="newsPostTitle" value="' + \ ' <input type="text" name="newsPostTitle" value="' + \
newsPostTitle + '"><br>\n' newsPostTitle + '"><br>\n'
newsPostContent = getContentFromPost(postJsonObject, systemLanguage) newsPostContent = getBaseContentFromPost(postJsonObject, systemLanguage)
editNewsPostForm += \ editNewsPostForm += \
' <textarea id="message" name="editedNewsPost" ' + \ ' <textarea id="message" name="editedNewsPost" ' + \
'style="height:600px" spellcheck="true">' + \ 'style="height:600px" spellcheck="true">' + \

View File

@ -22,8 +22,8 @@ from posts import postIsMuted
from posts import getPersonBox from posts import getPersonBox
from posts import downloadAnnounce from posts import downloadAnnounce
from posts import populateRepliesJson from posts import populateRepliesJson
from utils import getActorLanguagesList
from utils import getBaseContentFromPost from utils import getBaseContentFromPost
from utils import getContentFromPost
from utils import hasObjectDict from utils import hasObjectDict
from utils import updateAnnounceCollection from utils import updateAnnounceCollection
from utils import isPGPEncrypted from utils import isPGPEncrypted
@ -1592,7 +1592,16 @@ def individualPostAsHtml(allowDownloads: bool,
postJsonObject['object']['contentMap'][systemLanguage] = \ postJsonObject['object']['contentMap'][systemLanguage] = \
postJsonObject['object']['content'] postJsonObject['object']['content']
contentStr = getContentFromPost(postJsonObject, systemLanguage) domainFull = getFullDomain(domain, port)
personUrl = \
httpPrefix + '://' + domainFull + '/users/' + nickname
actorJson = \
getPersonFromCache(baseDir, personUrl, personCache, False)
languagesUnderstood = []
if actorJson:
languagesUnderstood = getActorLanguagesList(actorJson)
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage,
languagesUnderstood)
if not contentStr: if not contentStr:
contentStr = \ contentStr = \
autoTranslatePost(baseDir, postJsonObject, autoTranslatePost(baseDir, postJsonObject,

View File

@ -11,7 +11,7 @@ import os
from shutil import copyfile from shutil import copyfile
import urllib.parse import urllib.parse
from datetime import datetime from datetime import datetime
from utils import getContentFromPost from utils import getBaseContentFromPost
from utils import isAccountDir from utils import isAccountDir
from utils import getConfigParam from utils import getConfigParam
from utils import getFullDomain from utils import getFullDomain
@ -904,7 +904,7 @@ def rssHashtagSearch(nickname: str, domain: str, port: int,
postJsonObject['object']['summary'] + \ postJsonObject['object']['summary'] + \
'</title>' '</title>'
description = \ description = \
getContentFromPost(postJsonObject, systemLanguage) getBaseContentFromPost(postJsonObject, systemLanguage)
description = firstParagraphFromString(description) description = firstParagraphFromString(description)
hashtagFeed += \ hashtagFeed += \
' <description>' + description + '</description>' ' <description>' + description + '</description>'