mirror of https://gitlab.com/bashrc2/epicyon
Handling of understood languages prior to automatic translation
parent
bb3dee7533
commit
bb3de9e173
40
blog.py
40
blog.py
|
@ -16,6 +16,8 @@ from webapp_utils import htmlHeaderWithBlogMarkup
|
|||
from webapp_utils import htmlFooter
|
||||
from webapp_utils import getPostAttachmentsAsHtml
|
||||
from webapp_media import addEmbeddedElements
|
||||
from utils import getActorLanguagesList
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import getContentFromPost
|
||||
from utils import isAccountDir
|
||||
from utils import removeHtml
|
||||
|
@ -32,6 +34,7 @@ from utils import acctDir
|
|||
from posts import createBlogsTimeline
|
||||
from newswire import rss2Header
|
||||
from newswire import rss2Footer
|
||||
from cache import getPersonFromCache
|
||||
|
||||
|
||||
def _noOfBlogReplies(baseDir: str, httpPrefix: str, translate: {},
|
||||
|
@ -166,6 +169,7 @@ def _htmlBlogPostContent(authorized: bool,
|
|||
handle: str, restrictToDomain: bool,
|
||||
peertubeInstances: [],
|
||||
systemLanguage: str,
|
||||
personCache: {},
|
||||
blogSeparator: str = '<hr>') -> str:
|
||||
"""Returns the content for a single blog post
|
||||
"""
|
||||
|
@ -237,7 +241,15 @@ def _htmlBlogPostContent(authorized: bool,
|
|||
if attachmentStr:
|
||||
blogStr += '<br><center>' + attachmentStr + '</center>'
|
||||
|
||||
jsonContent = getContentFromPost(postJsonObject, systemLanguage)
|
||||
personUrl = \
|
||||
httpPrefix + '://' + domainFull + '/users/' + nickname
|
||||
actorJson = \
|
||||
getPersonFromCache(baseDir, personUrl, personCache, False)
|
||||
languagesUnderstood = []
|
||||
if actorJson:
|
||||
languagesUnderstood = getActorLanguagesList(actorJson)
|
||||
jsonContent = getContentFromPost(postJsonObject, systemLanguage,
|
||||
languagesUnderstood)
|
||||
if jsonContent:
|
||||
contentStr = addEmbeddedElements(translate, jsonContent,
|
||||
peertubeInstances)
|
||||
|
@ -330,7 +342,8 @@ def _htmlBlogPostRSS2(authorized: bool,
|
|||
pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
|
||||
titleStr = postJsonObject['object']['summary']
|
||||
rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
|
||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
||||
content = \
|
||||
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
description = firstParagraphFromString(content)
|
||||
rssStr = ' <item>'
|
||||
rssStr += ' <title>' + titleStr + '</title>'
|
||||
|
@ -362,7 +375,8 @@ def _htmlBlogPostRSS3(authorized: bool,
|
|||
pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
|
||||
titleStr = postJsonObject['object']['summary']
|
||||
rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
|
||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
||||
content = \
|
||||
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
description = firstParagraphFromString(content)
|
||||
rssStr = 'title: ' + titleStr + '\n'
|
||||
rssStr += 'link: ' + messageLink + '\n'
|
||||
|
@ -386,7 +400,7 @@ def _htmlBlogRemoveCwButton(blogStr: str, translate: {}) -> str:
|
|||
def _getSnippetFromBlogContent(postJsonObject: {}, systemLanguage: str) -> str:
|
||||
"""Returns a snippet of text from the blog post as a preview
|
||||
"""
|
||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
||||
content = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
if '<p>' in content:
|
||||
content = content.split('<p>', 1)[1]
|
||||
if '</p>' in content:
|
||||
|
@ -404,7 +418,7 @@ def htmlBlogPost(authorized: bool,
|
|||
nickname: str, domain: str, domainFull: str,
|
||||
postJsonObject: {},
|
||||
peertubeInstances: [],
|
||||
systemLanguage: str) -> str:
|
||||
systemLanguage: str, personCache: {}) -> str:
|
||||
"""Returns a html blog post
|
||||
"""
|
||||
blogStr = ''
|
||||
|
@ -428,7 +442,8 @@ def htmlBlogPost(authorized: bool,
|
|||
nickname, domain,
|
||||
domainFull, postJsonObject,
|
||||
None, False,
|
||||
peertubeInstances, systemLanguage)
|
||||
peertubeInstances, systemLanguage,
|
||||
personCache)
|
||||
|
||||
# show rss links
|
||||
blogStr += '<p class="rssfeed">'
|
||||
|
@ -456,7 +471,8 @@ def htmlBlogPage(authorized: bool, session,
|
|||
baseDir: str, httpPrefix: str, translate: {},
|
||||
nickname: str, domain: str, port: int,
|
||||
noOfItems: int, pageNumber: int,
|
||||
peertubeInstances: [], systemLanguage: str) -> str:
|
||||
peertubeInstances: [], systemLanguage: str,
|
||||
personCache: {}) -> str:
|
||||
"""Returns a html blog page containing posts
|
||||
"""
|
||||
if ' ' in nickname or '@' in nickname or \
|
||||
|
@ -519,7 +535,8 @@ def htmlBlogPage(authorized: bool, session,
|
|||
domainFull, item,
|
||||
None, True,
|
||||
peertubeInstances,
|
||||
systemLanguage)
|
||||
systemLanguage,
|
||||
personCache)
|
||||
|
||||
if len(timelineJson['orderedItems']) >= noOfItems:
|
||||
blogStr += navigateStr
|
||||
|
@ -677,7 +694,8 @@ def htmlBlogView(authorized: bool,
|
|||
session, baseDir: str, httpPrefix: str,
|
||||
translate: {}, domain: str, port: int,
|
||||
noOfItems: int,
|
||||
peertubeInstances: [], systemLanguage: str) -> str:
|
||||
peertubeInstances: [], systemLanguage: str,
|
||||
personCache: {}) -> str:
|
||||
"""Show the blog main page
|
||||
"""
|
||||
blogStr = ''
|
||||
|
@ -696,7 +714,7 @@ def htmlBlogView(authorized: bool,
|
|||
baseDir, httpPrefix, translate,
|
||||
nickname, domain, port,
|
||||
noOfItems, 1, peertubeInstances,
|
||||
systemLanguage)
|
||||
systemLanguage, personCache)
|
||||
|
||||
domainFull = getFullDomain(domain, port)
|
||||
|
||||
|
@ -840,7 +858,7 @@ def htmlEditBlog(mediaInstance: bool, translate: {},
|
|||
placeholderMessage + '</label>'
|
||||
messageBoxHeight = 800
|
||||
|
||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
||||
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
contentStr = contentStr.replace('<p>', '').replace('</p>', '\n')
|
||||
|
||||
editBlogForm += \
|
||||
|
|
15
daemon.py
15
daemon.py
|
@ -210,7 +210,7 @@ from shares import expireShares
|
|||
from categories import setHashtagCategory
|
||||
from languages import getActorLanguages
|
||||
from languages import setActorLanguages
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import acctDir
|
||||
from utils import getImageExtensionFromMimeType
|
||||
from utils import getImageMimeType
|
||||
|
@ -9825,7 +9825,8 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
domain, port,
|
||||
maxPostsInBlogsFeed, pageNumber,
|
||||
self.server.peertubeInstances,
|
||||
self.server.systemLanguage)
|
||||
self.server.systemLanguage,
|
||||
self.server.personCache)
|
||||
if msg is not None:
|
||||
msg = msg.encode('utf-8')
|
||||
msglen = len(msg)
|
||||
|
@ -10955,7 +10956,8 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
self.server.port,
|
||||
maxPostsInBlogsFeed,
|
||||
self.server.peertubeInstances,
|
||||
self.server.systemLanguage)
|
||||
self.server.systemLanguage,
|
||||
self.server.personCache)
|
||||
if msg is not None:
|
||||
msg = msg.encode('utf-8')
|
||||
msglen = len(msg)
|
||||
|
@ -11054,7 +11056,8 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
self.server.domainFull,
|
||||
postJsonObject,
|
||||
self.server.peertubeInstances,
|
||||
self.server.systemLanguage)
|
||||
self.server.systemLanguage,
|
||||
self.server.personCache)
|
||||
if msg is not None:
|
||||
msg = msg.encode('utf-8')
|
||||
msglen = len(msg)
|
||||
|
@ -13186,8 +13189,8 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
return 1
|
||||
if pinToProfile:
|
||||
contentStr = \
|
||||
getContentFromPost(messageJson,
|
||||
self.server.systemLanguage)
|
||||
getBaseContentFromPost(messageJson,
|
||||
self.server.systemLanguage)
|
||||
pinPost(self.server.baseDir,
|
||||
nickname, self.server.domain, contentStr)
|
||||
return 1
|
||||
|
|
|
@ -16,7 +16,7 @@ import webbrowser
|
|||
import urllib.parse
|
||||
from pathlib import Path
|
||||
from random import randint
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import hasObjectDict
|
||||
from utils import getFullDomain
|
||||
from utils import isDM
|
||||
|
@ -700,7 +700,7 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
|
|||
postJsonObject2['object'].get('content'):
|
||||
attributedTo = postJsonObject2['object']['attributedTo']
|
||||
content = \
|
||||
getContentFromPost(postJsonObject2, systemLanguage)
|
||||
getBaseContentFromPost(postJsonObject2, systemLanguage)
|
||||
if isinstance(attributedTo, str) and content:
|
||||
actor = attributedTo
|
||||
nameStr += ' ' + translate['announces'] + ' ' + \
|
||||
|
@ -725,7 +725,7 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
|
|||
attributedTo = postJsonObject['object']['attributedTo']
|
||||
if not attributedTo:
|
||||
return {}
|
||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
||||
content = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
if not isinstance(attributedTo, str) or \
|
||||
not isinstance(content, str):
|
||||
return {}
|
||||
|
@ -1048,7 +1048,7 @@ def _desktopShowBox(indent: str,
|
|||
|
||||
published = _formatPublished(postJsonObject['published'])
|
||||
|
||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
||||
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
content = _textOnlyContent(contentStr)
|
||||
if boxName != 'dm':
|
||||
if isDM(postJsonObject):
|
||||
|
@ -2334,7 +2334,7 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
|
|||
postJsonObject = postJsonObject2
|
||||
if postJsonObject:
|
||||
content = \
|
||||
getContentFromPost(postJsonObject, systemLanguage)
|
||||
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
messageStr, detectedLinks = \
|
||||
speakableText(baseDir, content, translate)
|
||||
linkOpened = False
|
||||
|
@ -2390,8 +2390,8 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
|
|||
print('')
|
||||
if postJsonObject['object'].get('summary'):
|
||||
print(postJsonObject['object']['summary'])
|
||||
contentStr = getContentFromPost(postJsonObject,
|
||||
systemLanguage)
|
||||
contentStr = getBaseContentFromPost(postJsonObject,
|
||||
systemLanguage)
|
||||
print(contentStr)
|
||||
print('')
|
||||
sayStr = 'Confirm delete, yes or no?'
|
||||
|
|
10
inbox.py
10
inbox.py
|
@ -14,7 +14,7 @@ import time
|
|||
import random
|
||||
from linked_data_sig import verifyJsonSignature
|
||||
from languages import understoodPostLanguage
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import acctDir
|
||||
from utils import removeDomainPort
|
||||
from utils import getPortFromDomain
|
||||
|
@ -353,7 +353,7 @@ def savePostToInboxQueue(baseDir: str, httpPrefix: str,
|
|||
httpHeaders: {},
|
||||
postPath: str, debug: bool,
|
||||
blockedCache: [], systemLanguage: str) -> str:
|
||||
"""Saves the give json to the inbox queue for the person
|
||||
"""Saves the given json to the inbox queue for the person
|
||||
keyId specifies the actor sending the post
|
||||
"""
|
||||
if len(messageBytes) > 10240:
|
||||
|
@ -416,7 +416,7 @@ def savePostToInboxQueue(baseDir: str, httpPrefix: str,
|
|||
replyNickname + '@' + replyDomain)
|
||||
return None
|
||||
if postJsonObject['object'].get('content'):
|
||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
||||
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
if contentStr:
|
||||
if isFiltered(baseDir, nickname, domain, contentStr):
|
||||
if debug:
|
||||
|
@ -1649,7 +1649,7 @@ def _validPostContent(baseDir: str, nickname: str, domain: str,
|
|||
messageJson['object']['content']):
|
||||
return True
|
||||
|
||||
contentStr = getContentFromPost(messageJson, systemLanguage)
|
||||
contentStr = getBaseContentFromPost(messageJson, systemLanguage)
|
||||
if dangerousMarkup(contentStr, allowLocalNetworkAccess):
|
||||
if messageJson['object'].get('id'):
|
||||
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
|
||||
|
@ -1951,7 +1951,7 @@ def _sendToGroupMembers(session, baseDir: str, handle: str, port: int,
|
|||
sendingActorDomainFull = \
|
||||
getFullDomain(sendingActorDomain, sendingActorPort)
|
||||
senderStr = '@' + sendingActorNickname + '@' + sendingActorDomainFull
|
||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
||||
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
if not contentStr.startswith(senderStr):
|
||||
postJsonObject['object']['content'] = \
|
||||
senderStr + ' ' + contentStr
|
||||
|
|
30
languages.py
30
languages.py
|
@ -10,41 +10,17 @@ __module_group__ = "Core"
|
|||
import os
|
||||
import json
|
||||
from urllib import request, parse
|
||||
from utils import getActorLanguagesList
|
||||
from utils import removeHtml
|
||||
from utils import acctDir
|
||||
from utils import hasObjectDict
|
||||
from utils import getConfigParam
|
||||
from cache import getPersonFromCache
|
||||
|
||||
|
||||
def _getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The actor's 'attachment' entries are searched for the first entry of
    type 'PropertyValue' whose name begins with 'languages' (case
    insensitive) and whose value is a non-empty list. A sorted copy of
    that list is returned, so the actor json passed in is not modified.
    An empty list is returned if there is no such attachment.
    """
    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        return []
    for propertyValue in attachments:
        # skip malformed attachment entries rather than raising
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not isinstance(name, str):
            continue
        if not name.lower().startswith('languages'):
            continue
        if propertyValue.get('type') != 'PropertyValue':
            continue
        langList = propertyValue.get('value')
        if not langList or not isinstance(langList, list):
            continue
        # sorted() returns a new list, leaving the actor's own list as is
        return sorted(langList)
    return []
|
||||
|
||||
|
||||
def getActorLanguages(actorJson: {}) -> str:
|
||||
"""Returns a string containing languages used by the given actor
|
||||
"""
|
||||
langList = _getActorLanguagesList(actorJson)
|
||||
langList = getActorLanguagesList(actorJson)
|
||||
if not langList:
|
||||
return ''
|
||||
languagesStr = ''
|
||||
|
@ -121,7 +97,7 @@ def understoodPostLanguage(baseDir: str, nickname: str, domain: str,
|
|||
if not actorJson:
|
||||
print('WARN: unable to load actor to check languages ' + personUrl)
|
||||
return False
|
||||
languagesUnderstood = _getActorLanguagesList(actorJson)
|
||||
languagesUnderstood = getActorLanguagesList(actorJson)
|
||||
if not languagesUnderstood:
|
||||
return True
|
||||
for lang in languagesUnderstood:
|
||||
|
|
7
media.py
7
media.py
|
@ -13,7 +13,7 @@ import subprocess
|
|||
from random import randint
|
||||
from hashlib import sha1
|
||||
from auth import createPassword
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import getFullDomain
|
||||
from utils import getImageExtensions
|
||||
from utils import getVideoExtensions
|
||||
|
@ -38,12 +38,13 @@ def replaceYouTube(postJsonObject: {}, replacementDomain: str,
|
|||
return
|
||||
if not postJsonObject['object'].get('content'):
|
||||
return
|
||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
||||
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
if 'www.youtube.com' not in contentStr:
|
||||
return
|
||||
contentStr = contentStr.replace('www.youtube.com', replacementDomain)
|
||||
postJsonObject['object']['content'] = contentStr
|
||||
postJsonObject['object']['contentMap'][systemLanguage] = contentStr
|
||||
if postJsonObject['object'].get('contentMap'):
|
||||
postJsonObject['object']['contentMap'][systemLanguage] = contentStr
|
||||
|
||||
|
||||
def _removeMetaData(imageFilename: str, outputFilename: str) -> None:
|
||||
|
|
|
@ -25,7 +25,7 @@ from newswire import getDictFromNewswire
|
|||
from posts import createNewsPost
|
||||
from posts import archivePostsForPerson
|
||||
from content import validHashTag
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import removeHtml
|
||||
from utils import getFullDomain
|
||||
from utils import loadJson
|
||||
|
@ -314,7 +314,7 @@ def _hashtagAdd(baseDir: str, httpPrefix: str, domainFull: str,
|
|||
hashtagHtml = \
|
||||
" <a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \
|
||||
"rel=\"tag\">#<span>" + htId + "</span></a>"
|
||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
||||
content = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
if hashtagHtml in content:
|
||||
return
|
||||
|
||||
|
@ -344,7 +344,7 @@ def _hashtagRemove(httpPrefix: str, domainFull: str, postJsonObject: {},
|
|||
hashtagHtml = \
|
||||
"<a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \
|
||||
"rel=\"tag\">#<span>" + htId + "</span></a>"
|
||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
||||
content = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
if hashtagHtml in content:
|
||||
content = content.replace(hashtagHtml, '').replace('  ', ' ')
|
||||
postJsonObject['object']['content'] = content
|
||||
|
@ -385,7 +385,7 @@ def _newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
|
|||
# get the full text content of the post
|
||||
content = ''
|
||||
if postJsonObject['object'].get('content'):
|
||||
content += getContentFromPost(postJsonObject, systemLanguage)
|
||||
content += getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
if postJsonObject['object'].get('summary'):
|
||||
content += ' ' + postJsonObject['object']['summary']
|
||||
content = content.lower()
|
||||
|
@ -667,7 +667,7 @@ def _convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
|||
"\" class=\"addedHashtag\" " + \
|
||||
"rel=\"tag\">#<span>" + \
|
||||
htId + "</span></a>"
|
||||
content = getContentFromPost(blog, systemLanguage)
|
||||
content = getBaseContentFromPost(blog, systemLanguage)
|
||||
if hashtagHtml not in content:
|
||||
if content.endswith('</p>'):
|
||||
content = \
|
||||
|
|
|
@ -18,7 +18,7 @@ from datetime import timezone
|
|||
from collections import OrderedDict
|
||||
from utils import validPostDate
|
||||
from categories import setHashtagCategory
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import hasObjectDict
|
||||
from utils import firstParagraphFromString
|
||||
from utils import isPublicPost
|
||||
|
@ -963,7 +963,7 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
|||
if os.path.isfile(fullPostFilename + '.votes'):
|
||||
votes = loadJson(fullPostFilename + '.votes')
|
||||
content = \
|
||||
getContentFromPost(postJsonObject, systemLanguage)
|
||||
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
description = firstParagraphFromString(content)
|
||||
description = removeHtml(description)
|
||||
tagsFromPost = _getHashtagsFromPost(postJsonObject)
|
||||
|
|
|
@ -16,7 +16,7 @@ from posts import outboxMessageCreateWrap
|
|||
from posts import savePostToBox
|
||||
from posts import sendToFollowersThread
|
||||
from posts import sendToNamedAddresses
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import hasObjectDict
|
||||
from utils import getLocalNetworkAddresses
|
||||
from utils import getFullDomain
|
||||
|
@ -213,7 +213,7 @@ def postMessageToOutbox(session, translate: {},
|
|||
# check that the outgoing post doesn't contain any markup
|
||||
# which can be used to implement exploits
|
||||
if hasObjectDict(messageJson):
|
||||
contentStr = getContentFromPost(messageJson, systemLanguage)
|
||||
contentStr = getBaseContentFromPost(messageJson, systemLanguage)
|
||||
if contentStr:
|
||||
if dangerousMarkup(contentStr, allowLocalNetworkAccess):
|
||||
print('POST to outbox contains dangerous markup: ' +
|
||||
|
|
6
posts.py
6
posts.py
|
@ -32,7 +32,7 @@ from webfinger import webfingerHandle
|
|||
from httpsig import createSignedHeader
|
||||
from siteactive import siteIsActive
|
||||
from languages import understoodPostLanguage
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import removeDomainPort
|
||||
from utils import getPortFromDomain
|
||||
from utils import hasObjectDict
|
||||
|
@ -387,7 +387,7 @@ def _getPosts(session, outboxUrl: str, maxPosts: int,
|
|||
if not isPublic:
|
||||
continue
|
||||
|
||||
content = getContentFromPost(item, systemLanguage)
|
||||
content = getBaseContentFromPost(item, systemLanguage)
|
||||
content = content.replace('&apos;', "'")
|
||||
|
||||
mentions = []
|
||||
|
@ -565,7 +565,7 @@ def getPostDomains(session, outboxUrl: str, maxPosts: int,
|
|||
break
|
||||
if not hasObjectDict(item):
|
||||
continue
|
||||
contentStr = getContentFromPost(item, systemLanguage)
|
||||
contentStr = getBaseContentFromPost(item, systemLanguage)
|
||||
if contentStr:
|
||||
_updateWordFrequency(contentStr, wordFrequency)
|
||||
if item['object'].get('inReplyTo'):
|
||||
|
|
33
utils.py
33
utils.py
|
@ -28,7 +28,32 @@ invalidCharacters = (
|
|||
)
|
||||
|
||||
|
||||
def getContentFromPost(postJsonObject: {}, systemLanguage: str) -> str:
|
||||
def getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The actor's 'attachment' entries are searched for the first entry of
    type 'PropertyValue' whose name begins with 'languages' (case
    insensitive) and whose value is a non-empty list. A sorted copy of
    that list is returned, so the actor json passed in is not modified.
    An empty list is returned if there is no such attachment.
    """
    attachments = actorJson.get('attachment')
    if not isinstance(attachments, list):
        return []
    for propertyValue in attachments:
        # skip malformed attachment entries rather than raising
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not isinstance(name, str):
            continue
        if not name.lower().startswith('languages'):
            continue
        if propertyValue.get('type') != 'PropertyValue':
            continue
        langList = propertyValue.get('value')
        if not langList or not isinstance(langList, list):
            continue
        # sorted() returns a new list, leaving the actor's own list as is
        return sorted(langList)
    return []
|
||||
|
||||
|
||||
def getContentFromPost(postJsonObject: {}, systemLanguage: str,
|
||||
languagesUnderstood: []) -> str:
|
||||
"""Returns the content from the post in the given language
|
||||
including searching for a matching entry within contentMap
|
||||
"""
|
||||
|
@ -43,6 +68,12 @@ def getContentFromPost(postJsonObject: {}, systemLanguage: str) -> str:
|
|||
if thisPostJson['contentMap'].get(systemLanguage):
|
||||
if isinstance(thisPostJson['contentMap'][systemLanguage], str):
|
||||
return thisPostJson['contentMap'][systemLanguage]
|
||||
else:
|
||||
# is there a contentMap entry for one of
|
||||
# the understood languages?
|
||||
for lang in languagesUnderstood:
|
||||
if thisPostJson['contentMap'].get(lang):
|
||||
return thisPostJson['contentMap'][lang]
|
||||
else:
|
||||
if isinstance(thisPostJson['content'], str):
|
||||
content = thisPostJson['content']
|
||||
|
|
|
@ -11,7 +11,7 @@ import os
|
|||
from datetime import datetime
|
||||
from content import removeLongWords
|
||||
from content import limitRepeatedWords
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import removeHtml
|
||||
from utils import locatePost
|
||||
from utils import loadJson
|
||||
|
@ -698,7 +698,7 @@ def htmlEditNewsPost(cssCache: {}, translate: {}, baseDir: str, path: str,
|
|||
' <input type="text" name="newsPostTitle" value="' + \
|
||||
newsPostTitle + '"><br>\n'
|
||||
|
||||
newsPostContent = getContentFromPost(postJsonObject, systemLanguage)
|
||||
newsPostContent = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
editNewsPostForm += \
|
||||
' <textarea id="message" name="editedNewsPost" ' + \
|
||||
'style="height:600px" spellcheck="true">' + \
|
||||
|
|
|
@ -22,8 +22,8 @@ from posts import postIsMuted
|
|||
from posts import getPersonBox
|
||||
from posts import downloadAnnounce
|
||||
from posts import populateRepliesJson
|
||||
from utils import getActorLanguagesList
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import getContentFromPost
|
||||
from utils import hasObjectDict
|
||||
from utils import updateAnnounceCollection
|
||||
from utils import isPGPEncrypted
|
||||
|
@ -1592,7 +1592,16 @@ def individualPostAsHtml(allowDownloads: bool,
|
|||
postJsonObject['object']['contentMap'][systemLanguage] = \
|
||||
postJsonObject['object']['content']
|
||||
|
||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
||||
domainFull = getFullDomain(domain, port)
|
||||
personUrl = \
|
||||
httpPrefix + '://' + domainFull + '/users/' + nickname
|
||||
actorJson = \
|
||||
getPersonFromCache(baseDir, personUrl, personCache, False)
|
||||
languagesUnderstood = []
|
||||
if actorJson:
|
||||
languagesUnderstood = getActorLanguagesList(actorJson)
|
||||
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage,
|
||||
languagesUnderstood)
|
||||
if not contentStr:
|
||||
contentStr = \
|
||||
autoTranslatePost(baseDir, postJsonObject,
|
||||
|
|
|
@ -11,7 +11,7 @@ import os
|
|||
from shutil import copyfile
|
||||
import urllib.parse
|
||||
from datetime import datetime
|
||||
from utils import getContentFromPost
|
||||
from utils import getBaseContentFromPost
|
||||
from utils import isAccountDir
|
||||
from utils import getConfigParam
|
||||
from utils import getFullDomain
|
||||
|
@ -904,7 +904,7 @@ def rssHashtagSearch(nickname: str, domain: str, port: int,
|
|||
postJsonObject['object']['summary'] + \
|
||||
'</title>'
|
||||
description = \
|
||||
getContentFromPost(postJsonObject, systemLanguage)
|
||||
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||
description = firstParagraphFromString(description)
|
||||
hashtagFeed += \
|
||||
' <description>' + description + '</description>'
|
||||
|
|
Loading…
Reference in New Issue