mirror of https://gitlab.com/bashrc2/epicyon
Handling of understood languages prior to automatic translation
parent
bb3dee7533
commit
bb3de9e173
40
blog.py
40
blog.py
|
@ -16,6 +16,8 @@ from webapp_utils import htmlHeaderWithBlogMarkup
|
||||||
from webapp_utils import htmlFooter
|
from webapp_utils import htmlFooter
|
||||||
from webapp_utils import getPostAttachmentsAsHtml
|
from webapp_utils import getPostAttachmentsAsHtml
|
||||||
from webapp_media import addEmbeddedElements
|
from webapp_media import addEmbeddedElements
|
||||||
|
from utils import getActorLanguagesList
|
||||||
|
from utils import getBaseContentFromPost
|
||||||
from utils import getContentFromPost
|
from utils import getContentFromPost
|
||||||
from utils import isAccountDir
|
from utils import isAccountDir
|
||||||
from utils import removeHtml
|
from utils import removeHtml
|
||||||
|
@ -32,6 +34,7 @@ from utils import acctDir
|
||||||
from posts import createBlogsTimeline
|
from posts import createBlogsTimeline
|
||||||
from newswire import rss2Header
|
from newswire import rss2Header
|
||||||
from newswire import rss2Footer
|
from newswire import rss2Footer
|
||||||
|
from cache import getPersonFromCache
|
||||||
|
|
||||||
|
|
||||||
def _noOfBlogReplies(baseDir: str, httpPrefix: str, translate: {},
|
def _noOfBlogReplies(baseDir: str, httpPrefix: str, translate: {},
|
||||||
|
@ -166,6 +169,7 @@ def _htmlBlogPostContent(authorized: bool,
|
||||||
handle: str, restrictToDomain: bool,
|
handle: str, restrictToDomain: bool,
|
||||||
peertubeInstances: [],
|
peertubeInstances: [],
|
||||||
systemLanguage: str,
|
systemLanguage: str,
|
||||||
|
personCache: {},
|
||||||
blogSeparator: str = '<hr>') -> str:
|
blogSeparator: str = '<hr>') -> str:
|
||||||
"""Returns the content for a single blog post
|
"""Returns the content for a single blog post
|
||||||
"""
|
"""
|
||||||
|
@ -237,7 +241,15 @@ def _htmlBlogPostContent(authorized: bool,
|
||||||
if attachmentStr:
|
if attachmentStr:
|
||||||
blogStr += '<br><center>' + attachmentStr + '</center>'
|
blogStr += '<br><center>' + attachmentStr + '</center>'
|
||||||
|
|
||||||
jsonContent = getContentFromPost(postJsonObject, systemLanguage)
|
personUrl = \
|
||||||
|
httpPrefix + '://' + domainFull + '/users/' + nickname
|
||||||
|
actorJson = \
|
||||||
|
getPersonFromCache(baseDir, personUrl, personCache, False)
|
||||||
|
languagesUnderstood = []
|
||||||
|
if actorJson:
|
||||||
|
languagesUnderstood = getActorLanguagesList(actorJson)
|
||||||
|
jsonContent = getContentFromPost(postJsonObject, systemLanguage,
|
||||||
|
languagesUnderstood)
|
||||||
if jsonContent:
|
if jsonContent:
|
||||||
contentStr = addEmbeddedElements(translate, jsonContent,
|
contentStr = addEmbeddedElements(translate, jsonContent,
|
||||||
peertubeInstances)
|
peertubeInstances)
|
||||||
|
@ -330,7 +342,8 @@ def _htmlBlogPostRSS2(authorized: bool,
|
||||||
pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
|
pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
|
||||||
titleStr = postJsonObject['object']['summary']
|
titleStr = postJsonObject['object']['summary']
|
||||||
rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
|
rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
|
||||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
content = \
|
||||||
|
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
description = firstParagraphFromString(content)
|
description = firstParagraphFromString(content)
|
||||||
rssStr = ' <item>'
|
rssStr = ' <item>'
|
||||||
rssStr += ' <title>' + titleStr + '</title>'
|
rssStr += ' <title>' + titleStr + '</title>'
|
||||||
|
@ -362,7 +375,8 @@ def _htmlBlogPostRSS3(authorized: bool,
|
||||||
pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
|
pubDate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ")
|
||||||
titleStr = postJsonObject['object']['summary']
|
titleStr = postJsonObject['object']['summary']
|
||||||
rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
|
rssDateStr = pubDate.strftime("%a, %d %b %Y %H:%M:%S UT")
|
||||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
content = \
|
||||||
|
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
description = firstParagraphFromString(content)
|
description = firstParagraphFromString(content)
|
||||||
rssStr = 'title: ' + titleStr + '\n'
|
rssStr = 'title: ' + titleStr + '\n'
|
||||||
rssStr += 'link: ' + messageLink + '\n'
|
rssStr += 'link: ' + messageLink + '\n'
|
||||||
|
@ -386,7 +400,7 @@ def _htmlBlogRemoveCwButton(blogStr: str, translate: {}) -> str:
|
||||||
def _getSnippetFromBlogContent(postJsonObject: {}, systemLanguage: str) -> str:
|
def _getSnippetFromBlogContent(postJsonObject: {}, systemLanguage: str) -> str:
|
||||||
"""Returns a snippet of text from the blog post as a preview
|
"""Returns a snippet of text from the blog post as a preview
|
||||||
"""
|
"""
|
||||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
content = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
if '<p>' in content:
|
if '<p>' in content:
|
||||||
content = content.split('<p>', 1)[1]
|
content = content.split('<p>', 1)[1]
|
||||||
if '</p>' in content:
|
if '</p>' in content:
|
||||||
|
@ -404,7 +418,7 @@ def htmlBlogPost(authorized: bool,
|
||||||
nickname: str, domain: str, domainFull: str,
|
nickname: str, domain: str, domainFull: str,
|
||||||
postJsonObject: {},
|
postJsonObject: {},
|
||||||
peertubeInstances: [],
|
peertubeInstances: [],
|
||||||
systemLanguage: str) -> str:
|
systemLanguage: str, personCache: {}) -> str:
|
||||||
"""Returns a html blog post
|
"""Returns a html blog post
|
||||||
"""
|
"""
|
||||||
blogStr = ''
|
blogStr = ''
|
||||||
|
@ -428,7 +442,8 @@ def htmlBlogPost(authorized: bool,
|
||||||
nickname, domain,
|
nickname, domain,
|
||||||
domainFull, postJsonObject,
|
domainFull, postJsonObject,
|
||||||
None, False,
|
None, False,
|
||||||
peertubeInstances, systemLanguage)
|
peertubeInstances, systemLanguage,
|
||||||
|
personCache)
|
||||||
|
|
||||||
# show rss links
|
# show rss links
|
||||||
blogStr += '<p class="rssfeed">'
|
blogStr += '<p class="rssfeed">'
|
||||||
|
@ -456,7 +471,8 @@ def htmlBlogPage(authorized: bool, session,
|
||||||
baseDir: str, httpPrefix: str, translate: {},
|
baseDir: str, httpPrefix: str, translate: {},
|
||||||
nickname: str, domain: str, port: int,
|
nickname: str, domain: str, port: int,
|
||||||
noOfItems: int, pageNumber: int,
|
noOfItems: int, pageNumber: int,
|
||||||
peertubeInstances: [], systemLanguage: str) -> str:
|
peertubeInstances: [], systemLanguage: str,
|
||||||
|
personCache: {}) -> str:
|
||||||
"""Returns a html blog page containing posts
|
"""Returns a html blog page containing posts
|
||||||
"""
|
"""
|
||||||
if ' ' in nickname or '@' in nickname or \
|
if ' ' in nickname or '@' in nickname or \
|
||||||
|
@ -519,7 +535,8 @@ def htmlBlogPage(authorized: bool, session,
|
||||||
domainFull, item,
|
domainFull, item,
|
||||||
None, True,
|
None, True,
|
||||||
peertubeInstances,
|
peertubeInstances,
|
||||||
systemLanguage)
|
systemLanguage,
|
||||||
|
personCache)
|
||||||
|
|
||||||
if len(timelineJson['orderedItems']) >= noOfItems:
|
if len(timelineJson['orderedItems']) >= noOfItems:
|
||||||
blogStr += navigateStr
|
blogStr += navigateStr
|
||||||
|
@ -677,7 +694,8 @@ def htmlBlogView(authorized: bool,
|
||||||
session, baseDir: str, httpPrefix: str,
|
session, baseDir: str, httpPrefix: str,
|
||||||
translate: {}, domain: str, port: int,
|
translate: {}, domain: str, port: int,
|
||||||
noOfItems: int,
|
noOfItems: int,
|
||||||
peertubeInstances: [], systemLanguage: str) -> str:
|
peertubeInstances: [], systemLanguage: str,
|
||||||
|
personCache: {}) -> str:
|
||||||
"""Show the blog main page
|
"""Show the blog main page
|
||||||
"""
|
"""
|
||||||
blogStr = ''
|
blogStr = ''
|
||||||
|
@ -696,7 +714,7 @@ def htmlBlogView(authorized: bool,
|
||||||
baseDir, httpPrefix, translate,
|
baseDir, httpPrefix, translate,
|
||||||
nickname, domain, port,
|
nickname, domain, port,
|
||||||
noOfItems, 1, peertubeInstances,
|
noOfItems, 1, peertubeInstances,
|
||||||
systemLanguage)
|
systemLanguage, personCache)
|
||||||
|
|
||||||
domainFull = getFullDomain(domain, port)
|
domainFull = getFullDomain(domain, port)
|
||||||
|
|
||||||
|
@ -840,7 +858,7 @@ def htmlEditBlog(mediaInstance: bool, translate: {},
|
||||||
placeholderMessage + '</label>'
|
placeholderMessage + '</label>'
|
||||||
messageBoxHeight = 800
|
messageBoxHeight = 800
|
||||||
|
|
||||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
contentStr = contentStr.replace('<p>', '').replace('</p>', '\n')
|
contentStr = contentStr.replace('<p>', '').replace('</p>', '\n')
|
||||||
|
|
||||||
editBlogForm += \
|
editBlogForm += \
|
||||||
|
|
15
daemon.py
15
daemon.py
|
@ -210,7 +210,7 @@ from shares import expireShares
|
||||||
from categories import setHashtagCategory
|
from categories import setHashtagCategory
|
||||||
from languages import getActorLanguages
|
from languages import getActorLanguages
|
||||||
from languages import setActorLanguages
|
from languages import setActorLanguages
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import acctDir
|
from utils import acctDir
|
||||||
from utils import getImageExtensionFromMimeType
|
from utils import getImageExtensionFromMimeType
|
||||||
from utils import getImageMimeType
|
from utils import getImageMimeType
|
||||||
|
@ -9825,7 +9825,8 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
domain, port,
|
domain, port,
|
||||||
maxPostsInBlogsFeed, pageNumber,
|
maxPostsInBlogsFeed, pageNumber,
|
||||||
self.server.peertubeInstances,
|
self.server.peertubeInstances,
|
||||||
self.server.systemLanguage)
|
self.server.systemLanguage,
|
||||||
|
self.server.personCache)
|
||||||
if msg is not None:
|
if msg is not None:
|
||||||
msg = msg.encode('utf-8')
|
msg = msg.encode('utf-8')
|
||||||
msglen = len(msg)
|
msglen = len(msg)
|
||||||
|
@ -10955,7 +10956,8 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
self.server.port,
|
self.server.port,
|
||||||
maxPostsInBlogsFeed,
|
maxPostsInBlogsFeed,
|
||||||
self.server.peertubeInstances,
|
self.server.peertubeInstances,
|
||||||
self.server.systemLanguage)
|
self.server.systemLanguage,
|
||||||
|
self.server.personCache)
|
||||||
if msg is not None:
|
if msg is not None:
|
||||||
msg = msg.encode('utf-8')
|
msg = msg.encode('utf-8')
|
||||||
msglen = len(msg)
|
msglen = len(msg)
|
||||||
|
@ -11054,7 +11056,8 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
self.server.domainFull,
|
self.server.domainFull,
|
||||||
postJsonObject,
|
postJsonObject,
|
||||||
self.server.peertubeInstances,
|
self.server.peertubeInstances,
|
||||||
self.server.systemLanguage)
|
self.server.systemLanguage,
|
||||||
|
self.server.personCache)
|
||||||
if msg is not None:
|
if msg is not None:
|
||||||
msg = msg.encode('utf-8')
|
msg = msg.encode('utf-8')
|
||||||
msglen = len(msg)
|
msglen = len(msg)
|
||||||
|
@ -13186,8 +13189,8 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
return 1
|
return 1
|
||||||
if pinToProfile:
|
if pinToProfile:
|
||||||
contentStr = \
|
contentStr = \
|
||||||
getContentFromPost(messageJson,
|
getBaseContentFromPost(messageJson,
|
||||||
self.server.systemLanguage)
|
self.server.systemLanguage)
|
||||||
pinPost(self.server.baseDir,
|
pinPost(self.server.baseDir,
|
||||||
nickname, self.server.domain, contentStr)
|
nickname, self.server.domain, contentStr)
|
||||||
return 1
|
return 1
|
||||||
|
|
|
@ -16,7 +16,7 @@ import webbrowser
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from random import randint
|
from random import randint
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import hasObjectDict
|
from utils import hasObjectDict
|
||||||
from utils import getFullDomain
|
from utils import getFullDomain
|
||||||
from utils import isDM
|
from utils import isDM
|
||||||
|
@ -700,7 +700,7 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
|
||||||
postJsonObject2['object'].get('content'):
|
postJsonObject2['object'].get('content'):
|
||||||
attributedTo = postJsonObject2['object']['attributedTo']
|
attributedTo = postJsonObject2['object']['attributedTo']
|
||||||
content = \
|
content = \
|
||||||
getContentFromPost(postJsonObject2, systemLanguage)
|
getBaseContentFromPost(postJsonObject2, systemLanguage)
|
||||||
if isinstance(attributedTo, str) and content:
|
if isinstance(attributedTo, str) and content:
|
||||||
actor = attributedTo
|
actor = attributedTo
|
||||||
nameStr += ' ' + translate['announces'] + ' ' + \
|
nameStr += ' ' + translate['announces'] + ' ' + \
|
||||||
|
@ -725,7 +725,7 @@ def _readLocalBoxPost(session, nickname: str, domain: str,
|
||||||
attributedTo = postJsonObject['object']['attributedTo']
|
attributedTo = postJsonObject['object']['attributedTo']
|
||||||
if not attributedTo:
|
if not attributedTo:
|
||||||
return {}
|
return {}
|
||||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
content = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
if not isinstance(attributedTo, str) or \
|
if not isinstance(attributedTo, str) or \
|
||||||
not isinstance(content, str):
|
not isinstance(content, str):
|
||||||
return {}
|
return {}
|
||||||
|
@ -1048,7 +1048,7 @@ def _desktopShowBox(indent: str,
|
||||||
|
|
||||||
published = _formatPublished(postJsonObject['published'])
|
published = _formatPublished(postJsonObject['published'])
|
||||||
|
|
||||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
content = _textOnlyContent(contentStr)
|
content = _textOnlyContent(contentStr)
|
||||||
if boxName != 'dm':
|
if boxName != 'dm':
|
||||||
if isDM(postJsonObject):
|
if isDM(postJsonObject):
|
||||||
|
@ -2334,7 +2334,7 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
|
||||||
postJsonObject = postJsonObject2
|
postJsonObject = postJsonObject2
|
||||||
if postJsonObject:
|
if postJsonObject:
|
||||||
content = \
|
content = \
|
||||||
getContentFromPost(postJsonObject, systemLanguage)
|
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
messageStr, detectedLinks = \
|
messageStr, detectedLinks = \
|
||||||
speakableText(baseDir, content, translate)
|
speakableText(baseDir, content, translate)
|
||||||
linkOpened = False
|
linkOpened = False
|
||||||
|
@ -2390,8 +2390,8 @@ def runDesktopClient(baseDir: str, proxyType: str, httpPrefix: str,
|
||||||
print('')
|
print('')
|
||||||
if postJsonObject['object'].get('summary'):
|
if postJsonObject['object'].get('summary'):
|
||||||
print(postJsonObject['object']['summary'])
|
print(postJsonObject['object']['summary'])
|
||||||
contentStr = getContentFromPost(postJsonObject,
|
contentStr = getBaseContentFromPost(postJsonObject,
|
||||||
systemLanguage)
|
systemLanguage)
|
||||||
print(contentStr)
|
print(contentStr)
|
||||||
print('')
|
print('')
|
||||||
sayStr = 'Confirm delete, yes or no?'
|
sayStr = 'Confirm delete, yes or no?'
|
||||||
|
|
10
inbox.py
10
inbox.py
|
@ -14,7 +14,7 @@ import time
|
||||||
import random
|
import random
|
||||||
from linked_data_sig import verifyJsonSignature
|
from linked_data_sig import verifyJsonSignature
|
||||||
from languages import understoodPostLanguage
|
from languages import understoodPostLanguage
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import acctDir
|
from utils import acctDir
|
||||||
from utils import removeDomainPort
|
from utils import removeDomainPort
|
||||||
from utils import getPortFromDomain
|
from utils import getPortFromDomain
|
||||||
|
@ -353,7 +353,7 @@ def savePostToInboxQueue(baseDir: str, httpPrefix: str,
|
||||||
httpHeaders: {},
|
httpHeaders: {},
|
||||||
postPath: str, debug: bool,
|
postPath: str, debug: bool,
|
||||||
blockedCache: [], systemLanguage: str) -> str:
|
blockedCache: [], systemLanguage: str) -> str:
|
||||||
"""Saves the give json to the inbox queue for the person
|
"""Saves the given json to the inbox queue for the person
|
||||||
keyId specifies the actor sending the post
|
keyId specifies the actor sending the post
|
||||||
"""
|
"""
|
||||||
if len(messageBytes) > 10240:
|
if len(messageBytes) > 10240:
|
||||||
|
@ -416,7 +416,7 @@ def savePostToInboxQueue(baseDir: str, httpPrefix: str,
|
||||||
replyNickname + '@' + replyDomain)
|
replyNickname + '@' + replyDomain)
|
||||||
return None
|
return None
|
||||||
if postJsonObject['object'].get('content'):
|
if postJsonObject['object'].get('content'):
|
||||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
if contentStr:
|
if contentStr:
|
||||||
if isFiltered(baseDir, nickname, domain, contentStr):
|
if isFiltered(baseDir, nickname, domain, contentStr):
|
||||||
if debug:
|
if debug:
|
||||||
|
@ -1649,7 +1649,7 @@ def _validPostContent(baseDir: str, nickname: str, domain: str,
|
||||||
messageJson['object']['content']):
|
messageJson['object']['content']):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
contentStr = getContentFromPost(messageJson, systemLanguage)
|
contentStr = getBaseContentFromPost(messageJson, systemLanguage)
|
||||||
if dangerousMarkup(contentStr, allowLocalNetworkAccess):
|
if dangerousMarkup(contentStr, allowLocalNetworkAccess):
|
||||||
if messageJson['object'].get('id'):
|
if messageJson['object'].get('id'):
|
||||||
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
|
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
|
||||||
|
@ -1951,7 +1951,7 @@ def _sendToGroupMembers(session, baseDir: str, handle: str, port: int,
|
||||||
sendingActorDomainFull = \
|
sendingActorDomainFull = \
|
||||||
getFullDomain(sendingActorDomain, sendingActorPort)
|
getFullDomain(sendingActorDomain, sendingActorPort)
|
||||||
senderStr = '@' + sendingActorNickname + '@' + sendingActorDomainFull
|
senderStr = '@' + sendingActorNickname + '@' + sendingActorDomainFull
|
||||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
if not contentStr.startswith(senderStr):
|
if not contentStr.startswith(senderStr):
|
||||||
postJsonObject['object']['content'] = \
|
postJsonObject['object']['content'] = \
|
||||||
senderStr + ' ' + contentStr
|
senderStr + ' ' + contentStr
|
||||||
|
|
30
languages.py
30
languages.py
|
@ -10,41 +10,17 @@ __module_group__ = "Core"
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
from urllib import request, parse
|
from urllib import request, parse
|
||||||
|
from utils import getActorLanguagesList
|
||||||
from utils import removeHtml
|
from utils import removeHtml
|
||||||
from utils import acctDir
|
|
||||||
from utils import hasObjectDict
|
from utils import hasObjectDict
|
||||||
from utils import getConfigParam
|
from utils import getConfigParam
|
||||||
from cache import getPersonFromCache
|
from cache import getPersonFromCache
|
||||||
|
|
||||||
|
|
||||||
def _getActorLanguagesList(actorJson: {}) -> []:
|
|
||||||
"""Returns a list containing languages used by the given actor
|
|
||||||
"""
|
|
||||||
if not actorJson.get('attachment'):
|
|
||||||
return []
|
|
||||||
for propertyValue in actorJson['attachment']:
|
|
||||||
if not propertyValue.get('name'):
|
|
||||||
continue
|
|
||||||
if not propertyValue['name'].lower().startswith('languages'):
|
|
||||||
continue
|
|
||||||
if not propertyValue.get('type'):
|
|
||||||
continue
|
|
||||||
if not propertyValue.get('value'):
|
|
||||||
continue
|
|
||||||
if not isinstance(propertyValue['value'], list):
|
|
||||||
continue
|
|
||||||
if propertyValue['type'] != 'PropertyValue':
|
|
||||||
continue
|
|
||||||
langList = propertyValue['value']
|
|
||||||
langList.sort()
|
|
||||||
return langList
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def getActorLanguages(actorJson: {}) -> str:
|
def getActorLanguages(actorJson: {}) -> str:
|
||||||
"""Returns a string containing languages used by the given actor
|
"""Returns a string containing languages used by the given actor
|
||||||
"""
|
"""
|
||||||
langList = _getActorLanguagesList(actorJson)
|
langList = getActorLanguagesList(actorJson)
|
||||||
if not langList:
|
if not langList:
|
||||||
return ''
|
return ''
|
||||||
languagesStr = ''
|
languagesStr = ''
|
||||||
|
@ -121,7 +97,7 @@ def understoodPostLanguage(baseDir: str, nickname: str, domain: str,
|
||||||
if not actorJson:
|
if not actorJson:
|
||||||
print('WARN: unable to load actor to check languages ' + personUrl)
|
print('WARN: unable to load actor to check languages ' + personUrl)
|
||||||
return False
|
return False
|
||||||
languagesUnderstood = _getActorLanguagesList(actorJson)
|
languagesUnderstood = getActorLanguagesList(actorJson)
|
||||||
if not languagesUnderstood:
|
if not languagesUnderstood:
|
||||||
return True
|
return True
|
||||||
for lang in languagesUnderstood:
|
for lang in languagesUnderstood:
|
||||||
|
|
7
media.py
7
media.py
|
@ -13,7 +13,7 @@ import subprocess
|
||||||
from random import randint
|
from random import randint
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from auth import createPassword
|
from auth import createPassword
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import getFullDomain
|
from utils import getFullDomain
|
||||||
from utils import getImageExtensions
|
from utils import getImageExtensions
|
||||||
from utils import getVideoExtensions
|
from utils import getVideoExtensions
|
||||||
|
@ -38,12 +38,13 @@ def replaceYouTube(postJsonObject: {}, replacementDomain: str,
|
||||||
return
|
return
|
||||||
if not postJsonObject['object'].get('content'):
|
if not postJsonObject['object'].get('content'):
|
||||||
return
|
return
|
||||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
if 'www.youtube.com' not in contentStr:
|
if 'www.youtube.com' not in contentStr:
|
||||||
return
|
return
|
||||||
contentStr = contentStr.replace('www.youtube.com', replacementDomain)
|
contentStr = contentStr.replace('www.youtube.com', replacementDomain)
|
||||||
postJsonObject['object']['content'] = contentStr
|
postJsonObject['object']['content'] = contentStr
|
||||||
postJsonObject['object']['contentMap'][systemLanguage] = contentStr
|
if postJsonObject['object'].get('contentMap'):
|
||||||
|
postJsonObject['object']['contentMap'][systemLanguage] = contentStr
|
||||||
|
|
||||||
|
|
||||||
def _removeMetaData(imageFilename: str, outputFilename: str) -> None:
|
def _removeMetaData(imageFilename: str, outputFilename: str) -> None:
|
||||||
|
|
|
@ -25,7 +25,7 @@ from newswire import getDictFromNewswire
|
||||||
from posts import createNewsPost
|
from posts import createNewsPost
|
||||||
from posts import archivePostsForPerson
|
from posts import archivePostsForPerson
|
||||||
from content import validHashTag
|
from content import validHashTag
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import removeHtml
|
from utils import removeHtml
|
||||||
from utils import getFullDomain
|
from utils import getFullDomain
|
||||||
from utils import loadJson
|
from utils import loadJson
|
||||||
|
@ -314,7 +314,7 @@ def _hashtagAdd(baseDir: str, httpPrefix: str, domainFull: str,
|
||||||
hashtagHtml = \
|
hashtagHtml = \
|
||||||
" <a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \
|
" <a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \
|
||||||
"rel=\"tag\">#<span>" + htId + "</span></a>"
|
"rel=\"tag\">#<span>" + htId + "</span></a>"
|
||||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
content = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
if hashtagHtml in content:
|
if hashtagHtml in content:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -344,7 +344,7 @@ def _hashtagRemove(httpPrefix: str, domainFull: str, postJsonObject: {},
|
||||||
hashtagHtml = \
|
hashtagHtml = \
|
||||||
"<a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \
|
"<a href=\"" + hashtagUrl + "\" class=\"addedHashtag\" " + \
|
||||||
"rel=\"tag\">#<span>" + htId + "</span></a>"
|
"rel=\"tag\">#<span>" + htId + "</span></a>"
|
||||||
content = getContentFromPost(postJsonObject, systemLanguage)
|
content = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
if hashtagHtml in content:
|
if hashtagHtml in content:
|
||||||
content = content.replace(hashtagHtml, '').replace('  ', ' ')
|
content = content.replace(hashtagHtml, '').replace('  ', ' ')
|
||||||
postJsonObject['object']['content'] = content
|
postJsonObject['object']['content'] = content
|
||||||
|
@ -385,7 +385,7 @@ def _newswireHashtagProcessing(session, baseDir: str, postJsonObject: {},
|
||||||
# get the full text content of the post
|
# get the full text content of the post
|
||||||
content = ''
|
content = ''
|
||||||
if postJsonObject['object'].get('content'):
|
if postJsonObject['object'].get('content'):
|
||||||
content += getContentFromPost(postJsonObject, systemLanguage)
|
content += getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
if postJsonObject['object'].get('summary'):
|
if postJsonObject['object'].get('summary'):
|
||||||
content += ' ' + postJsonObject['object']['summary']
|
content += ' ' + postJsonObject['object']['summary']
|
||||||
content = content.lower()
|
content = content.lower()
|
||||||
|
@ -667,7 +667,7 @@ def _convertRSStoActivityPub(baseDir: str, httpPrefix: str,
|
||||||
"\" class=\"addedHashtag\" " + \
|
"\" class=\"addedHashtag\" " + \
|
||||||
"rel=\"tag\">#<span>" + \
|
"rel=\"tag\">#<span>" + \
|
||||||
htId + "</span></a>"
|
htId + "</span></a>"
|
||||||
content = getContentFromPost(blog, systemLanguage)
|
content = getBaseContentFromPost(blog, systemLanguage)
|
||||||
if hashtagHtml not in content:
|
if hashtagHtml not in content:
|
||||||
if content.endswith('</p>'):
|
if content.endswith('</p>'):
|
||||||
content = \
|
content = \
|
||||||
|
|
|
@ -18,7 +18,7 @@ from datetime import timezone
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from utils import validPostDate
|
from utils import validPostDate
|
||||||
from categories import setHashtagCategory
|
from categories import setHashtagCategory
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import hasObjectDict
|
from utils import hasObjectDict
|
||||||
from utils import firstParagraphFromString
|
from utils import firstParagraphFromString
|
||||||
from utils import isPublicPost
|
from utils import isPublicPost
|
||||||
|
@ -963,7 +963,7 @@ def _addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
|
||||||
if os.path.isfile(fullPostFilename + '.votes'):
|
if os.path.isfile(fullPostFilename + '.votes'):
|
||||||
votes = loadJson(fullPostFilename + '.votes')
|
votes = loadJson(fullPostFilename + '.votes')
|
||||||
content = \
|
content = \
|
||||||
getContentFromPost(postJsonObject, systemLanguage)
|
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
description = firstParagraphFromString(content)
|
description = firstParagraphFromString(content)
|
||||||
description = removeHtml(description)
|
description = removeHtml(description)
|
||||||
tagsFromPost = _getHashtagsFromPost(postJsonObject)
|
tagsFromPost = _getHashtagsFromPost(postJsonObject)
|
||||||
|
|
|
@ -16,7 +16,7 @@ from posts import outboxMessageCreateWrap
|
||||||
from posts import savePostToBox
|
from posts import savePostToBox
|
||||||
from posts import sendToFollowersThread
|
from posts import sendToFollowersThread
|
||||||
from posts import sendToNamedAddresses
|
from posts import sendToNamedAddresses
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import hasObjectDict
|
from utils import hasObjectDict
|
||||||
from utils import getLocalNetworkAddresses
|
from utils import getLocalNetworkAddresses
|
||||||
from utils import getFullDomain
|
from utils import getFullDomain
|
||||||
|
@ -213,7 +213,7 @@ def postMessageToOutbox(session, translate: {},
|
||||||
# check that the outgoing post doesn't contain any markup
|
# check that the outgoing post doesn't contain any markup
|
||||||
# which can be used to implement exploits
|
# which can be used to implement exploits
|
||||||
if hasObjectDict(messageJson):
|
if hasObjectDict(messageJson):
|
||||||
contentStr = getContentFromPost(messageJson, systemLanguage)
|
contentStr = getBaseContentFromPost(messageJson, systemLanguage)
|
||||||
if contentStr:
|
if contentStr:
|
||||||
if dangerousMarkup(contentStr, allowLocalNetworkAccess):
|
if dangerousMarkup(contentStr, allowLocalNetworkAccess):
|
||||||
print('POST to outbox contains dangerous markup: ' +
|
print('POST to outbox contains dangerous markup: ' +
|
||||||
|
|
6
posts.py
6
posts.py
|
@ -32,7 +32,7 @@ from webfinger import webfingerHandle
|
||||||
from httpsig import createSignedHeader
|
from httpsig import createSignedHeader
|
||||||
from siteactive import siteIsActive
|
from siteactive import siteIsActive
|
||||||
from languages import understoodPostLanguage
|
from languages import understoodPostLanguage
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import removeDomainPort
|
from utils import removeDomainPort
|
||||||
from utils import getPortFromDomain
|
from utils import getPortFromDomain
|
||||||
from utils import hasObjectDict
|
from utils import hasObjectDict
|
||||||
|
@ -387,7 +387,7 @@ def _getPosts(session, outboxUrl: str, maxPosts: int,
|
||||||
if not isPublic:
|
if not isPublic:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
content = getContentFromPost(item, systemLanguage)
|
content = getBaseContentFromPost(item, systemLanguage)
|
||||||
content = content.replace('&apos;', "'")
|
content = content.replace('&apos;', "'")
|
||||||
|
|
||||||
mentions = []
|
mentions = []
|
||||||
|
@ -565,7 +565,7 @@ def getPostDomains(session, outboxUrl: str, maxPosts: int,
|
||||||
break
|
break
|
||||||
if not hasObjectDict(item):
|
if not hasObjectDict(item):
|
||||||
continue
|
continue
|
||||||
contentStr = getContentFromPost(item, systemLanguage)
|
contentStr = getBaseContentFromPost(item, systemLanguage)
|
||||||
if contentStr:
|
if contentStr:
|
||||||
_updateWordFrequency(contentStr, wordFrequency)
|
_updateWordFrequency(contentStr, wordFrequency)
|
||||||
if item['object'].get('inReplyTo'):
|
if item['object'].get('inReplyTo'):
|
||||||
|
|
33
utils.py
33
utils.py
|
@ -28,7 +28,32 @@ invalidCharacters = (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def getContentFromPost(postJsonObject: {}, systemLanguage: str) -> str:
|
def getActorLanguagesList(actorJson: {}) -> []:
    """Returns a list containing languages used by the given actor

    The actor's 'attachment' entries are scanned for the first one whose
    'name' starts with 'languages' (case-insensitive), whose 'type' is
    'PropertyValue' and whose 'value' is a non-empty list. A sorted copy
    of that list is returned, or an empty list if no such entry exists.
    """
    attachments = actorJson.get('attachment')
    if not attachments or not isinstance(attachments, list):
        return []
    for propertyValue in attachments:
        # skip malformed entries rather than raising AttributeError
        if not isinstance(propertyValue, dict):
            continue
        name = propertyValue.get('name')
        if not isinstance(name, str):
            continue
        if not name.lower().startswith('languages'):
            continue
        if propertyValue.get('type') != 'PropertyValue':
            continue
        langList = propertyValue.get('value')
        if not langList or not isinstance(langList, list):
            continue
        # return a sorted copy instead of sorting in place, so that
        # looking up the languages does not modify the actor json
        return sorted(langList)
    return []
|
||||||
|
|
||||||
|
|
||||||
|
def getContentFromPost(postJsonObject: {}, systemLanguage: str,
|
||||||
|
languagesUnderstood: []) -> str:
|
||||||
"""Returns the content from the post in the given language
|
"""Returns the content from the post in the given language
|
||||||
including searching for a matching entry within contentMap
|
including searching for a matching entry within contentMap
|
||||||
"""
|
"""
|
||||||
|
@ -43,6 +68,12 @@ def getContentFromPost(postJsonObject: {}, systemLanguage: str) -> str:
|
||||||
if thisPostJson['contentMap'].get(systemLanguage):
|
if thisPostJson['contentMap'].get(systemLanguage):
|
||||||
if isinstance(thisPostJson['contentMap'][systemLanguage], str):
|
if isinstance(thisPostJson['contentMap'][systemLanguage], str):
|
||||||
return thisPostJson['contentMap'][systemLanguage]
|
return thisPostJson['contentMap'][systemLanguage]
|
||||||
|
else:
|
||||||
|
# is there a contentMap entry for one of
|
||||||
|
# the understood languages?
|
||||||
|
for lang in languagesUnderstood:
|
||||||
|
if thisPostJson['contentMap'].get(lang):
|
||||||
|
return thisPostJson['contentMap'][lang]
|
||||||
else:
|
else:
|
||||||
if isinstance(thisPostJson['content'], str):
|
if isinstance(thisPostJson['content'], str):
|
||||||
content = thisPostJson['content']
|
content = thisPostJson['content']
|
||||||
|
|
|
@ -11,7 +11,7 @@ import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from content import removeLongWords
|
from content import removeLongWords
|
||||||
from content import limitRepeatedWords
|
from content import limitRepeatedWords
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import removeHtml
|
from utils import removeHtml
|
||||||
from utils import locatePost
|
from utils import locatePost
|
||||||
from utils import loadJson
|
from utils import loadJson
|
||||||
|
@ -698,7 +698,7 @@ def htmlEditNewsPost(cssCache: {}, translate: {}, baseDir: str, path: str,
|
||||||
' <input type="text" name="newsPostTitle" value="' + \
|
' <input type="text" name="newsPostTitle" value="' + \
|
||||||
newsPostTitle + '"><br>\n'
|
newsPostTitle + '"><br>\n'
|
||||||
|
|
||||||
newsPostContent = getContentFromPost(postJsonObject, systemLanguage)
|
newsPostContent = getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
editNewsPostForm += \
|
editNewsPostForm += \
|
||||||
' <textarea id="message" name="editedNewsPost" ' + \
|
' <textarea id="message" name="editedNewsPost" ' + \
|
||||||
'style="height:600px" spellcheck="true">' + \
|
'style="height:600px" spellcheck="true">' + \
|
||||||
|
|
|
@ -22,8 +22,8 @@ from posts import postIsMuted
|
||||||
from posts import getPersonBox
|
from posts import getPersonBox
|
||||||
from posts import downloadAnnounce
|
from posts import downloadAnnounce
|
||||||
from posts import populateRepliesJson
|
from posts import populateRepliesJson
|
||||||
|
from utils import getActorLanguagesList
|
||||||
from utils import getBaseContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import getContentFromPost
|
|
||||||
from utils import hasObjectDict
|
from utils import hasObjectDict
|
||||||
from utils import updateAnnounceCollection
|
from utils import updateAnnounceCollection
|
||||||
from utils import isPGPEncrypted
|
from utils import isPGPEncrypted
|
||||||
|
@ -1592,7 +1592,16 @@ def individualPostAsHtml(allowDownloads: bool,
|
||||||
postJsonObject['object']['contentMap'][systemLanguage] = \
|
postJsonObject['object']['contentMap'][systemLanguage] = \
|
||||||
postJsonObject['object']['content']
|
postJsonObject['object']['content']
|
||||||
|
|
||||||
contentStr = getContentFromPost(postJsonObject, systemLanguage)
|
domainFull = getFullDomain(domain, port)
|
||||||
|
personUrl = \
|
||||||
|
httpPrefix + '://' + domainFull + '/users/' + nickname
|
||||||
|
actorJson = \
|
||||||
|
getPersonFromCache(baseDir, personUrl, personCache, False)
|
||||||
|
languagesUnderstood = []
|
||||||
|
if actorJson:
|
||||||
|
languagesUnderstood = getActorLanguagesList(actorJson)
|
||||||
|
contentStr = getBaseContentFromPost(postJsonObject, systemLanguage,
|
||||||
|
languagesUnderstood)
|
||||||
if not contentStr:
|
if not contentStr:
|
||||||
contentStr = \
|
contentStr = \
|
||||||
autoTranslatePost(baseDir, postJsonObject,
|
autoTranslatePost(baseDir, postJsonObject,
|
||||||
|
|
|
@ -11,7 +11,7 @@ import os
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from utils import getContentFromPost
|
from utils import getBaseContentFromPost
|
||||||
from utils import isAccountDir
|
from utils import isAccountDir
|
||||||
from utils import getConfigParam
|
from utils import getConfigParam
|
||||||
from utils import getFullDomain
|
from utils import getFullDomain
|
||||||
|
@ -904,7 +904,7 @@ def rssHashtagSearch(nickname: str, domain: str, port: int,
|
||||||
postJsonObject['object']['summary'] + \
|
postJsonObject['object']['summary'] + \
|
||||||
'</title>'
|
'</title>'
|
||||||
description = \
|
description = \
|
||||||
getContentFromPost(postJsonObject, systemLanguage)
|
getBaseContentFromPost(postJsonObject, systemLanguage)
|
||||||
description = firstParagraphFromString(description)
|
description = firstParagraphFromString(description)
|
||||||
hashtagFeed += \
|
hashtagFeed += \
|
||||||
' <description>' + description + '</description>'
|
' <description>' + description + '</description>'
|
||||||
|
|
Loading…
Reference in New Issue