Append links to translated text

merge-requests/20/merge
Bob Mottram 2021-07-20 11:45:04 +01:00
parent 8f29e1a557
commit e3c696a0b3
2 changed files with 55 additions and 1 deletion

View File

@ -185,6 +185,27 @@ def _libretranslateLanguages(url: str, apiKey: str = None) -> []:
return langList
def getLinksFromContent(content: str) -> []:
    """Returns a list of links within the given content

    Each '<a href' occurrence is examined and the first quoted value
    following it is taken as the link target. Both double and single
    quoted attribute values are recognised. A value is only accepted
    if it contains '://' and '.', and each link is returned once, in
    order of first appearance.
    """
    if '<a href' not in content:
        return []
    links = []
    # the text before the first anchor cannot contain a link target
    for subsection in content.split('<a href')[1:]:
        # the href value is delimited by whichever quote character
        # appears first, so that href='...' is not skipped or confused
        # with a later double quoted attribute such as title="..."
        quotePositions = [subsection.find(quoteChar)
                          for quoteChar in ('"', "'")]
        quotePositions = [pos for pos in quotePositions if pos >= 0]
        if not quotePositions:
            continue
        quote = subsection[min(quotePositions)]
        url = subsection.split(quote)[1].strip()
        if '://' in url and '.' in url and url not in links:
            links.append(url)
    return links
def _libretranslate(url: str, text: str,
source: str, target: str, apiKey: str = None) -> str:
"""Translate string using libretranslate
@ -196,6 +217,9 @@ def _libretranslate(url: str, text: str,
else:
url += "translate"
# get any links from the text
links = getLinksFromContent(text)
# LibreTranslate doesn't like markup
text = removeHtml(text)
@ -215,7 +239,16 @@ def _libretranslate(url: str, text: str,
response_str = response.read().decode()
return json.loads(response_str)["translatedText"]
translatedText = \
'<p>' + json.loads(response_str)['translatedText'] + '</p>'
if links:
for url in links:
urlDesc = url
if len(urlDesc) > 30:
urlDesc = urlDesc[:30]
translatedText += \
'<p><a href="' + url + '">' + urlDesc + '</a></p>'
return translatedText
def autoTranslatePost(baseDir: str, postJsonObject: {},

View File

@ -125,6 +125,7 @@ from speaker import speakerReplaceLinks
from markdown import markdownToHtml
from languages import setActorLanguages
from languages import getActorLanguages
from languages import getLinksFromContent
testServerAliceRunning = False
testServerBobRunning = False
@ -4226,9 +4227,29 @@ def _testSetActorLanguages():
assert languagesStr == 'en / es / fr'
def _testGetLinksFromContent():
    """Checks that getLinksFromContent returns the href targets of
    anchors within some content, and nothing for plain text
    """
    print('testGetLinksFromContent')

    # content without any anchor markup should give no links
    plainText = 'This text has no links'
    assert not getLinksFromContent(plainText)

    # two different anchors should both be returned
    firstLink = 'https://somewebsite.net'
    secondLink = 'http://somewhere.or.other'
    firstSentence = 'This is <a href="' + firstLink + '">a link</a>. '
    secondSentence = 'And <a href="' + secondLink + '">another</a>.'
    foundLinks = getLinksFromContent(firstSentence + secondSentence)
    assert len(foundLinks) == 2
    for expectedLink in (firstLink, secondLink):
        assert expectedLink in foundLinks
def runAllTests():
print('Running tests...')
_testGetLinksFromContent()
return
updateDefaultThemesList(os.getcwd())
_testGetLinksFromContent()
_testSetActorLanguages()
_testLimitRepetedWords()
_testLimitWordLengths()