Append links to translated text

main
Bob Mottram 2021-07-20 11:45:04 +01:00
parent 8f29e1a557
commit e3c696a0b3
2 changed files with 55 additions and 1 deletions

View File

@ -185,6 +185,27 @@ def _libretranslateLanguages(url: str, apiKey: str = None) -> []:
return langList return langList
def getLinksFromContent(content: str) -> []:
    """Returns a list of links within the given content

    Scans the html content for anchor tags and collects the value of
    each href attribute. The attribute value may be enclosed in either
    double or single quotes. Only values which look like absolute urls
    (containing both '://' and '.') are returned, in order of first
    appearance and without duplicates.

    content: html text to be searched
    returns: list of url strings, empty if there are none
    """
    if '<a href' not in content:
        return []
    links = []
    # the first section is the text before the first anchor, so skip it
    for subsection in content.split('<a href')[1:]:
        # expect the remainder of the attribute: ="value" or ='value'
        attrib = subsection.lstrip()
        if not attrib.startswith('='):
            continue
        attrib = attrib[1:].lstrip()
        if not attrib or attrib[0] not in ('"', "'"):
            continue
        # the value ends at the matching quote, so that quotes occurring
        # later within the anchor text are never mistaken for the url
        quoteChar = attrib[0]
        closeIndex = attrib.find(quoteChar, 1)
        if closeIndex == -1:
            continue
        url = attrib[1:closeIndex].strip()
        if '://' in url and '.' in url and url not in links:
            links.append(url)
    return links
def _libretranslate(url: str, text: str, def _libretranslate(url: str, text: str,
source: str, target: str, apiKey: str = None) -> str: source: str, target: str, apiKey: str = None) -> str:
"""Translate string using libretranslate """Translate string using libretranslate
@ -196,6 +217,9 @@ def _libretranslate(url: str, text: str,
else: else:
url += "translate" url += "translate"
# get any links from the text
links = getLinksFromContent(text)
# LibreTranslate doesn't like markup # LibreTranslate doesn't like markup
text = removeHtml(text) text = removeHtml(text)
@ -215,7 +239,16 @@ def _libretranslate(url: str, text: str,
response_str = response.read().decode() response_str = response.read().decode()
return json.loads(response_str)["translatedText"] translatedText = \
'<p>' + json.loads(response_str)['translatedText'] + '</p>'
if links:
for url in links:
urlDesc = url
if len(urlDesc) > 30:
urlDesc = urlDesc[:30]
translatedText += \
'<p><a href="' + url + '">' + urlDesc + '</a></p>'
return translatedText
def autoTranslatePost(baseDir: str, postJsonObject: {}, def autoTranslatePost(baseDir: str, postJsonObject: {},

View File

@ -125,6 +125,7 @@ from speaker import speakerReplaceLinks
from markdown import markdownToHtml from markdown import markdownToHtml
from languages import setActorLanguages from languages import setActorLanguages
from languages import getActorLanguages from languages import getActorLanguages
from languages import getLinksFromContent
testServerAliceRunning = False testServerAliceRunning = False
testServerBobRunning = False testServerBobRunning = False
@ -4226,9 +4227,29 @@ def _testSetActorLanguages():
assert languagesStr == 'en / es / fr' assert languagesStr == 'en / es / fr'
def _testGetLinksFromContent():
    """Checks that getLinksFromContent returns no links for plain text
    and returns each href value for content containing two anchors
    """
    print('testGetLinksFromContent')

    # text without any anchor tags should produce nothing
    plainText = 'This text has no links'
    assert not getLinksFromContent(plainText)

    # both anchors should be found, and nothing else
    expectedLinks = ('https://somewebsite.net', 'http://somewhere.or.other')
    htmlText = \
        'This is <a href="' + expectedLinks[0] + '">a link</a>. ' + \
        'And <a href="' + expectedLinks[1] + '">another</a>.'
    foundLinks = getLinksFromContent(htmlText)
    assert len(foundLinks) == 2
    for expected in expectedLinks:
        assert expected in foundLinks
def runAllTests(): def runAllTests():
print('Running tests...') print('Running tests...')
_testGetLinksFromContent()
return
updateDefaultThemesList(os.getcwd()) updateDefaultThemesList(os.getcwd())
_testGetLinksFromContent()
_testSetActorLanguages() _testSetActorLanguages()
_testLimitRepetedWords() _testLimitRepetedWords()
_testLimitWordLengths() _testLimitWordLengths()