mirror of https://gitlab.com/bashrc2/epicyon
Append links to translated text
parent
8f29e1a557
commit
e3c696a0b3
35
languages.py
35
languages.py
|
@ -185,6 +185,27 @@ def _libretranslateLanguages(url: str, apiKey: str = None) -> []:
|
|||
return langList
|
||||
|
||||
|
||||
def getLinksFromContent(content: str) -> []:
    """Returns a list of links within the given content

    content is html text. Each '<a href' anchor is examined and the
    quoted attribute value following it is taken as the url. The value
    may be delimited by double quotes or, when it directly follows the
    equals sign, by single quotes.
    Only values containing both '://' and '.' are returned, so fragment
    and relative links are skipped. Urls are returned in order of first
    appearance, without duplicates. An empty list is returned if there
    is no content or it contains no anchors.
    """
    if not content or '<a href' not in content:
        return []
    links = []
    # the text before the first anchor cannot contain an href value
    for subsection in content.split('<a href')[1:]:
        # look at what directly follows the attribute name, to detect
        # a single quoted value such as <a href='https://domain/path'>
        value = subsection.lstrip()
        if value.startswith('='):
            value = value[1:].lstrip()
        if value.startswith("'"):
            quote = "'"
        elif '"' in subsection:
            quote = '"'
        else:
            continue
        # the url is the text between the first pair of quotes
        url = subsection.split(quote)[1].strip()
        if '://' not in url or '.' not in url:
            continue
        if url not in links:
            links.append(url)
    return links
|
||||
|
||||
|
||||
def _libretranslate(url: str, text: str,
|
||||
source: str, target: str, apiKey: str = None) -> str:
|
||||
"""Translate string using libretranslate
|
||||
|
@ -196,6 +217,9 @@ def _libretranslate(url: str, text: str,
|
|||
else:
|
||||
url += "translate"
|
||||
|
||||
# get any links from the text
|
||||
links = getLinksFromContent(text)
|
||||
|
||||
# LibreTranslate doesn't like markup
|
||||
text = removeHtml(text)
|
||||
|
||||
|
@ -215,7 +239,16 @@ def _libretranslate(url: str, text: str,
|
|||
|
||||
response_str = response.read().decode()
|
||||
|
||||
return json.loads(response_str)["translatedText"]
|
||||
translatedText = \
|
||||
'<p>' + json.loads(response_str)['translatedText'] + '</p>'
|
||||
if links:
|
||||
for url in links:
|
||||
urlDesc = url
|
||||
if len(urlDesc) > 30:
|
||||
urlDesc = urlDesc[:30]
|
||||
translatedText += \
|
||||
'<p><a href="' + url + '">' + urlDesc + '</a></p>'
|
||||
return translatedText
|
||||
|
||||
|
||||
def autoTranslatePost(baseDir: str, postJsonObject: {},
|
||||
|
|
21
tests.py
21
tests.py
|
@ -125,6 +125,7 @@ from speaker import speakerReplaceLinks
|
|||
from markdown import markdownToHtml
|
||||
from languages import setActorLanguages
|
||||
from languages import getActorLanguages
|
||||
from languages import getLinksFromContent
|
||||
|
||||
testServerAliceRunning = False
|
||||
testServerBobRunning = False
|
||||
|
@ -4226,9 +4227,29 @@ def _testSetActorLanguages():
|
|||
assert languagesStr == 'en / es / fr'
|
||||
|
||||
|
||||
def _testGetLinksFromContent():
    """Checks that getLinksFromContent returns no links for plain text
    and returns every anchor url for html containing two links
    """
    print('testGetLinksFromContent')

    # plain text without any anchors
    assert not getLinksFromContent('This text has no links')

    # html containing two different anchors
    expectedLinks = (
        'https://somewebsite.net',
        'http://somewhere.or.other'
    )
    htmlParts = [
        'This is <a href="', expectedLinks[0], '">a link</a>. ',
        'And <a href="', expectedLinks[1], '">another</a>.'
    ]
    foundLinks = getLinksFromContent(''.join(htmlParts))
    assert len(foundLinks) == 2
    for expected in expectedLinks:
        assert expected in foundLinks
|
||||
|
||||
|
||||
def runAllTests():
|
||||
print('Running tests...')
|
||||
_testGetLinksFromContent()
|
||||
return
|
||||
updateDefaultThemesList(os.getcwd())
|
||||
_testGetLinksFromContent()
|
||||
_testSetActorLanguages()
|
||||
_testLimitRepetedWords()
|
||||
_testLimitWordLengths()
|
||||
|
|
Loading…
Reference in New Issue