| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | __filename__ = "languages.py" | 
					
						
							|  |  |  | __author__ = "Bob Mottram" | 
					
						
							|  |  |  | __license__ = "AGPL3+" | 
					
						
							|  |  |  | __version__ = "1.2.0" | 
					
						
							|  |  |  | __maintainer__ = "Bob Mottram" | 
					
						
							|  |  |  | __email__ = "bob@freedombone.net" | 
					
						
							|  |  |  | __status__ = "Production" | 
					
						
							|  |  |  | __module_group__ = "Core" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import os | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | import json | 
					
						
							|  |  |  | from urllib import request, parse | 
					
						
							| 
									
										
										
										
											2021-07-20 13:33:27 +00:00
										 |  |  | from utils import getActorLanguagesList | 
					
						
							| 
									
										
										
										
											2021-07-20 10:13:22 +00:00
										 |  |  | from utils import removeHtml | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | from utils import hasObjectDict | 
					
						
							|  |  |  | from utils import getConfigParam | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | from cache import getPersonFromCache | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def getActorLanguages(actorJson: {}) -> str: | 
					
						
							|  |  |  |     """Returns a string containing languages used by the given actor
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-07-20 13:33:27 +00:00
										 |  |  |     langList = getActorLanguagesList(actorJson) | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     if not langList: | 
					
						
							|  |  |  |         return '' | 
					
						
							|  |  |  |     languagesStr = '' | 
					
						
							|  |  |  |     for lang in langList: | 
					
						
							|  |  |  |         if languagesStr: | 
					
						
							|  |  |  |             languagesStr += ' / ' + lang | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             languagesStr = lang | 
					
						
							|  |  |  |     return languagesStr | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def setActorLanguages(baseDir: str, actorJson: {}, languagesStr: str) -> None: | 
					
						
							|  |  |  |     """Sets the languages used by the given actor
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     separator = ',' | 
					
						
							|  |  |  |     if '/' in languagesStr: | 
					
						
							|  |  |  |         separator = '/' | 
					
						
							|  |  |  |     elif ';' in languagesStr: | 
					
						
							|  |  |  |         separator = ';' | 
					
						
							|  |  |  |     langList = languagesStr.lower().split(separator) | 
					
						
							|  |  |  |     langList2 = [] | 
					
						
							|  |  |  |     for lang in langList: | 
					
						
							|  |  |  |         lang = lang.strip() | 
					
						
							| 
									
										
										
										
											2021-07-19 10:07:29 +00:00
										 |  |  |         if baseDir: | 
					
						
							|  |  |  |             languageFilename = baseDir + '/translations/' + lang + '.json' | 
					
						
							|  |  |  |             if os.path.isfile(languageFilename): | 
					
						
							|  |  |  |                 langList2.append(lang) | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |             langList2.append(lang) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # remove any existing value | 
					
						
							|  |  |  |     propertyFound = None | 
					
						
							|  |  |  |     for propertyValue in actorJson['attachment']: | 
					
						
							|  |  |  |         if not propertyValue.get('name'): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if not propertyValue.get('type'): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if not propertyValue['name'].lower().startswith('languages'): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         propertyFound = propertyValue | 
					
						
							|  |  |  |         break | 
					
						
							|  |  |  |     if propertyFound: | 
					
						
							|  |  |  |         actorJson['attachment'].remove(propertyFound) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not langList2: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     newLanguages = { | 
					
						
							|  |  |  |         "name": "Languages", | 
					
						
							|  |  |  |         "type": "PropertyValue", | 
					
						
							|  |  |  |         "value": langList2 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     actorJson['attachment'].append(newLanguages) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def understoodPostLanguage(baseDir: str, nickname: str, domain: str, | 
					
						
							|  |  |  |                            messageJson: {}, systemLanguage: str, | 
					
						
							|  |  |  |                            httpPrefix: str, domainFull: str, | 
					
						
							|  |  |  |                            personCache: {}) -> bool: | 
					
						
							|  |  |  |     """Returns true if the post is written in a language
 | 
					
						
							|  |  |  |     understood by this account | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     msgObject = messageJson | 
					
						
							| 
									
										
										
										
											2021-07-20 11:59:29 +00:00
										 |  |  |     if hasObjectDict(messageJson): | 
					
						
							|  |  |  |         msgObject = messageJson['object'] | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     if not msgObject.get('contentMap'): | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  |     if not isinstance(msgObject['contentMap'], dict): | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  |     if msgObject['contentMap'].get(systemLanguage): | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  |     personUrl = httpPrefix + '://' + domainFull + '/users/' + nickname | 
					
						
							|  |  |  |     actorJson = getPersonFromCache(baseDir, personUrl, personCache, False) | 
					
						
							|  |  |  |     if not actorJson: | 
					
						
							| 
									
										
										
										
											2021-07-20 11:59:29 +00:00
										 |  |  |         print('WARN: unable to load actor to check languages ' + personUrl) | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         return False | 
					
						
							| 
									
										
										
										
											2021-07-20 13:33:27 +00:00
										 |  |  |     languagesUnderstood = getActorLanguagesList(actorJson) | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     if not languagesUnderstood: | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  |     for lang in languagesUnderstood: | 
					
						
							|  |  |  |         if msgObject['contentMap'].get(lang): | 
					
						
							|  |  |  |             return True | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     # is the language for this post supported by libretranslate? | 
					
						
							|  |  |  |     libretranslateUrl = getConfigParam(baseDir, "libretranslateUrl") | 
					
						
							|  |  |  |     if libretranslateUrl: | 
					
						
							|  |  |  |         libretranslateApiKey = getConfigParam(baseDir, "libretranslateApiKey") | 
					
						
							|  |  |  |         langList = \ | 
					
						
							|  |  |  |             _libretranslateLanguages(libretranslateUrl, libretranslateApiKey) | 
					
						
							|  |  |  |         for lang in langList: | 
					
						
							|  |  |  |             if msgObject['contentMap'].get(lang): | 
					
						
							|  |  |  |                 return True | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     return False | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _libretranslateLanguages(url: str, apiKey: str = None) -> []: | 
					
						
							|  |  |  |     """Returns a list of supported languages
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not url.endswith('/languages'): | 
					
						
							|  |  |  |         if not url.endswith('/'): | 
					
						
							|  |  |  |             url += "/languages" | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             url += "languages" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     params = dict() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if apiKey: | 
					
						
							|  |  |  |         params["api_key"] = apiKey | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     urlParams = parse.urlencode(params) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     req = request.Request(url, data=urlParams.encode()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     response = request.urlopen(req) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     response_str = response.read().decode() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     result = json.loads(response_str) | 
					
						
							|  |  |  |     if not result: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  |     if not isinstance(result, list): | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     langList = [] | 
					
						
							|  |  |  |     for lang in result: | 
					
						
							|  |  |  |         if not isinstance(lang, dict): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if not lang.get('code'): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         langCode = lang['code'] | 
					
						
							|  |  |  |         if len(langCode) != 2: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         langList.append(langCode) | 
					
						
							|  |  |  |     langList.sort() | 
					
						
							|  |  |  |     return langList | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 17:49:12 +00:00
										 |  |  | def getLinksFromContent(content: str) -> {}: | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     """Returns a list of links within the given content
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if '<a href' not in content: | 
					
						
							| 
									
										
										
										
											2021-07-20 17:49:12 +00:00
										 |  |  |         return {} | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     sections = content.split('<a href') | 
					
						
							|  |  |  |     first = True | 
					
						
							| 
									
										
										
										
											2021-07-20 17:49:12 +00:00
										 |  |  |     links = {} | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     for subsection in sections: | 
					
						
							|  |  |  |         if first: | 
					
						
							|  |  |  |             first = False | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if '"' not in subsection: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         url = subsection.split('"')[1].strip() | 
					
						
							| 
									
										
										
										
											2021-07-20 17:49:12 +00:00
										 |  |  |         if '://' in url and '.' in url and \ | 
					
						
							|  |  |  |            '>' in subsection: | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |             if url not in links: | 
					
						
							| 
									
										
										
										
											2021-07-20 17:49:12 +00:00
										 |  |  |                 linkText = subsection.split('>')[1] | 
					
						
							|  |  |  |                 if '<' in linkText: | 
					
						
							|  |  |  |                     linkText = linkText.split('<')[0] | 
					
						
							|  |  |  |                     links[linkText] = url | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     return links | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 18:02:42 +00:00
										 |  |  | def addLinksToContent(content: str, links: {}) -> str: | 
					
						
							|  |  |  |     """Adds links back into plain text
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     for linkText, url in links.items(): | 
					
						
							|  |  |  |         urlDesc = url | 
					
						
							|  |  |  |         if linkText.startswith('@') and linkText in content: | 
					
						
							|  |  |  |             content = \ | 
					
						
							|  |  |  |                 content.replace(linkText, | 
					
						
							|  |  |  |                                 '<a href="' + url + | 
					
						
							|  |  |  |                                 '" rel="nofollow noopener ' + | 
					
						
							|  |  |  |                                 'noreferrer" target="_blank">' + | 
					
						
							|  |  |  |                                 linkText + '</a>') | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             if len(urlDesc) > 40: | 
					
						
							|  |  |  |                 urlDesc = urlDesc[:40] | 
					
						
							|  |  |  |             content += \ | 
					
						
							|  |  |  |                 '<p><a href="' + url + \ | 
					
						
							|  |  |  |                 '" rel="nofollow noopener noreferrer" target="_blank">' + \ | 
					
						
							|  |  |  |                 urlDesc + '</a></p>' | 
					
						
							|  |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | def _libretranslate(url: str, text: str, | 
					
						
							|  |  |  |                     source: str, target: str, apiKey: str = None) -> str: | 
					
						
							|  |  |  |     """Translate string using libretranslate
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not url.endswith('/translate'): | 
					
						
							|  |  |  |         if not url.endswith('/'): | 
					
						
							|  |  |  |             url += "/translate" | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             url += "translate" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 20:09:39 +00:00
										 |  |  |     originalText = text | 
					
						
							| 
									
										
										
										
											2021-07-20 20:12:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     # get any links from the text | 
					
						
							|  |  |  |     links = getLinksFromContent(text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 10:13:22 +00:00
										 |  |  |     # LibreTranslate doesn't like markup | 
					
						
							|  |  |  |     text = removeHtml(text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 20:04:49 +00:00
										 |  |  |     # remove any links from plain text version of the content | 
					
						
							|  |  |  |     for _, url in links.items(): | 
					
						
							|  |  |  |         text = text.replace(url, '') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     ltParams = { | 
					
						
							|  |  |  |         "q": text, | 
					
						
							|  |  |  |         "source": source, | 
					
						
							|  |  |  |         "target": target | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if apiKey: | 
					
						
							|  |  |  |         ltParams["api_key"] = apiKey | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     urlParams = parse.urlencode(ltParams) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     req = request.Request(url, data=urlParams.encode()) | 
					
						
							| 
									
										
										
										
											2021-07-20 20:09:39 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         response = request.urlopen(req) | 
					
						
							|  |  |  |     except BaseException: | 
					
						
							| 
									
										
										
										
											2021-07-20 20:12:24 +00:00
										 |  |  |         print('Unable to translate: ' + text) | 
					
						
							| 
									
										
										
										
											2021-07-20 20:09:39 +00:00
										 |  |  |         return originalText | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     response_str = response.read().decode() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     translatedText = \ | 
					
						
							|  |  |  |         '<p>' + json.loads(response_str)['translatedText'] + '</p>' | 
					
						
							| 
									
										
										
										
											2021-07-20 10:46:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # append links form the original text | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     if links: | 
					
						
							| 
									
										
										
										
											2021-07-20 18:02:42 +00:00
										 |  |  |         translatedText = addLinksToContent(translatedText, links) | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     return translatedText | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def autoTranslatePost(baseDir: str, postJsonObject: {}, | 
					
						
							| 
									
										
										
										
											2021-07-20 11:21:15 +00:00
										 |  |  |                       systemLanguage: str, translate: {}) -> str: | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     """Tries to automatically translate the given post
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not hasObjectDict(postJsonObject): | 
					
						
							|  |  |  |         return '' | 
					
						
							|  |  |  |     msgObject = postJsonObject['object'] | 
					
						
							|  |  |  |     if not msgObject.get('contentMap'): | 
					
						
							|  |  |  |         return '' | 
					
						
							|  |  |  |     if not isinstance(msgObject['contentMap'], dict): | 
					
						
							|  |  |  |         return '' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # is the language for this post supported by libretranslate? | 
					
						
							|  |  |  |     libretranslateUrl = getConfigParam(baseDir, "libretranslateUrl") | 
					
						
							|  |  |  |     if not libretranslateUrl: | 
					
						
							|  |  |  |         return '' | 
					
						
							|  |  |  |     libretranslateApiKey = getConfigParam(baseDir, "libretranslateApiKey") | 
					
						
							|  |  |  |     langList = \ | 
					
						
							|  |  |  |         _libretranslateLanguages(libretranslateUrl, libretranslateApiKey) | 
					
						
							|  |  |  |     for lang in langList: | 
					
						
							|  |  |  |         if msgObject['contentMap'].get(lang): | 
					
						
							| 
									
										
										
										
											2021-07-20 19:46:44 +00:00
										 |  |  |             content = msgObject['contentMap'][lang] | 
					
						
							| 
									
										
										
										
											2021-07-20 11:21:15 +00:00
										 |  |  |             translatedText = \ | 
					
						
							| 
									
										
										
										
											2021-07-20 19:46:44 +00:00
										 |  |  |                 _libretranslate(libretranslateUrl, content, | 
					
						
							| 
									
										
										
										
											2021-07-20 11:21:15 +00:00
										 |  |  |                                 lang, systemLanguage, | 
					
						
							|  |  |  |                                 libretranslateApiKey) | 
					
						
							|  |  |  |             if translatedText: | 
					
						
							| 
									
										
										
										
											2021-07-20 19:55:48 +00:00
										 |  |  |                 if removeHtml(translatedText) == removeHtml(content): | 
					
						
							|  |  |  |                     return content | 
					
						
							| 
									
										
										
										
											2021-07-20 11:21:15 +00:00
										 |  |  |                 translatedText = \ | 
					
						
							|  |  |  |                     '<p>' + translate['Translated'].upper() + '</p>' + \ | 
					
						
							|  |  |  |                     translatedText | 
					
						
							|  |  |  |             return translatedText | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     return '' |