| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | __filename__ = "languages.py" | 
					
						
							|  |  |  | __author__ = "Bob Mottram" | 
					
						
							|  |  |  | __license__ = "AGPL3+" | 
					
						
							| 
									
										
										
										
											2024-12-22 23:37:30 +00:00
										 |  |  | __version__ = "1.6.0" | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | __maintainer__ = "Bob Mottram" | 
					
						
							| 
									
										
										
										
											2021-09-10 16:14:50 +00:00
										 |  |  | __email__ = "bob@libreserver.org" | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | __status__ = "Production" | 
					
						
							|  |  |  | __module_group__ = "Core" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | import json | 
					
						
							| 
									
										
										
										
											2022-12-08 15:28:17 +00:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | from urllib import request, parse | 
					
						
							| 
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 |  |  | from utils import data_dir | 
					
						
							| 
									
										
										
										
											2022-12-08 15:28:17 +00:00
										 |  |  | from utils import is_account_dir | 
					
						
							|  |  |  | from utils import acct_dir | 
					
						
							| 
									
										
										
										
											2021-12-26 10:22:19 +00:00
										 |  |  | from utils import get_actor_languages_list | 
					
						
							| 
									
										
										
										
											2021-12-27 15:43:22 +00:00
										 |  |  | from utils import remove_html | 
					
						
							| 
									
										
										
										
											2021-12-26 10:57:03 +00:00
										 |  |  | from utils import has_object_dict | 
					
						
							| 
									
										
										
										
											2021-12-26 14:08:58 +00:00
										 |  |  | from utils import get_config_param | 
					
						
							| 
									
										
										
										
											2021-12-26 10:19:59 +00:00
										 |  |  | from utils import local_actor_url | 
					
						
							| 
									
										
										
										
											2024-01-27 17:04:21 +00:00
										 |  |  | from utils import resembles_url | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | from cache import get_person_from_cache | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def get_actor_languages(actor_json: {}) -> str: | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     """Returns a string containing languages used by the given actor
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |     lang_list = get_actor_languages_list(actor_json) | 
					
						
							|  |  |  |     if not lang_list: | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         return '' | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     languages_str = '' | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |     for lang in lang_list: | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         if languages_str: | 
					
						
							|  |  |  |             languages_str += ' / ' + lang | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |             languages_str = lang | 
					
						
							|  |  |  |     return languages_str | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-28 11:29:01 +00:00
										 |  |  | def get_understood_languages(base_dir: str, http_prefix: str, | 
					
						
							|  |  |  |                              nickname: str, domain_full: str, | 
					
						
							|  |  |  |                              person_cache: {}) -> []: | 
					
						
							|  |  |  |     """Returns a list of understood languages for the given account
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     person_url = local_actor_url(http_prefix, nickname, domain_full) | 
					
						
							|  |  |  |     actor_json = \ | 
					
						
							| 
									
										
										
										
											2022-06-09 16:54:44 +00:00
										 |  |  |         get_person_from_cache(base_dir, person_url, person_cache) | 
					
						
							| 
									
										
										
										
											2022-01-28 11:29:01 +00:00
										 |  |  |     if not actor_json: | 
					
						
							|  |  |  |         print('WARN: unable to load actor to obtain languages ' + person_url) | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  |     return get_actor_languages_list(actor_json) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-06-01 14:26:50 +00:00
										 |  |  | def set_actor_languages(actor_json: {}, languages_str: str) -> None: | 
					
						
							| 
									
										
										
										
											2022-02-26 13:41:48 +00:00
										 |  |  |     """Sets the languages understood by the given actor
 | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2022-02-25 15:13:23 +00:00
										 |  |  |     languages_str = languages_str.strip() | 
					
						
							|  |  |  |     separator = None | 
					
						
							| 
									
										
										
										
											2022-02-26 13:47:33 +00:00
										 |  |  |     possible_separators = (',', '/', ';', '+', ' ') | 
					
						
							|  |  |  |     for poss in possible_separators: | 
					
						
							|  |  |  |         if poss in languages_str: | 
					
						
							|  |  |  |             separator = poss | 
					
						
							|  |  |  |             break | 
					
						
							| 
									
										
										
										
											2022-02-25 15:13:23 +00:00
										 |  |  |     if separator: | 
					
						
							|  |  |  |         lang_list = languages_str.lower().split(separator) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         lang_list = [languages_str.lower()] | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |     lang_list2 = '' | 
					
						
							|  |  |  |     for lang in lang_list: | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         lang = lang.strip() | 
					
						
							| 
									
										
										
										
											2022-02-26 13:41:48 +00:00
										 |  |  |         if lang_list2: | 
					
						
							|  |  |  |             if ' ' + lang not in lang_list2: | 
					
						
							| 
									
										
										
										
											2022-02-26 13:43:27 +00:00
										 |  |  |                 lang_list2 += ', ' + lang | 
					
						
							| 
									
										
										
										
											2022-02-26 13:41:48 +00:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2022-02-26 13:43:27 +00:00
										 |  |  |             lang_list2 += lang | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # remove any existing value | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     property_found = None | 
					
						
							| 
									
										
										
										
											2021-12-26 10:32:45 +00:00
										 |  |  |     for property_value in actor_json['attachment']: | 
					
						
							| 
									
										
										
										
											2022-05-11 16:10:38 +00:00
										 |  |  |         name_value = None | 
					
						
							|  |  |  |         if property_value.get('name'): | 
					
						
							|  |  |  |             name_value = property_value['name'] | 
					
						
							|  |  |  |         elif property_value.get('schema:name'): | 
					
						
							|  |  |  |             name_value = property_value['schema:name'] | 
					
						
							|  |  |  |         if not name_value: | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2021-12-26 10:32:45 +00:00
										 |  |  |         if not property_value.get('type'): | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2022-05-11 16:10:38 +00:00
										 |  |  |         if not name_value.lower().startswith('languages'): | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         property_found = property_value | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         break | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     if property_found: | 
					
						
							|  |  |  |         actor_json['attachment'].remove(property_found) | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |     if not lang_list2: | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     new_languages = { | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         "name": "Languages", | 
					
						
							|  |  |  |         "type": "PropertyValue", | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |         "value": lang_list2 | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     actor_json['attachment'].append(new_languages) | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-06-14 10:51:40 +00:00
										 |  |  | def understood_post_language(base_dir: str, nickname: str, | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  |                              message_json: {}, system_language: str, | 
					
						
							|  |  |  |                              http_prefix: str, domain_full: str, | 
					
						
							|  |  |  |                              person_cache: {}) -> bool: | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     """Returns true if the post is written in a language
 | 
					
						
							|  |  |  |     understood by this account | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     msg_object = message_json | 
					
						
							| 
									
										
										
										
											2021-12-26 10:57:03 +00:00
										 |  |  |     if has_object_dict(message_json): | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         msg_object = message_json['object'] | 
					
						
							|  |  |  |     if not msg_object.get('contentMap'): | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         return True | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     if not isinstance(msg_object['contentMap'], dict): | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         return True | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     if msg_object['contentMap'].get(system_language): | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         return True | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     person_url = local_actor_url(http_prefix, nickname, domain_full) | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  |     actor_json = \ | 
					
						
							| 
									
										
										
										
											2022-06-09 16:54:44 +00:00
										 |  |  |         get_person_from_cache(base_dir, person_url, person_cache) | 
					
						
							| 
									
										
										
										
											2021-12-26 10:29:52 +00:00
										 |  |  |     if not actor_json: | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         print('WARN: unable to load actor to check languages ' + person_url) | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         return False | 
					
						
							| 
									
										
										
										
											2021-12-26 10:52:54 +00:00
										 |  |  |     languages_understood = get_actor_languages_list(actor_json) | 
					
						
							|  |  |  |     if not languages_understood: | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |         return True | 
					
						
							| 
									
										
										
										
											2021-12-26 10:52:54 +00:00
										 |  |  |     for lang in languages_understood: | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         if msg_object['contentMap'].get(lang): | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |             return True | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     # is the language for this post supported by libretranslate? | 
					
						
							| 
									
										
										
										
											2022-01-02 21:45:26 +00:00
										 |  |  |     libretranslate_url = get_config_param(base_dir, "libretranslateUrl") | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     if libretranslate_url: | 
					
						
							|  |  |  |         libretranslate_api_key = \ | 
					
						
							| 
									
										
										
										
											2021-12-26 14:08:58 +00:00
										 |  |  |             get_config_param(base_dir, "libretranslateApiKey") | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |         lang_list = \ | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |             libretranslate_languages(libretranslate_url, | 
					
						
							|  |  |  |                                      libretranslate_api_key) | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |         for lang in lang_list: | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |             if msg_object['contentMap'].get(lang): | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |                 return True | 
					
						
							| 
									
										
										
										
											2021-07-19 08:46:21 +00:00
										 |  |  |     return False | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-19 15:38:08 +00:00
										 |  |  | def libretranslate_languages(url: str, api_key: str) -> []: | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     """Returns a list of supported languages
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-08-08 11:16:18 +00:00
										 |  |  |     if not url: | 
					
						
							|  |  |  |         return [] | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     if not url.endswith('/languages'): | 
					
						
							|  |  |  |         if not url.endswith('/'): | 
					
						
							|  |  |  |             url += "/languages" | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             url += "languages" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-30 18:33:51 +00:00
										 |  |  |     params = {} | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     if api_key: | 
					
						
							|  |  |  |         params["api_key"] = api_key | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     url_params = parse.urlencode(params) | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     req = request.Request(url, data=url_params.encode()) | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-30 18:33:51 +00:00
										 |  |  |     response_str = '' | 
					
						
							|  |  |  |     with request.urlopen(req) as response: | 
					
						
							|  |  |  |         response_str = response.read().decode() | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-19 21:43:48 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         result = json.loads(response_str) | 
					
						
							|  |  |  |     except json.decoder.JSONDecodeError as ex: | 
					
						
							|  |  |  |         print('EX: json decode error ' + str(ex) + | 
					
						
							|  |  |  |               ' from libretranslate_languages ' + | 
					
						
							|  |  |  |               str(response_str)) | 
					
						
							|  |  |  |         return [] | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     if not result: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  |     if not isinstance(result, list): | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-23 17:45:20 +00:00
										 |  |  |     lang_list: list[str] = [] | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     for lang in result: | 
					
						
							|  |  |  |         if not isinstance(lang, dict): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if not lang.get('code'): | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         lang_code = lang['code'] | 
					
						
							|  |  |  |         if len(lang_code) != 2: | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         lang_list.append(lang_code) | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |     lang_list.sort() | 
					
						
							|  |  |  |     return lang_list | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def get_links_from_content(content: str) -> {}: | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     """Returns a list of links within the given content
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if '<a href' not in content: | 
					
						
							| 
									
										
										
										
											2021-07-20 17:49:12 +00:00
										 |  |  |         return {} | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     sections = content.split('<a href') | 
					
						
							|  |  |  |     first = True | 
					
						
							| 
									
										
										
										
											2021-07-20 17:49:12 +00:00
										 |  |  |     links = {} | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     for subsection in sections: | 
					
						
							|  |  |  |         if first: | 
					
						
							|  |  |  |             first = False | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if '"' not in subsection: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         url = subsection.split('"')[1].strip() | 
					
						
							| 
									
										
										
										
											2024-01-27 17:04:21 +00:00
										 |  |  |         if resembles_url(url) and \ | 
					
						
							| 
									
										
										
										
											2021-07-20 17:49:12 +00:00
										 |  |  |            '>' in subsection: | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |             if url not in links: | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |                 link_text = subsection.split('>')[1] | 
					
						
							|  |  |  |                 if '<' in link_text: | 
					
						
							|  |  |  |                     link_text = link_text.split('<')[0] | 
					
						
							|  |  |  |                     links[link_text] = url | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     return links | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def add_links_to_content(content: str, links: {}) -> str: | 
					
						
							| 
									
										
										
										
											2021-07-20 18:02:42 +00:00
										 |  |  |     """Adds links back into plain text
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     for link_text, url in links.items(): | 
					
						
							|  |  |  |         url_desc = url | 
					
						
							|  |  |  |         if link_text.startswith('@') and link_text in content: | 
					
						
							| 
									
										
										
										
											2021-07-20 18:02:42 +00:00
										 |  |  |             content = \ | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |                 content.replace(link_text, | 
					
						
							| 
									
										
										
										
											2021-07-20 18:02:42 +00:00
										 |  |  |                                 '<a href="' + url + | 
					
						
							|  |  |  |                                 '" rel="nofollow noopener ' + | 
					
						
							|  |  |  |                                 'noreferrer" target="_blank">' + | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |                                 link_text + '</a>') | 
					
						
							| 
									
										
										
										
											2021-07-20 18:02:42 +00:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |             if len(url_desc) > 40: | 
					
						
							|  |  |  |                 url_desc = url_desc[:40] | 
					
						
							| 
									
										
										
										
											2021-07-20 18:02:42 +00:00
										 |  |  |             content += \ | 
					
						
							|  |  |  |                 '<p><a href="' + url + \ | 
					
						
							|  |  |  |                 '" rel="nofollow noopener noreferrer" target="_blank">' + \ | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |                 url_desc + '</a></p>' | 
					
						
							| 
									
										
										
										
											2021-07-20 18:02:42 +00:00
										 |  |  |     return content | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-08 11:16:18 +00:00
										 |  |  | def libretranslate(url: str, text: str, | 
					
						
							| 
									
										
										
										
											2024-02-19 13:41:52 +00:00
										 |  |  |                    source: str, target: str, api_key: str) -> str: | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     """Translate string using libretranslate
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-08-08 11:16:18 +00:00
										 |  |  |     if not url: | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     if not url.endswith('/translate'): | 
					
						
							|  |  |  |         if not url.endswith('/'): | 
					
						
							|  |  |  |             url += "/translate" | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             url += "translate" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     original_text = text | 
					
						
							| 
									
										
										
										
											2021-07-20 20:12:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     # get any links from the text | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  |     links = get_links_from_content(text) | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 10:13:22 +00:00
										 |  |  |     # LibreTranslate doesn't like markup | 
					
						
							| 
									
										
										
										
											2021-12-27 15:43:22 +00:00
										 |  |  |     text = remove_html(text) | 
					
						
							| 
									
										
										
										
											2021-07-20 10:13:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-20 20:04:49 +00:00
										 |  |  |     # remove any links from plain text version of the content | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     for _, url2 in links.items(): | 
					
						
							|  |  |  |         text = text.replace(url2, '') | 
					
						
							| 
									
										
										
										
											2021-07-20 20:04:49 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     lt_params = { | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |         "q": text, | 
					
						
							|  |  |  |         "source": source, | 
					
						
							|  |  |  |         "target": target | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     if api_key: | 
					
						
							|  |  |  |         lt_params["api_key"] = api_key | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     url_params = parse.urlencode(lt_params) | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     req = request.Request(url, data=url_params.encode()) | 
					
						
							| 
									
										
										
										
											2022-05-30 18:33:51 +00:00
										 |  |  |     response_str = None | 
					
						
							| 
									
										
										
										
											2021-07-20 20:09:39 +00:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											2022-05-30 18:33:51 +00:00
										 |  |  |         with request.urlopen(req) as response: | 
					
						
							|  |  |  |             response_str = response.read().decode() | 
					
						
							|  |  |  |     except BaseException as ex: | 
					
						
							|  |  |  |         print('EX: Unable to translate: ' + text + ' ' + str(ex)) | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         return original_text | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-30 18:33:51 +00:00
										 |  |  |     if not response_str: | 
					
						
							|  |  |  |         return original_text | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-19 21:43:48 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         translated_text = \ | 
					
						
							|  |  |  |             '<p>' + json.loads(response_str)['translatedText'] + '</p>' | 
					
						
							|  |  |  |     except json.decoder.JSONDecodeError as ex: | 
					
						
							|  |  |  |         print('EX: json decode error ' + str(ex) + | 
					
						
							|  |  |  |               ' from libretranslate ' + | 
					
						
							|  |  |  |               str(response_str)) | 
					
						
							|  |  |  |         return original_text | 
					
						
							| 
									
										
										
										
											2021-07-20 10:46:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # append links form the original text | 
					
						
							| 
									
										
										
										
											2021-07-20 10:45:04 +00:00
										 |  |  |     if links: | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         translated_text = add_links_to_content(translated_text, links) | 
					
						
							|  |  |  |     return translated_text | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def auto_translate_post(base_dir: str, post_json_object: {}, | 
					
						
							|  |  |  |                         system_language: str, translate: {}) -> str: | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     """Tries to automatically translate the given post
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-26 10:57:03 +00:00
										 |  |  |     if not has_object_dict(post_json_object): | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |         return '' | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     msg_object = post_json_object['object'] | 
					
						
							|  |  |  |     if not msg_object.get('contentMap'): | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |         return '' | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     if not isinstance(msg_object['contentMap'], dict): | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |         return '' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # is the language for this post supported by libretranslate? | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     libretranslate_url = get_config_param(base_dir, "libretranslateUrl") | 
					
						
							|  |  |  |     if not libretranslate_url: | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |         return '' | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |     libretranslate_api_key = get_config_param(base_dir, "libretranslateApiKey") | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |     lang_list = \ | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         libretranslate_languages(libretranslate_url, libretranslate_api_key) | 
					
						
							| 
									
										
										
										
											2021-12-26 10:35:37 +00:00
										 |  |  |     for lang in lang_list: | 
					
						
							| 
									
										
										
										
											2022-05-09 17:20:05 +00:00
										 |  |  |         content = None | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |         if msg_object['contentMap'].get(lang): | 
					
						
							|  |  |  |             content = msg_object['contentMap'][lang] | 
					
						
							| 
									
										
										
										
											2022-05-09 17:20:05 +00:00
										 |  |  |         if not content: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         translated_text = \ | 
					
						
							|  |  |  |             libretranslate(libretranslate_url, content, | 
					
						
							|  |  |  |                            lang, system_language, | 
					
						
							|  |  |  |                            libretranslate_api_key) | 
					
						
							|  |  |  |         if translated_text: | 
					
						
							|  |  |  |             if remove_html(translated_text) == remove_html(content): | 
					
						
							|  |  |  |                 return content | 
					
						
							| 
									
										
										
										
											2022-01-02 21:27:49 +00:00
										 |  |  |             translated_text = \ | 
					
						
							| 
									
										
										
										
											2022-05-09 17:20:05 +00:00
										 |  |  |                 '<p>' + translate['Translated'].upper() + '</p>' + \ | 
					
						
							|  |  |  |                 translated_text | 
					
						
							|  |  |  |         return translated_text | 
					
						
							| 
									
										
										
										
											2021-07-19 19:40:04 +00:00
										 |  |  |     return '' | 
					
						
							| 
									
										
										
										
											2022-12-08 15:28:17 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def set_default_post_language(base_dir: str, nickname: str, domain: str, | 
					
						
							|  |  |  |                               language: str) -> None: | 
					
						
							|  |  |  |     """Sets the default language for new posts
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     default_post_language_filename = \ | 
					
						
							|  |  |  |         acct_dir(base_dir, nickname, domain) + '/.new_post_language' | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         with open(default_post_language_filename, 'w+', | 
					
						
							|  |  |  |                   encoding='utf-8') as fp_lang: | 
					
						
							|  |  |  |             fp_lang.write(language) | 
					
						
							|  |  |  |     except OSError: | 
					
						
							|  |  |  |         print('EX: Unable to write default post language ' + | 
					
						
							|  |  |  |               default_post_language_filename) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def load_default_post_languages(base_dir: str) -> {}: | 
					
						
							|  |  |  |     """Returns a dictionary containing the default languages
 | 
					
						
							|  |  |  |     for new posts for each account | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     result = {} | 
					
						
							| 
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 |  |  |     dir_str = data_dir(base_dir) | 
					
						
							|  |  |  |     for _, dirs, _ in os.walk(dir_str): | 
					
						
							| 
									
										
										
										
											2022-12-08 15:28:17 +00:00
										 |  |  |         for handle in dirs: | 
					
						
							|  |  |  |             if not is_account_dir(handle): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             nickname = handle.split('@')[0] | 
					
						
							|  |  |  |             domain = handle.split('@')[1] | 
					
						
							|  |  |  |             default_post_language_filename = \ | 
					
						
							|  |  |  |                 acct_dir(base_dir, nickname, domain) + '/.new_post_language' | 
					
						
							|  |  |  |             if not os.path.isfile(default_post_language_filename): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 with open(default_post_language_filename, 'r', | 
					
						
							|  |  |  |                           encoding='utf-8') as fp_lang: | 
					
						
							|  |  |  |                     result[nickname] = fp_lang.read() | 
					
						
							|  |  |  |             except OSError: | 
					
						
							|  |  |  |                 print('EX: Unable to read default post language ' + | 
					
						
							|  |  |  |                       default_post_language_filename) | 
					
						
							| 
									
										
										
										
											2023-07-28 12:54:02 +00:00
										 |  |  |         break | 
					
						
							| 
									
										
										
										
											2022-12-08 15:28:17 +00:00
										 |  |  |     return result | 
					
						
							| 
									
										
										
										
											2022-12-08 16:52:47 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_reply_language(base_dir: str, | 
					
						
							|  |  |  |                        post_json_object: {}) -> str: | 
					
						
							|  |  |  |     """Returns the language that te given post was written in
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     post_obj = post_json_object | 
					
						
							|  |  |  |     if has_object_dict(post_json_object): | 
					
						
							|  |  |  |         post_obj = post_json_object['object'] | 
					
						
							|  |  |  |     if not post_obj.get('contentMap'): | 
					
						
							|  |  |  |         return None | 
					
						
							| 
									
										
										
										
											2023-09-20 12:23:45 +00:00
										 |  |  |     for lang, _ in post_obj['contentMap'].items(): | 
					
						
							| 
									
										
										
										
											2022-12-08 16:52:47 +00:00
										 |  |  |         lang_filename = base_dir + '/translations/' + lang + '.json' | 
					
						
							|  |  |  |         if not os.path.isfile(lang_filename): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         return lang | 
					
						
							|  |  |  |     return None |