__filename__ = "content.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.6.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"

import difflib
import math
import html
import os
import email.parser
import urllib.parse
from shutil import copyfile
from dateutil.parser import parse
from flags import is_pgp_encrypted
from flags import contains_pgp_public_key
from flags import is_float
from flags import is_right_to_left_text
from utils import replace_strings
from utils import data_dir
from utils import remove_link_tracking
from utils import string_contains
from utils import string_ends_with
from utils import is_account_dir
from utils import get_url_from_post
from utils import language_right_to_left
from utils import binary_is_image
from utils import get_content_from_post
from utils import get_full_domain
from utils import get_user_paths
from utils import convert_published_to_local_timezone
from utils import has_object_dict
from utils import valid_hash_tag
from utils import dangerous_svg
from utils import remove_domain_port
from utils import get_image_extensions
from utils import load_json
from utils import save_json
from utils import file_last_modified
from utils import get_link_prefixes
from utils import dangerous_markup
from utils import acct_dir
from utils import get_currencies
from utils import remove_html
from utils import remove_eol
from petnames import get_pet_name
from session import download_image

MUSIC_SITES = ('soundcloud.com', 'bandcamp.com', 'resonate.coop')

MAX_LINK_LENGTH = 40

REMOVE_MARKUP = (
    'b', 'i', 'ul', 'ol', 'li', 'em', 'strong',
    'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5'
)

INVALID_CONTENT_STRINGS = (
    'mute', 'unmute', 'editeventpost', 'notifypost',
    'delete', 'options', 'page', 'repeat',
    'bm', 'tl', 'actor', 'unrepeat', 'eventid',
    'unannounce', 'like', 'unlike', 'bookmark',
    'unbookmark', 'likedBy', 'time',
    'year', 'month', 'day', 'editnewpost',
    'graph', 'showshare', 'category', 'showwanted',
    'rmshare', 'rmwanted', 'repeatprivate',
    'unrepeatprivate', 'replyto',
    'replyfollowers', 'replydm', 'replychat', 'editblogpost',
    'handle', 'blockdomain'
)

def valid_url_lengths(content: str, max_url_length: int) -> bool:
    """Returns true if the urls within the given content are all
    within the maximum length
    """
    if '://' not in content:
        return True
    sections = content.split('://')
    ctr = 0
    for text in sections:
        if ctr == 0:
            ctr += 1
            continue
        if '"' not in text:
            continue
        url = text.split('"')[0]
        if '<' not in url and '>' not in url:
            if len(url) > max_url_length:
                return False
    return True
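
# Example of the check above (illustrative values, not part of the module):
# a quoted url longer than max_url_length makes the whole content invalid.
#   long_html = '<a href="https://example.com/' + ('a' * 500) + '">link</a>'
#   valid_url_lengths(long_html, 100)                  # -> False
#   valid_url_lengths('plain text with no urls', 100)  # -> True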
					
						
def remove_html_tag(html_str: str, tag: str) -> str:
    """Removes a given attribute, such as width="...", from a html string
    """
    tag_found = True
    while tag_found:
        match_str = ' ' + tag + '="'
        if match_str not in html_str:
            tag_found = False
            break
        sections = html_str.split(match_str, 1)
        if '"' not in sections[1]:
            tag_found = False
            break
        html_str = sections[0] + sections[1].split('"', 1)[1]
    return html_str
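
# A small sketch of remove_html_tag() (assumed example markup):
#   remove_html_tag('<img width="100" src="pic.png">', 'width')
#   returns '<img src="pic.png">', stripping the width="..." attribute.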
					
						
def _remove_quotes_within_quotes(content: str) -> str:
    """Removes any blockquote inside blockquote
    """
    if '<blockquote>' not in content:
        return content
    if '</blockquote>' not in content:
        return content
    ctr = 1
    found = True
    while found:
        prefix = content.split('<blockquote>', ctr)[0] + '<blockquote>'
        quoted_str = content.split('<blockquote>', ctr)[1]
        if '</blockquote>' not in quoted_str:
            found = False
            continue

        end_str = quoted_str.split('</blockquote>')[1]
        quoted_str = quoted_str.split('</blockquote>')[0]
        if '<blockquote>' not in end_str:
            found = False
        if '<blockquote>' in quoted_str:
            quoted_str = quoted_str.replace('<blockquote>', '')
            content = prefix + quoted_str + '</blockquote>' + end_str
        ctr += 1
    return content
					
						
def html_replace_email_quote(content: str) -> str:
    """Replaces an email style quote "> Some quote" with html blockquote
    """
    if is_pgp_encrypted(content) or contains_pgp_public_key(content):
        return content
    # replace quote paragraph
    if '<p>&quot;' in content:
        if '&quot;</p>' in content:
            if content.count('<p>&quot;') == content.count('&quot;</p>'):
                replacements = {
                    '<p>&quot;': '<p><blockquote>',
                    '&quot;</p>': '</blockquote></p>'
                }
                content = replace_strings(content, replacements)
    if '>\u201c' in content:
        if '\u201d<' in content:
            if content.count('>\u201c') == content.count('\u201d<'):
                replacements = {
                    '>\u201c': '><blockquote>',
                    '\u201d<': '</blockquote><'
                }
                content = replace_strings(content, replacements)
    # replace email style quote
    if '&gt;&gt; ' not in content:
        return content
    content_str = content.replace('<p>', '')
    content_lines = content_str.split('</p>')
    new_content = ''
    for line_str in content_lines:
        if not line_str:
            continue
        if '&gt;&gt; ' not in line_str:
            if line_str.startswith('&gt; '):
                replacements = {
                    '&gt; ': '<blockquote>',
                    '&gt;': '<br>'
                }
                line_str = replace_strings(line_str, replacements)
                new_content += '<p>' + line_str + '</blockquote></p>'
            else:
                new_content += '<p>' + line_str + '</p>'
            continue

        line_str = line_str.replace('&gt;&gt; ', '&gt;<blockquote>')
        if line_str.startswith('&gt;'):
            line_str = line_str.replace('&gt;', '<blockquote>', 1)
        else:
            line_str = line_str.replace('&gt;', '<br>')
        new_content += '<p>' + line_str + '</blockquote></p>'
    return _remove_quotes_within_quotes(new_content)
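
# Sketch of the transformation above, assuming content arrives with '>'
# escaped as '&gt;' (as in federated HTML posts); values are illustrative:
#   html_replace_email_quote('<p>&gt;&gt; earlier</p><p>&gt; quoted reply</p>')
#   wraps the quoted lines in <blockquote> markup and then strips any
#   nested blockquotes via _remove_quotes_within_quotes().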
					
						
def html_replace_quote_marks(content: str) -> str:
    """Replaces quotes with html formatting
    "hello" becomes “hello”
    """
    if is_pgp_encrypted(content) or contains_pgp_public_key(content):
        return content
    if '"' not in content:
        if '&quot;' not in content:
            return content

    # only if there are a few quote marks
    if content.count('"') > 4:
        return content
    if content.count('&quot;') > 4:
        return content

    new_content = content
    if '"' in content:
        sections = content.split('"')
        if len(sections) > 1:
            new_content = ''
            open_quote = True
            markup = False
            for char in content:
                curr_char = char
                if char == '<':
                    markup = True
                elif char == '>':
                    markup = False
                elif char == '"' and not markup:
                    if open_quote:
                        curr_char = '“'
                    else:
                        curr_char = '”'
                    open_quote = not open_quote
                new_content += curr_char

    if '&quot;' in new_content:
        open_quote = True
        content = new_content
        new_content = ''
        ctr = 0
        sections = content.split('&quot;')
        no_of_sections = len(sections)
        for sec in sections:
            new_content += sec
            if ctr < no_of_sections - 1:
                if open_quote:
                    new_content += '“'
                else:
                    new_content += '”'
                open_quote = not open_quote
            ctr += 1
    return new_content
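
# Sketch of the quote substitution above (example string assumed):
#   html_replace_quote_marks('she said "hello" to me')
#   returns 'she said “hello” to me'; quote marks appearing inside
#   markup tags are left untouched.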
					
						
def dangerous_css(filename: str, allow_local_network_access: bool) -> bool:
    """Returns true if the css file contains code which
    can create security problems
    """
    if not os.path.isfile(filename):
        return False

    content = None
    try:
        with open(filename, 'r', encoding='utf-8') as fp_css:
            content = fp_css.read().lower()
    except OSError:
        print('EX: unable to read css file ' + filename)

    if not content:
        return False

    css_matches = (
        'behavior:', ':expression', '?php', '.php',
        'google', 'regexp', 'localhost',
        '127.0.', '192.168', '10.0.', '@import'
    )
    for cssmatch in css_matches:
        if cssmatch in content:
            return True

    # search for non-local web links
    if 'url(' in content:
        url_list = content.split('url(')
        ctr = 0
        for url_str in url_list:
            if ctr == 0:
                ctr = 1
                continue
            if ')' in url_str:
                url_str = url_str.split(')')[0]
                if string_contains(url_str, ('http', 'ipfs', 'ipns')):
                    print('ERROR: non-local web link in CSS ' +
                          filename)
                    return True
            ctr += 1

    # an attacker can include html inside of the css
    # file as a comment and this may then be run from the html
    if dangerous_markup(content, allow_local_network_access, []):
        return True
    return False
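
# Minimal usage sketch (hypothetical filename): the check returns True when
# the stylesheet contains expressions, php, @import, non-local url()
# references or embedded html.
#   if dangerous_css('theme.css', allow_local_network_access=False):
#       print('EX: unsafe css detected')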
					
						
def switch_words(base_dir: str, nickname: str, domain: str, content: str,
                 rules: list[str] = []) -> str:
    """Performs word replacements. eg. Trump -> The Orange Menace
    """
    if is_pgp_encrypted(content) or contains_pgp_public_key(content):
        return content

    if not rules:
        switch_words_filename = \
            acct_dir(base_dir, nickname, domain) + '/replacewords.txt'
        if not os.path.isfile(switch_words_filename):
            return content
        try:
            with open(switch_words_filename, 'r',
                      encoding='utf-8') as fp_words:
                rules = fp_words.readlines()
        except OSError:
            print('EX: unable to read switches ' + switch_words_filename)

    for line in rules:
        replace_str = remove_eol(line)
        splitters = ('->', ':', ',', ';', '-')
        word_transform = None
        for split_str in splitters:
            if split_str in replace_str:
                word_transform = replace_str.split(split_str)
                break
        if not word_transform:
            continue
        if len(word_transform) == 2:
            replace_str1 = word_transform[0].strip().replace('"', '')
            replace_str2 = word_transform[1].strip().replace('"', '')
            content = content.replace(replace_str1, replace_str2)
    return content
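
# Example rules as they might appear in replacewords.txt (illustrative):
#   OldWord -> NewWord
#   "some phrase" : "another phrase"
# Each rule is split on whichever of '->', ':', ',', ';' or '-' is found
# first in that order, and both sides are stripped of whitespace and
# double quote characters before the replacement is applied.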
					
						
def _save_custom_emoji(session, base_dir: str, emoji_name: str, url: str,
                       debug: bool) -> None:
    """Saves custom emoji to file
    """
    if not session:
        if debug:
            print('EX: _save_custom_emoji no session')
        return
    if '.' not in url:
        return
    ext = url.split('.')[-1]
    if ext != 'png':
        if debug:
            print('EX: Custom emoji is wrong format ' + url)
        return
    emoji_name = emoji_name.replace(':', '').strip().lower()
    custom_emoji_dir = base_dir + '/emojicustom'
    if not os.path.isdir(custom_emoji_dir):
        os.mkdir(custom_emoji_dir)
    emoji_image_filename = custom_emoji_dir + '/' + emoji_name + '.' + ext
    if not download_image(session, url,
                          emoji_image_filename, debug, False):
        if debug:
            print('EX: custom emoji not downloaded ' + url)
        return
    emoji_json_filename = custom_emoji_dir + '/emoji.json'
    emoji_json = {}
    if os.path.isfile(emoji_json_filename):
        emoji_json = load_json(emoji_json_filename)
        if not emoji_json:
            emoji_json = {}
    if not emoji_json.get(emoji_name):
        emoji_json[emoji_name] = emoji_name
        save_json(emoji_json, emoji_json_filename)
        if debug:
            print('EX: Saved custom emoji ' + emoji_json_filename)
    elif debug:
        print('EX: custom emoji already saved')


def _get_emoji_name_from_code(base_dir: str, emoji_code: str) -> str:
    """Returns the emoji name from its code
    """
    emojis_filename = base_dir + '/emoji/emoji.json'
    if not os.path.isfile(emojis_filename):
        emojis_filename = base_dir + '/emoji/default_emoji.json'
        if not os.path.isfile(emojis_filename):
            return None
    emojis_json = load_json(emojis_filename)
    if not emojis_json:
        return None
    for emoji_name, code in emojis_json.items():
        if code == emoji_code:
            return emoji_name
    return None
					
						
def _update_common_emoji(base_dir: str, emoji_content: str) -> None:
    """Updates the list of commonly used emoji
    """
    if '.' in emoji_content:
        emoji_content = emoji_content.split('.')[0]
    emoji_content = emoji_content.replace(':', '')
    if emoji_content.startswith('0x'):
        # lookup the name for an emoji code
        emoji_code = emoji_content[2:]
        emoji_content = _get_emoji_name_from_code(base_dir, emoji_code)
        if not emoji_content:
            return
    common_emoji_filename = data_dir(base_dir) + '/common_emoji.txt'
    common_emoji = None
    if os.path.isfile(common_emoji_filename):
        try:
            with open(common_emoji_filename, 'r',
                      encoding='utf-8') as fp_emoji:
                common_emoji = fp_emoji.readlines()
        except OSError:
            print('EX: unable to load common emoji file')
    if common_emoji:
        new_common_emoji: list[str] = []
        emoji_found = False
        for line in common_emoji:
            if ' ' + emoji_content in line:
                if not emoji_found:
                    emoji_found = True
                    counter = 1
                    count_str = line.split(' ')[0]
                    if count_str.isdigit():
                        counter = int(count_str) + 1
                    count_str = str(counter).zfill(16)
                    line = count_str + ' ' + emoji_content
                    new_common_emoji.append(line)
            else:
                line1 = remove_eol(line)
                new_common_emoji.append(line1)
        if not emoji_found:
            new_common_emoji.append(str(1).zfill(16) + ' ' + emoji_content)
        new_common_emoji.sort(reverse=True)
        try:
            with open(common_emoji_filename, 'w+',
                      encoding='utf-8') as fp_emoji:
                for line in new_common_emoji:
                    fp_emoji.write(line + '\n')
        except OSError:
            print('EX: error writing common emoji 1')
            return
    else:
        line = str(1).zfill(16) + ' ' + emoji_content + '\n'
        try:
            with open(common_emoji_filename, 'w+',
                      encoding='utf-8') as fp_emoji:
                fp_emoji.write(line)
        except OSError:
            print('EX: error writing common emoji 2')
            return
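
# Each line of common_emoji.txt pairs a zero-padded usage count with an
# emoji name, so that the reverse sort above ranks the most frequently
# used emoji first. Illustrative contents:
#   0000000000000012 smile
#   0000000000000003 rocket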
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  |  | def replace_emoji_from_tags(session, base_dir: str, | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |                             content: str, tag: [], message_type: str, | 
					
						
							| 
									
										
										
										
											2022-04-21 13:03:40 +00:00
										 |  |  |  |                             debug: bool, screen_readable: bool) -> str: | 
					
						
							| 
									
										
										
										
											2019-09-29 16:28:02 +00:00
										 |  |  |  |     """Uses the tags to replace :emoji: with html image markup
 | 
					
						
							|  |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |     for tag_item in tag: | 
					
						
							| 
									
										
										
										
											2023-09-19 12:34:30 +00:00
										 |  |  |  |         if not isinstance(tag_item, dict): | 
					
						
							|  |  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |         if not tag_item.get('type'): | 
					
						
							| 
									
										
										
										
											2019-09-29 17:20:10 +00:00
										 |  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |         if tag_item['type'] != 'Emoji': | 
					
						
							| 
									
										
										
										
											2019-09-29 17:20:10 +00:00
										 |  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |         if not tag_item.get('name'): | 
					
						
							| 
									
										
										
										
											2019-09-29 16:28:02 +00:00
										 |  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |         if not tag_item.get('icon'): | 
					
						
							| 
									
										
										
										
											2019-09-29 16:28:02 +00:00
										 |  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |         if not tag_item['icon'].get('url'): | 
					
						
							| 
									
										
										
										
											2019-09-29 16:28:02 +00:00
										 |  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2023-12-09 14:18:24 +00:00
										 |  |  |  |         url_str = get_url_from_post(tag_item['icon']['url']) | 
					
						
							|  |  |  |  |         if '/' not in url_str: | 
					
						
							| 
									
										
										
										
											2020-02-21 15:09:31 +00:00
										 |  |  |  |             continue | 
					
						
							| 
									
										
										
										
        if tag_item['name'] not in content:
            continue
        tag_url = remove_html(url_str)
        if not tag_url:
            continue
        icon_name = tag_url.split('/')[-1]
        if len(icon_name) <= 1:
            continue
        if '.' not in icon_name:
            continue
        icon_name = icon_name.split('.')[0]
        # see https://unicode.org/
        # emoji/charts/full-emoji-list.html
        if '-' not in icon_name:
            # a single code
            replaced = False
            try:
                replace_char = chr(int("0x" + icon_name, 16))
                if not screen_readable:
                    replace_char = \
                        '<span aria-hidden="true">' + \
                        replace_char + '</span>'
                content = \
                    content.replace(tag_item['name'],
                                    replace_char)
                replaced = True
            except BaseException:
                if debug:
                    print('EX: replace_emoji_from_tags 1 ' +
                          'no conversion of ' +
                          str(icon_name) + ' to chr ' +
                          tag_item['name'] + ' ' +
                          tag_url)
            if not replaced:
                _save_custom_emoji(session, base_dir,
                                   tag_item['name'],
                                   tag_url, debug)
                _update_common_emoji(base_dir, icon_name)
            else:
                _update_common_emoji(base_dir,
                                     "0x" + icon_name)
        else:
            # sequence of codes
            icon_codes = icon_name.split('-')
            icon_code_sequence = ''
            for icode in icon_codes:
                replaced = False
                try:
                    icon_code_sequence += chr(int("0x" +
                                                  icode, 16))
                    replaced = True
                except BaseException:
                    icon_code_sequence = ''
                    if debug:
                        print('EX: ' +
                              'replace_emoji_from_tags 2 ' +
                              'no conversion of ' +
                              str(icode) + ' to chr ' +
                              tag_item['name'] + ' ' +
                              tag_url)
                if not replaced:
                    _save_custom_emoji(session, base_dir,
                                       tag_item['name'],
                                       tag_url, debug)
                    _update_common_emoji(base_dir,
                                         icon_name)
                else:
                    _update_common_emoji(base_dir,
                                         "0x" + icon_name)
            if icon_code_sequence:
                if not screen_readable:
                    icon_code_sequence = \
                        '<span aria-hidden="true">' + \
                        icon_code_sequence + '</span>'
                content = content.replace(tag_item['name'],
                                          icon_code_sequence)

        html_class = 'emoji'
        if message_type == 'post header':
            html_class = 'emojiheader'
        if message_type == 'profile':
            html_class = 'emojiprofile'
        if screen_readable:
            emoji_tag_name = tag_item['name'].replace(':', '')
        else:
            emoji_tag_name = ''
        url_str = get_url_from_post(tag_item['icon']['url'])
        tag_url = remove_html(url_str)
        emoji_html = "<img src=\"" + tag_url + "\" alt=\"" + \
            emoji_tag_name + \
            "\" align=\"middle\" class=\"" + html_class + "\"/>"
        content = content.replace(tag_item['name'], emoji_html)
    return content


def _add_music_tag(content: str, tag: str) -> str:
    """If a music link is found then ensure that the post is
    tagged appropriately
    """
    if '#podcast' in content or '#documentary' in content:
        return content
    if '#' not in tag:
        tag = '#' + tag
    if tag in content:
        return content
    music_site_found = False
    for site in MUSIC_SITES:
        if site + '/' in content:
            music_site_found = True
            break
    if not music_site_found:
        return content
    return ':music: ' + content + ' ' + tag + ' '


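# Illustrative example for _add_music_tag (not part of the original source).
# Assuming MUSIC_SITES contains 'bandcamp.com', a post linking to a track
# gains a music emoji shortcode and the supplied hashtag:
#   _add_music_tag('new album https://bandcamp.com/band/album', 'music')
#   -> ':music: new album https://bandcamp.com/band/album #music '

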
def _shorten_linked_urls(content: str) -> str:
    """If content comes with a web link included then make sure
    that it is short enough
    """
    if 'href=' not in content:
        return content
    if '>' not in content:
        return content
    if '<' not in content:
        return content
    sections = content.split('>')
    ctr = 0
    for section_text in sections:
        if ctr == 0:
            ctr += 1
            continue
        if '<' not in section_text:
            ctr += 1
            continue
        section_text = section_text.split('<')[0]
        if ' ' in section_text:
            continue
        if len(section_text) > MAX_LINK_LENGTH:
            content = content.replace('>' + section_text + '<',
                                      '>' +
                                      section_text[:MAX_LINK_LENGTH-1] + '<')
        ctr += 1
    return content


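# Illustrative example for _shorten_linked_urls (not part of the original
# source). For a linked url whose visible text exceeds MAX_LINK_LENGTH
# characters, only the text between > and < is truncated; the href itself
# is left intact. e.g. '<a href="...">very-long-visible-url</a>' keeps its
# href but the visible text is cut to MAX_LINK_LENGTH-1 characters.

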
def _contains_doi_reference(wrd: str, replace_dict: {}) -> bool:
    """Handle DOI scientific references
    """
    if not wrd.startswith('doi:') and \
       not wrd.startswith('DOI:'):
        return False

    doi_ref_str = wrd.split(':', 1)[1]
    doi_site = 'https://sci-hub.ru'
    markup = '<a href="' + doi_site + '/' + \
        doi_ref_str + '" tabindex="10" ' + \
        'rel="nofollow noopener noreferrer" ' + \
        'target="_blank">' + \
        '<span class="ellipsis">doi:' + doi_ref_str + \
        '</span></a>'
    replace_dict[wrd] = markup
    return True


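# Illustrative example for _contains_doi_reference (not part of the original
# source):
#   replace_dict = {}
#   _contains_doi_reference('doi:10.1234/example', replace_dict)  # -> True
#   replace_dict['doi:10.1234/example'] now holds an anchor linking to
#   https://sci-hub.ru/10.1234/example with the visible text
#   'doi:10.1234/example'.

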
def _contains_arxiv_reference(wrd: str, replace_dict: {}) -> bool:
    """Handle arxiv scientific references
    """
    if not wrd.startswith('arXiv:') and \
       not wrd.startswith('arx:') and \
       not wrd.startswith('arxiv:'):
        return False

    arxiv_ref_str = wrd.split(':', 1)[1].lower()
    if '.' in arxiv_ref_str:
        arxiv_ref = arxiv_ref_str.split('.')
    elif ':' in arxiv_ref_str:
        arxiv_ref = arxiv_ref_str.split(':')
    else:
        return False
    if len(arxiv_ref) != 2:
        return False
    if not arxiv_ref[0].isdigit():
        return False
    arxiv_day = arxiv_ref[1]
    if 'v' in arxiv_day:
        arxiv_day = arxiv_day.split('v')[0]
    if not arxiv_day.isdigit():
        return False
    ref_str = arxiv_ref[0] + '.' + arxiv_ref[1]
    markup = '<a href="https://arxiv.org/abs/' + \
        ref_str + '" tabindex="10" ' + \
        'rel="nofollow noopener noreferrer" ' + \
        'target="_blank">' + \
        '<span class="ellipsis">arXiv:' + ref_str + \
        '</span></a>'
    replace_dict[wrd] = markup
    return True


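# Illustrative example for _contains_arxiv_reference (not part of the
# original source):
#   replace_dict = {}
#   _contains_arxiv_reference('arXiv:2001.12345v2', replace_dict)  # -> True
#   replace_dict['arXiv:2001.12345v2'] now holds an anchor linking to
#   https://arxiv.org/abs/2001.12345v2 with the visible text
#   'arXiv:2001.12345v2'.

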
def _contains_academic_references(content: str) -> bool:
    """Does the given content contain academic references
    """
    prefixes = (
        'arXiv:', 'arx:', 'arxiv:',
        'doi:', 'DOI:'
    )
    for reference in prefixes:
        if reference in content:
            return True
    return False


def remove_link_trackers_from_content(content: str) -> str:
    """ Removes any link trackers from urls within the content
    """
    if '?utm_' not in content:
        return content
    sections = content.split('?utm_')
    ctr = 0
    new_content = ''
    for section_str in sections:
        if ctr == 0:
            new_content = section_str
            ctr = 1
            continue
        if '"' in section_str:
            new_content += '"' + section_str.split('"', 1)[1]
        else:
            new_content += section_str
        ctr += 1
    return new_content


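# Illustrative example for remove_link_trackers_from_content (not part of
# the original source). Tracking parameters are stripped from urls which
# appear inside quoted href attributes:
#   remove_link_trackers_from_content(
#       '<a href="https://example.com/page?utm_source=feed">link</a>')
#   -> '<a href="https://example.com/page">link</a>'

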
def add_web_links(content: str) -> str:
    """Adds markup for web links
    """
    content = _shorten_linked_urls(content)

    if ':' not in content:
        return content

    prefixes = get_link_prefixes()

    # do any of these prefixes exist within the content?
    prefix_found = False
    for prefix in prefixes:
        if prefix in content:
            prefix_found = True
            break

    # if there are no prefixes then just keep the content we have
    if not prefix_found:
        if _contains_academic_references(content):
            prefix_found = True
        else:
            return content

    content = content.replace('\r', '')
    words = content.replace('\n', ' --linebreak-- ').split(' ')
    replace_dict = {}
    for wrd in words:
        if ':' not in wrd:
            continue
        if _contains_arxiv_reference(wrd, replace_dict):
            continue
        if _contains_doi_reference(wrd, replace_dict):
            continue
        # does the word begin with a link prefix?
        prefix_found = False
        for prefix in prefixes:
            if wrd.startswith(prefix):
                prefix_found = True
                break
        if not prefix_found:
            continue
        # the word contains a link prefix
        url = wrd
        if url.endswith('.') or wrd.endswith(';'):
            url = url[:-1]
        url = remove_link_tracking(url)
        markup = '<a href="' + url + '" tabindex="10" ' + \
            'rel="nofollow noopener noreferrer" target="_blank">'
        for prefix in prefixes:
            if url.startswith(prefix):
                markup += '<span class="invisible">' + prefix + '</span>'
                break
        link_text = url
        for prefix in prefixes:
            link_text = link_text.replace(prefix, '')
        # prevent links from becoming too long
        if len(link_text) > MAX_LINK_LENGTH:
            markup += '<span class="ellipsis">' + \
                link_text[:MAX_LINK_LENGTH] + '</span>'
            markup += '<span class="invisible">' + \
                link_text[MAX_LINK_LENGTH:] + '</span></a>'
        else:
            markup += '<span class="ellipsis">' + link_text + '</span></a>'
        replace_dict[url] = markup

    # do the replacements
    for url, markup in replace_dict.items():
        content = content.replace(url, markup)

    # replace any line breaks
    content = content.replace(' --linebreak-- ', '<br>')

    return content


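# Illustrative example for add_web_links (not part of the original source).
# Assuming get_link_prefixes() includes 'https://', a bare url such as
# 'https://example.com/notes' within the post text becomes an anchor tag:
# the 'https://' prefix is wrapped in an invisible span, the remainder in
# an ellipsis span, and any link text beyond MAX_LINK_LENGTH characters is
# hidden rather than removed.

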
def safe_web_text(arbitrary_html: str) -> str:
    """Turns arbitrary html into something safe.
    So if the arbitrary html contains attack scripts those will be removed
    """
    # first remove the markup, so that we have something safe
    safe_text = remove_html(arbitrary_html)
    if not safe_text:
        return ''
    # remove any spurious characters found in podcast descriptions
    remove_chars = ('Œ', 'â€', 'ğŸ', '�', ']]', '__')
    for remchar in remove_chars:
        safe_text = safe_text.replace(remchar, '')
    # recreate any url links safely
    return add_web_links(safe_text)


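# Illustrative example for safe_web_text (not part of the original source):
#   safe_web_text('<p onload="evil()">Visit https://example.com</p>')
#   removes the html markup (and with it any attack scripts) and then
#   re-adds safe anchor markup for the https://example.com link.

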
def _add_hash_tags(word_str: str, http_prefix: str, domain: str,
                   replace_hashtags: {}, post_hashtags: {}) -> bool:
    """Detects hashtags and adds them to the replacements dict
    Also updates the hashtags list to be added to the post
    """
    if replace_hashtags.get(word_str):
        return True
    hashtag = word_str[1:]
    if not valid_hash_tag(hashtag):
        return False
    hashtag_url = http_prefix + "://" + domain + "/tags/" + hashtag
    post_hashtags[hashtag] = {
        'href': hashtag_url,
        'name': '#' + hashtag,
        'type': 'Hashtag'
    }
    replace_hashtags[word_str] = "<a href=\"" + hashtag_url + \
        "\" class=\"mention hashtag\" rel=\"tag\" tabindex=\"10\">" + \
        "<span aria-hidden=\"true\">#</span><span>" + \
        hashtag + "</span></a>"
    return True


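# Illustrative example for _add_hash_tags (not part of the original source):
#   replace_hashtags, post_hashtags = {}, {}
#   _add_hash_tags('#fediverse', 'https', 'example.org',
#                  replace_hashtags, post_hashtags)  # -> True
#   post_hashtags['fediverse'] references https://example.org/tags/fediverse
#   and replace_hashtags['#fediverse'] holds the corresponding anchor markup.

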
def replace_remote_hashtags(content: str,
                            nickname: str, domain: str) -> str:
    """Replaces remote hashtags with a local version
    """
    if not domain:
        return content

    if ' href="' not in content:
        return content

    sections = content.split(' href="')
    ctr = 0
    replacements = {}
    for section in sections:
        if ctr == 0:
            ctr += 1
            continue
        if '"' not in section:
            ctr += 1
            continue
        link = section.split('"')[0]
        if '://' not in link:
            continue
        if '?remotetag=' in link:
            ctr += 1
            continue
        if '/tags/' not in link:
            ctr += 1
            continue
        if '/' + domain not in link:
            new_link = '/users/' + nickname + \
                '?remotetag=' + link.replace('/', '--')
            replacements[link] = new_link
        ctr += 1
    if not replacements:
        return content
    for old_link, new_link in replacements.items():
        content = content.replace('"' + old_link + '"',
                                  '"' + new_link + '"')
    return content


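# Illustrative example for replace_remote_hashtags (not part of the original
# source). A hashtag link pointing at another instance is rewritten so that
# it opens via the local account instead:
#   replace_remote_hashtags(
#       '<a href="https://other.social/tags/cats">#cats</a>',
#       'alice', 'example.org')
#   -> '<a href="/users/alice?remotetag=https:----other.social--tags--cats">#cats</a>'

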
def _add_emoji(base_dir: str, word_str: str,
               http_prefix: str, domain: str,
               replace_emoji: {}, post_tags: {},
               emoji_dict: {}) -> bool:
    """Detects Emoji and adds them to the replacements dict
    Also updates the tags list to be added to the post
    """
    if not word_str.startswith(':'):
        return False
    if not word_str.endswith(':'):
        return False
    if len(word_str) < 3:
        return False
    if replace_emoji.get(word_str):
        return True
    # remove leading and trailing : characters
    emoji = word_str[1:]
    emoji = emoji[:-1]
    # is the text of the emoji valid?
    if not valid_hash_tag(emoji):
        return False
    if not emoji_dict.get(emoji):
        return False
    emoji_filename = base_dir + '/emoji/' + emoji_dict[emoji] + '.png'
    if not os.path.isfile(emoji_filename):
        emoji_filename = \
            base_dir + '/emojicustom/' + emoji_dict[emoji] + '.png'
        if not os.path.isfile(emoji_filename):
            return False
    emoji_url = http_prefix + "://" + domain + \
        "/emoji/" + emoji_dict[emoji] + '.png'
    post_tags[emoji] = {
        'icon': {
            'mediaType': 'image/png',
            'type': 'Image',
            'url': emoji_url
        },
        'name': ':' + emoji + ':',
        "updated": file_last_modified(emoji_filename),
        "id": emoji_url.replace('.png', ''),
        'type': 'Emoji'
    }
    return True


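# Illustrative example for _add_emoji (not part of the original source).
# Assuming emoji_dict maps 'smile' to an image filename which exists under
# base_dir + '/emoji/', a shortcode in a post is registered as an Emoji tag:
#   post_tags, replace_emoji = {}, {}
#   _add_emoji(base_dir, ':smile:', 'https', 'example.org',
#              replace_emoji, post_tags, emoji_dict)  # -> True
#   post_tags['smile'] then contains the icon url and an 'Emoji' type tag.

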
def post_tag_exists(tag_type: str, tag_name: str, tags: {}) -> bool:
    """Returns true if a tag exists in the given dict
    """
    for tag in tags:
        if tag['name'] == tag_name and tag['type'] == tag_type:
            return True
    return False


def _mention_to_url(base_dir: str, http_prefix: str,
                    domain: str, nickname: str) -> str:
    """Convert https://somedomain/@somenick to
    https://somedomain/users/somenick
    This uses the hack of trying the cache directory to see if
    there is a matching actor
    """
    possible_paths = get_user_paths()
    cache_dir = base_dir + '/cache/actors'
    cache_path_start = cache_dir + '/' + http_prefix + ':##' + domain
    for users_path in possible_paths:
        users_path = users_path.replace('/', '#')
        possible_cache_entry = \
            cache_path_start + users_path + nickname + '.json'
        if os.path.isfile(possible_cache_entry):
            return http_prefix + '://' + \
                domain + users_path.replace('#', '/') + nickname
    possible_cache_entry = \
        cache_path_start + '#' + nickname + '.json'
    if os.path.isfile(possible_cache_entry):
        return http_prefix + '://' + domain + '/' + nickname
    return http_prefix + '://' + domain + '/users/' + nickname


							|  |  |  |  | def _add_mention(base_dir: str, word_str: str, http_prefix: str, | 
					
						
							|  |  |  |  |                  following: [], petnames: [], replace_mentions: {}, | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |                  recipients: [], tags: {}) -> bool: | 
					
						
							| 
									
										
										
										
											2020-03-29 09:59:54 +00:00
										 |  |  |  |     """Detects mentions and adds them to the replacements dict and
 | 
					
						
							|  |  |  |  |     recipients list | 
					
						
							| 
									
										
										
										
											2019-08-09 09:09:21 +00:00
										 |  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |     possible_handle = word_str[1:] | 
					
						
							| 
									
										
										
										
											2019-08-19 10:05:50 +00:00
										 |  |  |  |     # @nick | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |     if following and '@' not in possible_handle: | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |  |         # fall back to a best effort match against the following list | 
					
						
							|  |  |  |  |         # if no domain was specified. eg. @nick | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |         possible_nickname = possible_handle | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |  |         for follow in following: | 
					
						
							| 
									
										
										
										
											2021-01-29 21:33:23 +00:00
										 |  |  |  |             if '@' not in follow: | 
					
						
							|  |  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |             follow_nick = follow.split('@')[0] | 
					
						
							| 
									
										
										
										
											2024-08-30 12:25:23 +00:00
										 |  |  |  |             if possible_nickname != follow_nick: | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             follow_str = remove_eol(follow) | 
					
						
							|  |  |  |  |             replace_domain = follow_str.split('@')[1] | 
					
						
							|  |  |  |  |             recipient_actor = \ | 
					
						
							|  |  |  |  |                 _mention_to_url(base_dir, http_prefix, | 
					
						
							|  |  |  |  |                                 replace_domain, possible_nickname) | 
					
						
							|  |  |  |  |             if recipient_actor not in recipients: | 
					
						
							|  |  |  |  |                 recipients.append(recipient_actor) | 
					
						
							|  |  |  |  |             tags[word_str] = { | 
					
						
							|  |  |  |  |                 'href': recipient_actor, | 
					
						
							|  |  |  |  |                 'name': word_str, | 
					
						
							|  |  |  |  |                 'type': 'Mention' | 
					
						
							|  |  |  |  |             } | 
					
						
							|  |  |  |  |             replace_mentions[word_str] = \ | 
					
						
							|  |  |  |  |                 "<span class=\"h-card\"><a href=\"" + recipient_actor + \ | 
					
						
							|  |  |  |  |                 "\" tabindex=\"10\" class=\"u-url mention\">@<span>" + \ | 
					
						
							|  |  |  |  |                 possible_nickname + "</span></a></span>" | 
					
						
							|  |  |  |  |             return True | 
					
						
							|  |  |  |  |         # try replacing petnames with mentions | 
					
						
							|  |  |  |  |         follow_ctr = 0 | 
					
						
							|  |  |  |  |         for follow in following: | 
					
						
							|  |  |  |  |             if '@' not in follow: | 
					
						
							|  |  |  |  |                 follow_ctr += 1 | 
					
						
							|  |  |  |  |                 continue | 
					
						
							|  |  |  |  |             pet = remove_eol(petnames[follow_ctr]) | 
					
						
							|  |  |  |  |             if pet: | 
					
						
							|  |  |  |  |                 if possible_nickname != pet: | 
					
						
							|  |  |  |  |                     follow_ctr += 1 | 
					
						
							|  |  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2022-06-21 11:58:50 +00:00
										 |  |  |  |                 follow_str = remove_eol(follow) | 
					
						
							| 
									
										
										
										
											2024-08-30 12:25:23 +00:00
										 |  |  |  |                 replace_nickname = follow_str.split('@')[0] | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |                 replace_domain = follow_str.split('@')[1] | 
					
						
							| 
									
										
										
										
											2022-09-03 17:09:00 +00:00
										 |  |  |  |                 recipient_actor = \ | 
					
						
							|  |  |  |  |                     _mention_to_url(base_dir, http_prefix, | 
					
						
							| 
									
										
										
										
											2024-08-30 12:25:23 +00:00
										 |  |  |  |                                     replace_domain, replace_nickname) | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |                 if recipient_actor not in recipients: | 
					
						
							|  |  |  |  |                     recipients.append(recipient_actor) | 
					
						
							|  |  |  |  |                 tags[word_str] = { | 
					
						
							|  |  |  |  |                     'href': recipient_actor, | 
					
						
							|  |  |  |  |                     'name': word_str, | 
					
						
							| 
									
										
										
										
											2019-08-19 12:13:18 +00:00
										 |  |  |  |                     'type': 'Mention' | 
					
						
							|  |  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |                 replace_mentions[word_str] = \ | 
					
						
							| 
									
										
										
										
											2024-08-30 12:25:23 +00:00
										 |  |  |  |                     "<span class=\"h-card\"><a href=\"" + \ | 
					
						
							|  |  |  |  |                     recipient_actor + "\" tabindex=\"10\" " + \ | 
					
						
							|  |  |  |  |                     "class=\"u-url mention\">@<span>" + \ | 
					
						
							|  |  |  |  |                     replace_nickname + "</span></a></span>" | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |  |                 return True | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |             follow_ctr += 1 | 
					
						
							| 
									
										
										
										
											2019-08-09 09:48:51 +00:00
										 |  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |     possible_nickname = None | 
					
						
							|  |  |  |  |     possible_domain = None | 
					
						
							|  |  |  |  |     if '@' not in possible_handle: | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |     possible_nickname = possible_handle.split('@')[0] | 
					
						
							|  |  |  |  |     if not possible_nickname: | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |     possible_domain = \ | 
					
						
							|  |  |  |  |         possible_handle.split('@')[1].strip('\n').strip('\r') | 
					
						
							|  |  |  |  |     if not possible_domain: | 
					
						
							| 
									
										
										
										
											2019-10-29 20:15:21 +00:00
										 |  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2019-08-19 11:41:15 +00:00
										 |  |  |  |     if following: | 
					
						
							|  |  |  |  |         for follow in following: | 
					
						
							| 
									
										
										
										
											2022-06-21 11:58:50 +00:00
										 |  |  |  |             if remove_eol(follow) != possible_handle: | 
					
						
							| 
									
										
										
										
											2019-08-19 11:41:15 +00:00
										 |  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2022-09-03 17:09:00 +00:00
										 |  |  |  |             recipient_actor = \ | 
					
						
							|  |  |  |  |                 _mention_to_url(base_dir, http_prefix, | 
					
						
							|  |  |  |  |                                 possible_domain, possible_nickname) | 
					
						
							| 
									
										
										
										
											2021-12-30 20:24:05 +00:00
										 |  |  |  |             if recipient_actor not in recipients: | 
					
						
							|  |  |  |  |                 recipients.append(recipient_actor) | 
					
						
							|  |  |  |  |             tags[word_str] = { | 
					
						
                'href': recipient_actor,
                'name': word_str,
                'type': 'Mention'
            }
            replace_mentions[word_str] = \
                "<span class=\"h-card\"><a href=\"" + recipient_actor + \
                "\" tabindex=\"10\" class=\"u-url mention\">@<span>" + \
                possible_nickname + "</span></a></span>"
            return True
    # @nick@domain
    if not (possible_domain == 'localhost' or '.' in possible_domain):
        return False
    recipient_actor = \
        _mention_to_url(base_dir, http_prefix,
                        possible_domain, possible_nickname)
    if recipient_actor not in recipients:
        recipients.append(recipient_actor)
    tags[word_str] = {
        'href': recipient_actor,
        'name': word_str,
        'type': 'Mention'
    }
    replace_mentions[word_str] = \
        "<span class=\"h-card\"><a href=\"" + recipient_actor + \
        "\" tabindex=\"10\" class=\"u-url mention\">@<span>" + \
        possible_nickname + "</span></a></span>"
    return True

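# Illustrative example of the markup produced by _add_mention above: assuming
# _mention_to_url() resolves @alice@example.com to an actor url such as
# https://example.com/users/alice (hypothetical), the replacement html is
#   <span class="h-card"><a href="https://example.com/users/alice"
#   tabindex="10" class="u-url mention">@<span>alice</span></a></span>
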
def replace_content_duplicates(content: str) -> str:
    """Replaces invalid duplicates within content
    """
    if is_pgp_encrypted(content) or contains_pgp_public_key(content):
        return content
    while '<<' in content:
        content = content.replace('<<', '<')
    while '>>' in content:
        content = content.replace('>>', '>')
    content = content.replace('<\\p>', '')
    return content

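# Illustrative example for replace_content_duplicates above:
#   replace_content_duplicates('<<p>>Hello<\\p>') returns '<p>Hello'
# since repeated angle brackets are collapsed and stray '<\\p>' is removed.
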
def remove_text_formatting(content: str, bold_reading: bool) -> str:
    """Removes markup for bold, italics, etc
    """
    if is_pgp_encrypted(content) or contains_pgp_public_key(content):
        return content
    if '<' not in content:
        return content
    for markup in REMOVE_MARKUP:
        if bold_reading:
            if markup == 'b':
                continue
        content = content.replace('<' + markup + '>', '')
        content = content.replace('</' + markup + '>', '')
        content = content.replace('<' + markup.upper() + '>', '')
        content = content.replace('</' + markup.upper() + '>', '')
    return content

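# Illustrative example for remove_text_formatting above, assuming the
# module-level REMOVE_MARKUP tuple includes 'i' and 'b':
#   remove_text_formatting('<b>bold</b> <i>italic</i>', False) would return
#   'bold italic', while passing bold_reading=True keeps the <b></b> markup.
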
def remove_long_words(content: str, max_word_length: int,
                      long_words_list: []) -> str:
    """Breaks up long words so that on mobile screens this doesn't
    disrupt the layout
    """
    if is_pgp_encrypted(content) or contains_pgp_public_key(content):
        return content
    content = replace_content_duplicates(content)

    non_html_list = False
    if '\n\n' in content and '<p>' not in content:
        content = '<p>' + content.replace('\n\n', '</p> <p>') + '</p>'
        non_html_list = True
    non_html_list2 = False
    if '\n' in content and '<p>' not in content:
        content = '<p>' + content.replace('\n', '</p> <p>') + '</p>'
        non_html_list2 = True

    if ' ' not in content and '</p><p>' not in content:
        # handle a single very long string with no spaces
        content_str = content.replace('<p>', '').replace(r'<\p>', '')
        if '://' not in content_str:
            if len(content_str) > max_word_length:
                if '<p>' in content:
                    content = '<p>' + content_str[:max_word_length] + r'<\p>'
                else:
                    content = content[:max_word_length]
                return content
    content = content.replace('<p></p>', '<p> </p>')
    words = content.split(' ')
    if not long_words_list:
        long_words_list: list[str] = []
        for word_str in words:
            if len(word_str) > max_word_length:
                if word_str not in long_words_list:
                    long_words_list.append(word_str)
    for word_str in long_words_list:
        original_word_str = word_str
        if word_str.startswith('<p>'):
            word_str = word_str.replace('<p>', '')
        if word_str.startswith('<'):
            continue
        if len(word_str) == 76:
            if word_str.upper() == word_str:
                # tox address
                continue
        if '=\"' in word_str:
            continue
        if '@' in word_str:
            if '@@' not in word_str:
                continue
        if '=.ed25519' in word_str:
            continue
        if '.onion' in word_str:
            continue
        if '.i2p' in word_str:
            continue
        if 'https:' in word_str:
            continue
        if 'http:' in word_str:
            continue
        if 'i2p:' in word_str:
            continue
        if 'gnunet:' in word_str:
            continue
        if 'dat:' in word_str:
            continue
        if 'rad:' in word_str:
            continue
        if 'hyper:' in word_str:
            continue
        if 'briar:' in word_str:
            continue
        if '<' in word_str:
            replace_word = word_str.split('<', 1)[0]
            # if len(replace_word) > max_word_length:
            #     replace_word = replace_word[:max_word_length]
            content = content.replace(word_str, replace_word)
            word_str = replace_word
        if '/' in word_str:
            continue
        if len(word_str[max_word_length:]) < max_word_length:
            end_of_line_char = '\n'
            if '<br>' in original_word_str:
                end_of_line_char = ''
            content = content.replace(word_str,
                                      word_str[:max_word_length] +
                                      end_of_line_char +
                                      word_str[max_word_length:])
        else:
            content = content.replace(word_str,
                                      word_str[:max_word_length])
    if content.startswith('<p>'):
        if not content.endswith('</p>'):
            content = content.strip() + '</p>'
    content = content.replace('<p> </p>', '<p></p>')
    if non_html_list:
        content = content.replace('</p> <p>', '\n\n')
        content = content.replace('<p>', '')
        content = content.replace('</p>', '')
    if non_html_list2:
        content = content.replace('</p> <p>', '\n')
        content = content.replace('<p>', '')
        content = content.replace('</p>', '')
    content = content.replace('</p> <p>', '</p><p>')

    return content

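# Illustrative example for remove_long_words above:
#   remove_long_words('hello ' + 'a' * 50, 40, []) returns
#   'hello ' + 'a' * 40 + '\n' + 'a' * 10, breaking the long word so that it
#   wraps on narrow screens; urls, mentions and tox/onion style addresses
#   are deliberately left untouched.
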
def _load_auto_tags(base_dir: str, nickname: str, domain: str) -> []:
    """Loads automatic tags file and returns a list containing
    the lines of the file
    """
    filename = acct_dir(base_dir, nickname, domain) + '/autotags.txt'
    if not os.path.isfile(filename):
        return []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_tags:
            return fp_tags.readlines()
    except OSError:
        print('EX: unable to read auto tags ' + filename)
    return []

def _auto_tag(word_str: str, auto_tag_list: [], append_tags: []) -> None:
    """Generates a list of tags to be automatically appended to the content
    """
    for tag_rule in auto_tag_list:
        if word_str not in tag_rule:
            continue
        if '->' not in tag_rule:
            continue
        rulematch = tag_rule.split('->')[0].strip()
        if rulematch != word_str:
            continue
        tag_name = tag_rule.split('->')[1].strip()
        if tag_name.startswith('#'):
            if tag_name not in append_tags:
                append_tags.append(tag_name)
        else:
            if '#' + tag_name not in append_tags:
                append_tags.append('#' + tag_name)

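# Illustrative example for _auto_tag above: each line of autotags.txt is a
# rule of the form "word -> tagname". With the hypothetical rule
# 'cats -> felines', encountering the word 'cats' appends '#felines' to
# append_tags; a rule such as 'dogs -> #canines' appends '#canines' as-is.
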
def _get_simplified_content(content: str) -> str:
    """Returns a simplified version of the content suitable for
    splitting up into individual words
    """
    replacements = {
        ',': ' ',
        ';': ' ',
        '- ': ' '
    }
    content_simplified = replace_strings(content, replacements)
    content_simplified = content_simplified.replace('. ', ' ').strip()
    if content_simplified.endswith('.'):
        content_simplified = content_simplified[:len(content_simplified)-1]
    return content_simplified

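# Illustrative example for _get_simplified_content above:
#   _get_simplified_content('one,two;three.') returns 'one two three',
# with separators turned into spaces and the trailing full stop removed.
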
def detect_dogwhistles(content: str, dogwhistles: {}) -> {}:
    """Returns a dict containing any detected dogwhistle words
    """
    content = remove_html(content).lower()
    result = {}
    words = _get_simplified_content(content).split(' ')
    for whistle, category in dogwhistles.items():
        if not category:
            continue
        ending = False
        starting = False
        whistle = whistle.lower()

        if whistle.startswith('x-'):
            whistle = whistle[2:]
            ending = True
        elif string_ends_with(whistle, ('*', '~', '-')):
            whistle = whistle[1:]
            ending = True

        if ending:
            prev_wrd = ''
            for wrd in words:
                wrd2 = (prev_wrd + ' ' + wrd).strip()
                if wrd.endswith(whistle) or wrd2.endswith(whistle):
                    if not result.get(whistle):
                        result[whistle] = {
                            "count": 1,
                            "category": category
                        }
                    else:
                        result[whistle]['count'] += 1
                prev_wrd = wrd
            continue

        if whistle.lower().endswith('-x'):
            whistle = whistle[:len(whistle)-2]
            starting = True
        elif string_ends_with(whistle, ('*', '~', '-')):
            whistle = whistle[:len(whistle)-1]
            starting = True

        if starting:
            prev_wrd = ''
            for wrd in words:
                wrd2 = (prev_wrd + ' ' + wrd).strip()
                if wrd.startswith(whistle) or wrd2.startswith(whistle):
                    if not result.get(whistle):
                        result[whistle] = {
                            "count": 1,
                            "category": category
                        }
                    else:
                        result[whistle]['count'] += 1
                prev_wrd = wrd
            continue

        if '*' in whistle:
            whistle_start = whistle.split('*', 1)[0]
            whistle_end = whistle.split('*', 1)[1]
            prev_wrd = ''
            for wrd in words:
                wrd2 = (prev_wrd + ' ' + wrd).strip()
                if ((wrd.startswith(whistle_start) and
                     wrd.endswith(whistle_end)) or
                    (wrd2.startswith(whistle_start) and
                     wrd2.endswith(whistle_end))):
                    if not result.get(whistle):
                        result[whistle] = {
                            "count": 1,
                            "category": category
                        }
                    else:
                        result[whistle]['count'] += 1
                prev_wrd = wrd
            continue

        prev_wrd = ''
        for wrd in words:
            wrd2 = (prev_wrd + ' ' + wrd).strip()
            if whistle in (wrd, wrd2):
                if not result.get(whistle):
                    result[whistle] = {
                        "count": 1,
                        "category": category
                    }
                else:
                    result[whistle]['count'] += 1
            prev_wrd = wrd
    return result

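# Illustrative example for detect_dogwhistles above, using a hypothetical
# entry: with dogwhistles = {'felines': 'cat propaganda'}, calling
#   detect_dogwhistles('I like felines', dogwhistles)
# returns {'felines': {'count': 1, 'category': 'cat propaganda'}}, since the
# content is lowercased, stripped of html and matched word by word.
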
def load_dogwhistles(filename: str) -> {}:
    """Loads a list of dogwhistles from file
    """
    if not os.path.isfile(filename):
        return {}
    dogwhistle_lines: list[str] = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dogwhistles:
            dogwhistle_lines = fp_dogwhistles.readlines()
    except OSError:
        print('EX: unable to load dogwhistles from ' + filename)
        return {}
    separators = ('->', '=>', ',', ';', '|', '=')
    dogwhistles = {}
    for line in dogwhistle_lines:
        line = remove_eol(line).strip()
        if not line:
            continue
        if line.startswith('#'):
            continue
        whistle = None
        category = None
        for sep in separators:
            if sep in line:
                whistle = line.split(sep, 1)[0].strip()
                category = line.split(sep, 1)[1].strip()
                break
        if not whistle:
            whistle = line
        dogwhistles[whistle] = category
    return dogwhistles

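# Illustrative example for load_dogwhistles above: each non-comment line of
# the dogwhistles file pairs a whistle with a category via one of the
# separators, e.g. a file containing the hypothetical line
#   snowflake -> insult
# loads as {'snowflake': 'insult'}; a line with no separator is stored with
# a category of None.
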
def add_html_tags(base_dir: str, http_prefix: str,
                  nickname: str, domain: str, content: str,
                  recipients: [], hashtags: {}, translate: {},
                  is_json_content: bool = False) -> str:
    """ Replaces plaintext mentions such as @nick@domain into html
    by matching against known following accounts
    """
    if content.startswith('<p>'):
        content = html_replace_email_quote(content)
        return html_replace_quote_marks(content)
    max_word_length = 40
    replacements = {
        '\r': '',
        '\n': ' --linebreak-- '
    }
    content = replace_strings(content, replacements)
    now_playing_str = 'NowPlaying'
    if translate.get(now_playing_str):
        now_playing_str = translate[now_playing_str]
    now_playing_lower_str = 'nowplaying'
    if translate.get(now_playing_lower_str):
        now_playing_lower_str = translate[now_playing_lower_str]
    if '#' + now_playing_lower_str in content:
        content = content.replace('#' + now_playing_lower_str,
                                  '#' + now_playing_str)
    content = _add_music_tag(content, now_playing_str)
    words = _get_simplified_content(content).split(' ')

    # remove . for words which are not mentions
    new_words: list[str] = []
    for _, word_str in enumerate(words):
        if word_str.endswith('.'):
            if not word_str.startswith('@'):
                word_str = word_str[:-1]
        if word_str.startswith('.'):
            word_str = word_str[1:]
        new_words.append(word_str)
    words = new_words

    replace_mentions = {}
    replace_hashtags = {}
    replace_emoji = {}
    emoji_dict = {}
    original_domain = domain
    domain = remove_domain_port(domain)
    following_filename = \
        acct_dir(base_dir, nickname, domain) + '/following.txt'

    # read the following list so that we can detect just @nick
    # in addition to @nick@domain
    following = None
    petnames = None
    if '@' in words:
        if os.path.isfile(following_filename):
            following: list[str] = []
            try:
                with open(following_filename, 'r',
                          encoding='utf-8') as fp_foll:
                    following = fp_foll.readlines()
            except OSError:
                print('EX: add_html_tags unable to read ' +
                      following_filename)
            for handle in following:
                pet = get_pet_name(base_dir, nickname, domain, handle)
                if pet:
                    petnames.append(pet + '\n')

    # extract mentions and tags from words
    long_words_list: list[str] = []
    prev_word_str = ''
    auto_tags_list = _load_auto_tags(base_dir, nickname, domain)
    append_tags = []
    for word_str in words:
        word_len = len(word_str)
        if word_len <= 2:
            continue
        if word_len > max_word_length:
            long_words_list.append(word_str)
        first_char = word_str[0]
        if first_char == '@':
            if _add_mention(base_dir, word_str, http_prefix, following,
                            petnames, replace_mentions, recipients,
                            hashtags):
                prev_word_str = ''
                continue
        elif first_char == '#':
            # remove any endings from the hashtag
            hash_tag_endings = ('.', ':', ';', '-', '\n')
            for ending in hash_tag_endings:
                if word_str.endswith(ending):
                    word_str = word_str[:len(word_str) - 1]
                    break

            if _add_hash_tags(word_str, http_prefix, original_domain,
                              replace_hashtags, hashtags):
                prev_word_str = ''
                continue
        elif ':' in word_str:
            word_str2 = word_str.split(':')[1]
            if not emoji_dict:
                # emoji.json is generated so that it can be customized and
                # the changes will be retained even if default_emoji.json
                # is subsequently updated
                if not os.path.isfile(base_dir + '/emoji/emoji.json'):
                    copyfile(base_dir + '/emoji/default_emoji.json',
                             base_dir + '/emoji/emoji.json')
            emoji_dict = load_json(base_dir + '/emoji/emoji.json')

            # append custom emoji to the dict
            custom_emoji_filename = base_dir + '/emojicustom/emoji.json'
            if os.path.isfile(custom_emoji_filename):
                custom_emoji_dict = load_json(custom_emoji_filename)
                if custom_emoji_dict:
                    # combine emoji dicts one by one
                    for ename, eitem in custom_emoji_dict.items():
                        if ename and eitem:
                            if not emoji_dict.get(ename):
                                emoji_dict[ename] = eitem

            _add_emoji(base_dir, ':' + word_str2 + ':', http_prefix,
                       original_domain, replace_emoji, hashtags,
                       emoji_dict)
        else:
            if _auto_tag(word_str, auto_tags_list, append_tags):
                prev_word_str = ''
                continue
            if prev_word_str:
                if _auto_tag(prev_word_str + ' ' + word_str,
                             auto_tags_list, append_tags):
                    prev_word_str = ''
                    continue
        prev_word_str = word_str

    # add any auto generated tags
    for appended in append_tags:
        content = content + ' ' + appended
        _add_hash_tags(appended, http_prefix, original_domain,
                       replace_hashtags, hashtags)

    # replace words with their html versions
    for word_str, replace_str in replace_mentions.items():
        content = content.replace(word_str, replace_str)
    for word_str, replace_str in replace_hashtags.items():
        content = content.replace(word_str, replace_str)
    if not is_json_content:
        for word_str, replace_str in replace_emoji.items():
            content = content.replace(word_str, replace_str)

    content = add_web_links(content)
    if long_words_list:
        content = remove_long_words(content, max_word_length, long_words_list)
    content = limit_repeated_words(content, 6)
    content = content.replace(' --linebreak-- ', '</p><p>')
    content = html_replace_email_quote(content)
    return '<p>' + html_replace_quote_marks(content) + '</p>'

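# Illustrative sketch of calling add_html_tags above (handles, domains and
# surrounding variables here are hypothetical; base_dir and translate are
# assumed to already be set up by the caller):
#   hashtags = {}
#   recipients = []
#   html_content = add_html_tags(base_dir, 'https', 'bob', 'example.net',
#                                'hello @alice@example.com #cats',
#                                recipients, hashtags, translate)
# The mention and hashtag are replaced by their html markup, web links are
# added, overly long words are broken up, and the result is returned wrapped
# in <p>...</p>; recipients and hashtags are populated as side effects.
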
def _string_starts_with_url_prefix(text: str) -> bool:
    """ Does the given text begin with one of the url prefixes?
    """
    url_prefixes = ('http', 'gnunet', 'i2p', 'ipfs', 'ipns', 'hyper', 'dat:')
    for possible_prefix in url_prefixes:
        if text.startswith(possible_prefix):
            return True
    return False



def get_mentions_from_html(html_text: str, match_str: str) -> []:
    """Extracts mentioned actors from the given html content string
    """
    mentions: list[str] = []
    if match_str not in html_text:
        return mentions
    mentions_list = html_text.split(match_str)
    for mention_str in mentions_list:
        if '"' not in mention_str:
            continue
        actor_str = mention_str.split('"')[0]
        if _string_starts_with_url_prefix(actor_str):
            if actor_str not in mentions:
                mentions.append(actor_str)
    return mentions
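
# Sketch of the mention extraction above. The html snippet and match string
# are illustrative assumptions, not values taken from elsewhere in the code:
#   get_mentions_from_html(
#       '<a href="https://example.net/users/alice">@alice</a>',
#       'href="')
#   # -> ['https://example.net/users/alice']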


def extract_media_in_form_post(post_bytes, boundary, name: str):
    """Extracts the binary encoding for image/video/audio within an HTTP
    form POST
    Returns the media bytes and the remaining bytes
    """
    image_start_boundary = b'Content-Disposition: form-data; name="' + \
        name.encode('utf8', 'ignore') + b'";'
    image_start_location = post_bytes.find(image_start_boundary)
    if image_start_location == -1:
        return None, post_bytes

    # bytes after the start boundary appears
    media_bytes = post_bytes[image_start_location:]

    # look for the next boundary
    image_end_boundary = boundary.encode('utf8', 'ignore')
    image_end_location = media_bytes.find(image_end_boundary)
    if image_end_location == -1:
        # no ending boundary
        return media_bytes, post_bytes[:image_start_location]

    # remaining bytes after the end of the image
    remainder = media_bytes[image_end_location:]

    # remove bytes after the end boundary
    media_bytes = media_bytes[:image_end_location]

    # return the media and the before+after bytes
    return media_bytes, post_bytes[:image_start_location] + remainder
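
# Rough shape of the multipart body walked over above; the boundary string
# and field name are invented for illustration:
#   post_bytes = (b'--BOUND\r\n'
#                 b'Content-Disposition: form-data; name="attachpic";'
#                 b' filename="cat.png"\r\n'
#                 b'Content-Type: image/png\r\n\r\n'
#                 b'PNGDATA\r\n--BOUND--\r\n')
#   media, remainder = \
#       extract_media_in_form_post(post_bytes, 'BOUND', 'attachpic')
#   # media begins at the Content-Disposition header for "attachpic" and
#   # ends just before the next occurrence of b'BOUND'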


def _valid_follows_csv(content: str) -> bool:
    """Is the given content a valid CSV file containing imported follows?
    """
    if ',' not in content:
        return False
    if 'Account address,' not in content:
        return False
    return True
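
# The header checked for above matches a Mastodon-style follows export,
# e.g. (sample rows are invented):
#   _valid_follows_csv('Account address,Show boosts\n'
#                      'alice@example.net,true\n')   # True
#   _valid_follows_csv('just,some,other,csv')        # False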


def save_media_in_form_post(media_bytes, debug: bool,
                            filename_base: str) -> (str, str):
    """Saves the given media bytes extracted from an HTTP form POST
    Returns the filename and attachment type
    """
    if not media_bytes:
        if filename_base:
            # remove any existing files
            extension_types = get_image_extensions()
            for ex in extension_types:
                possible_other_format = filename_base + '.' + ex
                if os.path.isfile(possible_other_format):
                    try:
                        os.remove(possible_other_format)
                    except OSError:
                        if debug:
                            print('EX: save_media_in_form_post ' +
                                  'unable to delete other ' +
                                  str(possible_other_format))
            if os.path.isfile(filename_base):
                try:
                    os.remove(filename_base)
                except OSError:
                    if debug:
                        print('EX: save_media_in_form_post ' +
                              'unable to delete ' +
                              str(filename_base))

        if debug:
            print('DEBUG: No media found within POST')
        return None, None

    media_location = -1
    search_str = ''
    filename = None

    # directly search the binary array for the beginning
    # of an image, zip or csv
    extension_list = {
        'png': 'image/png',
        'jpeg': 'image/jpeg',
        'jxl': 'image/jxl',
        'gif': 'image/gif',
        'svg': 'image/svg+xml',
        'webp': 'image/webp',
        'avif': 'image/avif',
        'heic': 'image/heic',
        'mp4': 'video/mp4',
        'ogv': 'video/ogv',
        'mp3': 'audio/mpeg',
        'ogg': 'audio/ogg',
        'wav': 'audio/vnd.wave',
        'wav2': 'audio/wav',
        'wav3': 'audio/x-wav',
        'wav4': 'audio/x-pn-wave',
        'opus': 'audio/opus',
        'spx': 'audio/speex',
        'flac': 'audio/flac',
        'zip': 'application/zip',
        'csv': 'text/csv',
        'csv2': 'text/plain'
    }
    detected_extension = None
    for extension, content_type in extension_list.items():
        search_str = b'Content-Type: ' + content_type.encode('utf8', 'ignore')
        media_location = media_bytes.find(search_str)
        if media_location > -1:
            # image/video/audio binaries
            if extension == 'jpeg':
                extension = 'jpg'
            elif extension == 'mpeg':
                extension = 'mp3'
            elif extension == 'csv2':
                extension = 'csv'
            elif extension == 'wav2':
                extension = 'wav'
            elif extension == 'wav3':
                extension = 'wav'
            elif extension == 'wav4':
                extension = 'wav'
            if filename_base:
                if not filename_base.endswith('.' + extension):
                    filename = filename_base + '.' + extension
                else:
                    # already has the extension
                    filename = filename_base
            search_lst = search_str.decode().split('/', maxsplit=1)
            attachment_media_type = \
                search_lst[0].replace('Content-Type: ', '')
            detected_extension = extension
            break

    if not filename:
        return None, None

    # locate the beginning of the image, after any
    # carriage returns
    start_pos = media_location + len(search_str)
    for offset in range(1, 8):
        if media_bytes[start_pos+offset] != 10:
            if media_bytes[start_pos+offset] != 13:
                start_pos += offset
                break

    # remove any existing image files with a different format
    if detected_extension != 'zip':
        extension_types = get_image_extensions()
        for ex in extension_types:
            if ex == detected_extension:
                continue
            possible_other_format = \
                filename.replace('.temp', '').replace('.' +
                                                      detected_extension, '.' +
                                                      ex)
            if os.path.isfile(possible_other_format):
                try:
                    os.remove(possible_other_format)
                except OSError:
                    if debug:
                        print('EX: save_media_in_form_post ' +
                              'unable to delete other 2 ' +
                              str(possible_other_format))

    # don't allow scripts within svg files
    if detected_extension == 'svg':
        svg_str = media_bytes[start_pos:]
        svg_str = svg_str.decode()
        if dangerous_svg(svg_str, False):
            return None, None
    elif detected_extension == 'csv':
        csv_str = media_bytes[start_pos:]
        csv_str = csv_str.decode()
        if not _valid_follows_csv(csv_str):
            return None, None

    # if this is an image then check that the binary looks like an image
    image_extension_types = get_image_extensions()
    if detected_extension in image_extension_types:
        if not binary_is_image(filename, media_bytes[start_pos:]):
            print('WARN: save_media_in_form_post ' +
                  'image binary not recognized ' + filename)
            return None, None

    try:
        with open(filename, 'wb') as fp_media:
            fp_media.write(media_bytes[start_pos:])
    except OSError:
        print('EX: save_media_in_form_post unable to write media')

    if not os.path.isfile(filename):
        print('WARN: Media file could not be written to file: ' + filename)
        return None, None
    print('Uploaded media file written: ' + filename)

    return filename, attachment_media_type
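
# A possible calling pattern (names and paths are hypothetical): media_bytes
# would typically be the first value returned by extract_media_in_form_post()
# and filename_base a path without an extension; the extension is then chosen
# from the declared Content-Type:
#   filename, media_type = save_media_in_form_post(
#       media_bytes, False, 'accounts/alice@example.net/upload')
#   # -> ('accounts/alice@example.net/upload.png', 'image') for a png upload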


def combine_textarea_lines(text: str) -> str:
    """Combines separate textarea lines into paragraphs,
    preserving bullet and dash points
    """
    result = ''
    ctr = 0
    paragraphs = text.split('\n\n')
    replacements = {
        '\n* ': '***BULLET POINT*** ',
        '\n * ': '***BULLET POINT*** ',
        '\n- ': '***DASH POINT*** ',
        '\n - ': '***DASH POINT*** ',
        '\n': ' ',
        '  ': ' ',
        '***BULLET POINT*** ': '\n* ',
        '***DASH POINT*** ': '\n- '
    }
    for para in paragraphs:
        para = replace_strings(para, replacements)
        if ctr > 0:
            result += '</p><p>'
        result += para
        ctr += 1
    return result
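
# Example of the joining above (input invented), assuming replace_strings
# applies the replacements in dictionary order:
#   combine_textarea_lines('A sentence\nsplit over two lines\n\nNext para')
#   # -> 'A sentence split over two lines</p><p>Next para'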


def extract_text_fields_in_post(post_bytes, boundary: str, debug: bool,
                                unit_test_data: str) -> {}:
    """Returns a dictionary containing the text fields of an HTTP form POST
    The boundary argument comes from the HTTP header
    """
    if boundary == 'LYNX':
        if debug:
            print('POST from lynx browser')
        boundary = '--LYNX'

    if not unit_test_data:
        msg_bytes = email.parser.BytesParser().parsebytes(post_bytes)
        message_fields = msg_bytes.get_payload(decode=True).decode('utf-8')
    else:
        message_fields = unit_test_data

    if debug:
        if 'password' not in message_fields:
            print('DEBUG: POST arriving ' + message_fields)

    message_fields = message_fields.split(boundary)
    fields = {}
    fields_with_semicolon_allowed = (
        'message', 'bio', 'autoCW', 'password', 'passwordconfirm',
        'instanceDescription', 'instanceDescriptionShort',
        'subject', 'location', 'imageDescription', 'importBlocks',
        'importFollows', 'importTheme'
    )
    if debug:
        if 'password' not in message_fields:
            print('DEBUG: POST message_fields: ' + str(message_fields))
    lynx_content_type = 'Content-Type: text/plain; charset=utf-8\r\n'
    # examine each section of the POST, separated by the boundary
    for fld in message_fields:
        if fld == '--':
            continue
        if ' name="' not in fld:
            continue
        post_str = fld.split(' name="', 1)[1]
        if '"' not in post_str:
            continue
        post_key = post_str.split('"', 1)[0]
        if debug:
            print('post_key: ' + post_key)
        post_value_str = post_str.split('"', 1)[1]
        if boundary == '--LYNX':
            post_value_str = \
                post_value_str.replace(lynx_content_type, '')
        if debug and 'password' not in post_key:
            print('boundary: ' + boundary)
            print('post_value_str1: ' + post_value_str)
        if ';' in post_value_str:
            if post_key not in fields_with_semicolon_allowed and \
               not post_key.startswith('edited'):
                if debug:
                    print('extract_text_fields_in_post exit 1')
                continue
        if debug and 'password' not in post_key:
            print('post_value_str2: ' + post_value_str)
        if '\r\n' not in post_value_str:
            if debug:
                print('extract_text_fields_in_post exit 2')
            continue
        post_lines = post_value_str.split('\r\n')
        if debug and 'password' not in post_key:
            print('post_lines: ' + str(post_lines))
        post_value = ''
        if len(post_lines) > 2:
            for line in range(2, len(post_lines)-1):
                if line > 2:
                    post_value += '\n'
                post_value += post_lines[line]
        fields[post_key] = urllib.parse.unquote(post_value)
        if boundary == '--LYNX' and post_key in ('message', 'bio'):
            fields[post_key] = combine_textarea_lines(fields[post_key])
    return fields
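
# Sketch of the field extraction above, using the unit_test_data path so the
# email parser is skipped; boundary and field name are invented:
#   form = ('--BOUND\r\n'
#           'Content-Disposition: form-data; name="subject"\r\n'
#           '\r\n'
#           'Hello world\r\n'
#           '--BOUND--')
#   extract_text_fields_in_post(None, 'BOUND', False, form)
#   # -> {'subject': 'Hello world'}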


def limit_repeated_words(text: str, max_repeats: int) -> str:
    """Limits the number of times that a word can be consecutively repeated
    """
    words = text.replace('\n', ' ').split(' ')
    repeat_ctr = 0
    repeated_text = ''
    replacements = {}
    prev_word = ''
    for word in words:
        if word == prev_word:
            repeat_ctr += 1
            if repeated_text:
                repeated_text += ' ' + word
            else:
                repeated_text = word + ' ' + word
        else:
            if repeat_ctr > max_repeats:
                new_text = ((prev_word + ' ') * max_repeats).strip()
                replacements[prev_word] = [repeated_text, new_text]
            repeat_ctr = 0
            repeated_text = ''
        prev_word = word

    if repeat_ctr > max_repeats:
        new_text = ((prev_word + ' ') * max_repeats).strip()
        replacements[prev_word] = [repeated_text, new_text]

    for word, item in replacements.items():
        text = text.replace(item[0], item[1])
    return text
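
# Example of the repeat limiting above (input invented): eight consecutive
# repeats are reduced to the maximum of six:
#   limit_repeated_words('no no no no no no no no way', 6)
#   # -> 'no no no no no no way'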


def get_price_from_string(price_str: str) -> (str, str):
    """Returns the item price and currency
    """
    currencies = get_currencies()
    for symbol, name in currencies.items():
        if symbol in price_str:
            price = price_str.replace(symbol, '')
            if is_float(price):
                return price, name
        elif name in price_str:
            price = price_str.replace(name, '')
            if is_float(price):
                return price, name
    if is_float(price_str):
        return price_str, "EUR"
    return "0.00", "EUR"
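
# Example of the price parsing above, assuming get_currencies() maps the
# symbol '£' to 'GBP' (illustrative):
#   get_price_from_string('£5.23')   # -> ('5.23', 'GBP')
#   get_price_from_string('5.23')    # -> ('5.23', 'EUR')
#   get_price_from_string('gratis')  # -> ('0.00', 'EUR')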


def _words_similarity_histogram(words: []) -> {}:
    """Returns a histogram for word combinations
    """
    histogram = {}
    for index in range(1, len(words)):
        combined_words = words[index - 1] + words[index]
        if histogram.get(combined_words):
            histogram[combined_words] += 1
        else:
            histogram[combined_words] = 1
    return histogram


def _words_similarity_words_list(content: str) -> []:
    """Returns a list of words for the given content
    """
    remove_punctuation = ('.', ',', ';', '-', ':', '"')
    content = remove_html(content).lower()
    for punc in remove_punctuation:
        content = content.replace(punc, ' ')
        content = content.replace('  ', ' ')
    return content.split(' ')


def words_similarity(content1: str, content2: str, min_words: int) -> int:
    """Returns percentage similarity
    """
    if content1 == content2:
        return 100

    words1 = _words_similarity_words_list(content1)
    if len(words1) < min_words:
        return 0

    words2 = _words_similarity_words_list(content2)
    if len(words2) < min_words:
        return 0

    histogram1 = _words_similarity_histogram(words1)
    histogram2 = _words_similarity_histogram(words2)

    diff = 0
    for combined_words, histogram1_value in histogram1.items():
        if not histogram2.get(combined_words):
            diff += 1
        else:
            diff += \
                abs(histogram2[combined_words] - histogram1_value)
    return 100 - int(diff * 100 / len(histogram1.items()))
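
# Sketch of the similarity measure above: identical content scores 100, and
# content with no shared word pairs scores 0. Inputs are invented:
#   words_similarity('the cat sat on the mat today',
#                    'the cat sat on the mat today', 3)
#   # -> 100
#   words_similarity('the cat sat on the mat today',
#                    'completely different text entirely here', 3)
#   # -> 0 (no shared word pairs)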


def contains_invalid_local_links(domain_full: str,
                                 onion_domain: str, i2p_domain: str,
                                 content: str) -> bool:
    """Returns true if the given content contains invalid links
    which refer back to the local, onion or i2p domain
    """
    for inv_str in INVALID_CONTENT_STRINGS:
        match_str = '?' + inv_str + '='
        if match_str not in content:
            continue
        # extract the urls and check whether they are for the local domain
        ctr = 0
        sections = content.split(match_str)
        final_section_index = len(sections) - 1
        for section_str in sections:
            if ctr == final_section_index:
                continue
            if '://' in section_str:
                url = section_str.split('://')[-1]
                if domain_full in url:
                    return True
                if onion_domain:
                    if onion_domain in url:
                        return True
                if i2p_domain:
                    if i2p_domain in url:
                        return True
            ctr += 1
    return False
def bold_reading_string(text: str) -> str:
    """Returns bold reading formatted text,
    with the first half of each word emboldened
    """
    text = html.unescape(text)
    add_paragraph_markup = False
    if '<p>' in text:
        text = text.replace('</p>', '\n').replace('<p>', '')
        add_paragraph_markup = True
    paragraphs = text.split('\n')
    parag_ctr = 0
    new_text = ''
    for parag in paragraphs:
        words = parag.split(' ')
        new_parag = ''
        reading_markup = False
        for wrd in words:
            if '<' in wrd:
                reading_markup = True
            if reading_markup and '>' in wrd:
                reading_markup = False
            wrd_len = len(wrd)
            if not reading_markup and wrd_len > 1 and \
               '<' not in wrd and '>' not in wrd and \
               '&' not in wrd and '=' not in wrd and \
               not wrd.startswith(':'):

                prefix = ''
                postfix = ''
                if wrd.startswith('"'):
                    prefix = '"'
                    wrd = wrd[1:]
                if wrd.endswith('"'):
                    postfix = '"'
                    # strip the trailing quote even if a leading quote
                    # was already removed
                    wrd = wrd[:-1]

                initial_chars = int(math.ceil(wrd_len / 2.0))
                new_parag += \
                    prefix + '<b>' + wrd[:initial_chars] + '</b>' + \
                    wrd[initial_chars:] + postfix + ' '
            else:
                new_parag += wrd + ' '
        parag_ctr += 1
        new_parag = new_parag.strip()
        if not new_parag:
            continue
        if parag_ctr < len(paragraphs):
            if not add_paragraph_markup:
                new_text += new_parag + '\n'
            else:
                new_text += '<p>' + new_parag + '</p>'
        else:
            if not add_paragraph_markup:
                new_text += new_parag
            else:
                new_text += '<p>' + new_parag + '</p>'

    return new_text
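
# Illustrative usage (comment only, not called in this module):
#   bold_reading_string('<p>hello world</p>')
# returns '<p><b>hel</b>lo <b>wor</b>ld</p>'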
					
						
def import_emoji(base_dir: str, import_filename: str, session) -> None:
    """Imports emoji from the given filename
    Each line should be [emoji url], :emojiname:
    """
    if not os.path.isfile(import_filename):
        return
    emoji_dict = load_json(base_dir + '/emoji/default_emoji.json')
    added = 0
    with open(import_filename, "r", encoding='utf-8') as fp_emoji:
        lines = fp_emoji.readlines()
        for line in lines:
            if ', ' not in line:
                continue
            url = line.split(', ')[0]
            tag = line.split(', ')[1].strip()
            if ':' not in tag:
                continue
            tag = tag.split(':')[1]
            if emoji_dict.get(tag):
                continue
            emoji_image_filename = base_dir + '/emoji/' + tag + '.png'
            if os.path.isfile(emoji_image_filename):
                continue
            if download_image(session, url,
                              emoji_image_filename, True, False):
                emoji_dict[tag] = tag
                added += 1
    save_json(emoji_dict, base_dir + '/emoji/default_emoji.json')
    print(str(added) + ' custom emoji added')
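
# Illustrative import file line (the url is hypothetical):
#   https://example.com/emoji/partyparrot.png, :partyparrot:
# which downloads partyparrot.png into the emoji directory and adds
# 'partyparrot' to default_emoji.json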
					
						
def content_diff(content: str, prev_content: str) -> str:
    """Returns an html diff of the given content
    compared against the previous content
    """
    cdiff = difflib.Differ()
    text1_lines = content.splitlines()
    text1_sentences: list[str] = []
    for line in text1_lines:
        sentences = line.split('.')
        for sentence in sentences:
            text1_sentences.append(sentence.strip())

    text2_lines = prev_content.splitlines()
    text2_sentences: list[str] = []
    for line in text2_lines:
        sentences = line.split('.')
        for sentence in sentences:
            text2_sentences.append(sentence.strip())

    diff = cdiff.compare(text1_sentences, text2_sentences)

    diff_text = ''
    for line in diff:
        # Differ prefixes sentences unique to the current content with '- '
        # and sentences unique to the previous content with '+ ', so the
        # labels below are swapped to show additions and removals
        if line.startswith('- '):
            if not diff_text:
                diff_text = '<p>'
            else:
                diff_text += '<br>'
            diff_text += '<label class="diff_add">+ ' + line[2:] + '</label>'
        elif line.startswith('+ '):
            if not diff_text:
                diff_text = '<p>'
            else:
                diff_text += '<br>'
            diff_text += \
                '<label class="diff_remove">- ' + line[2:] + '</label>'
    if diff_text:
        diff_text += '</p>'
    return diff_text
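
# Illustrative example (comment only, not called in this module):
#   content_diff('Hello world. New sentence.', 'Hello world.')
# returns '<p><label class="diff_add">+ New sentence</label></p>'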
					
						
def create_edits_html(edits_json: {}, post_json_object: {},
                      translate: {}, timezone: str,
                      system_language: str,
                      languages_understood: []) -> str:
    """ Creates html showing historical edits made to a post
    """
    if not edits_json:
        return ''
    if not has_object_dict(post_json_object):
        return ''
    if 'content' not in post_json_object['object']:
        if 'contentMap' not in post_json_object['object']:
            return ''
    edit_dates_list: list[str] = []
    for modified, _ in edits_json.items():
        edit_dates_list.append(modified)
    edit_dates_list.sort(reverse=True)
    edits_str = ''
    content = get_content_from_post(post_json_object, system_language,
                                    languages_understood, "content")
    if not content:
        return ''
    content = remove_html(content)
    for modified in edit_dates_list:
        prev_json = edits_json[modified]
        if not has_object_dict(prev_json):
            continue
        prev_content = get_content_from_post(prev_json, system_language,
                                             languages_understood, "content")
        if not prev_content:
            continue
        prev_content = remove_html(prev_content)
        if content == prev_content:
            continue
        diff = content_diff(content, prev_content)
        if not diff:
            continue
        diff = diff.replace('\n', '</p><p>')
        # convert to local time
        datetime_object = parse(modified)
        datetime_object = \
            convert_published_to_local_timezone(datetime_object, timezone)
        modified_str = datetime_object.strftime("%a %b %d, %H:%M")
        diff = '<p><b>' + modified_str + '</b></p>' + diff
        edits_str += diff
        content = prev_content
    if not edits_str:
        return ''
    return '<details><summary class="cw" tabindex="10">' + \
        translate['SHOW EDITS'] + '</summary>' + \
        edits_str + '</details>'
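
# Assumed shape of edits_json (illustrative): previous versions of the post
# keyed by their modified timestamp, for example
#   {"2022-04-10T22:50:44Z": {"object": {"content": "...", ...}, ...}}

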
def remove_script(content: str, log_filename: str,
                  actor: str, url: str) -> str:
    """Removes <script> from some content
    """
    # handle both raw and html-escaped script tags
    separators = [['<', '>'], ['&lt;', '&gt;']]
    for sep in separators:
        prefix = sep[0] + 'script'
        ending = '/script' + sep[1]
        if prefix not in content:
            continue
        sections = content.split(prefix)
        ctr = 0
        for text in sections:
            if ctr == 0:
                ctr += 1
                continue
            if ending not in text:
                if '/' + sep[1] not in text:
                    continue
            if ending in text:
                text = prefix + text.split(ending)[0] + ending
            else:
                text = prefix + text.split('/' + sep[1])[0] + '/' + sep[1]
                if log_filename and actor:
                    # write the detected script to a log file
                    log_str = actor + ' ' + url + ' ' + text + '\n'
                    write_type = 'a+'
                    if os.path.isfile(log_filename):
                        write_type = 'w+'
                    try:
                        with open(log_filename, write_type,
                                  encoding='utf-8') as fp_log:
                            fp_log.write(log_str)
                    except OSError:
                        print('EX: cannot append to svg script log')
            content = content.replace(text, '')
    return content
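
# Illustrative example (comment only, not called in this module):
#   remove_script('<p>hi</p><script>alert(1)</script>', None, None, None)
# returns '<p>hi</p>'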
					
						
def reject_twitter_summary(base_dir: str, nickname: str, domain: str,
                           summary: str) -> bool:
    """Returns true if the post should be rejected because twitter
    is mentioned within the summary
    """
    if not summary:
        return False
    remove_twitter = \
        acct_dir(base_dir, nickname, domain) + '/.removeTwitter'
    if not os.path.isfile(remove_twitter):
        return False
    summary_lower = summary.lower()
    twitter_strings = ('twitter', '/x.com', ' x.com', 'birdsite')
    if string_contains(summary_lower, twitter_strings):
        return True
    return False
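
# Illustrative behaviour (assumption): if the account has opted in by
# creating a .removeTwitter flag file then a summary such as
# "reposted from twitter" causes the post to be rejected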
					
						
def add_name_emojis_to_tags(base_dir: str, http_prefix: str,
                            domain: str, port: int,
                            actor_json: {}) -> None:
    """Add any custom emojis within the name of an actor to
    the tag list
    """
    if not actor_json.get('name'):
        return
    name = actor_json['name']

    # does the name contain an emoji?
    if ':' not in name:
        return
    if ':' not in name.split(':', 1)[1]:
        return

    # get emojis from the actor name
    words = name.split(' ')
    emojis: list[str] = []
    for wrd in words:
        if wrd.startswith(':') and wrd.endswith(':'):
            if wrd not in emojis:
                emojis.append(wrd)
    if not emojis:
        return

    actor_tags: list[dict] = []
    if actor_json.get('tag'):
        actor_tags = actor_json['tag']
    else:
        # ensure that there is a tag list to append to
        actor_json['tag'] = actor_tags

    # is the emoji already in the tag list?
    for tag_dict in actor_tags:
        if not tag_dict.get('type'):
            continue
        if tag_dict['type'] != 'Emoji':
            continue
        if not tag_dict.get('name'):
            continue
        if not tag_dict['name'].startswith(':'):
            continue
        if not tag_dict['name'].endswith(':'):
            continue
        if tag_dict['name'] in emojis:
            emojis.remove(tag_dict['name'])
    if not emojis:
        return

    domain_full = get_full_domain(domain, port)
    for emoji_tag_name in emojis:
        emoji_name = emoji_tag_name.replace(':', '')
        emoji_id = \
            http_prefix + '://' + domain_full + '/emoji/' + \
            emoji_name
        url = emoji_id + '.png'
        emoji_filename = base_dir + '/emoji/' + emoji_name + '.png'
        updated = None
        if os.path.isfile(emoji_filename):
            updated = file_last_modified(emoji_filename)
        new_tag = {
            'icon': {
                'mediaType': 'image/png',
                'type': 'Image',
                'url': url
            },
            'id': emoji_id,
            'name': emoji_tag_name,
            'type': 'Emoji',
            'updated': '2022-11-15T23:45:42Z'
        }
        if updated:
            new_tag['updated'] = updated
        actor_json['tag'].append(new_tag)
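
# Illustrative example (assumption): an actor name such as "Alice :wave:"
# gains a tag entry of type 'Emoji' named ':wave:' whose icon url points
# to /emoji/wave.png on this instance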
					
						
def format_mixed_right_to_left(content: str,
                               language: str) -> str:
    """Adds RTL direction formatting to right-to-left paragraphs
    within a post whose main language is not RTL,
    eg. where some paragraphs are English and others are Arabic
    """
    # if the main language is already RTL then no extra formatting is needed
    if language_right_to_left(language):
        return content
    result = ''
    changed = False
    paragraphs = content.split('<p>')
    for text_html in paragraphs:
        if '</p>' not in text_html:
            continue
        text_html = '<p>' + text_html
        text_plain = remove_html(text_html)
        if is_right_to_left_text(text_plain):
            text_html = text_html.replace('<p>', '<p><div dir="rtl">', 1)
            text_html = text_html.replace('</p>', '</div></p>', 1)
            changed = True
        result += text_html
    if not changed:
        result = ''
        prev_distilled = ''
        distilled = content
        while prev_distilled != distilled:
            prev_distilled = distilled
            distilled = distilled.replace('<br><br><br>', '<br><br>')
        paragraphs = distilled.split('<br><br>')
        ctr = 0
        for text_html in paragraphs:
            ctr += 1
            if ctr < len(paragraphs):
                text_html += '<br><br>'
            text_plain = remove_html(text_html)
            if is_right_to_left_text(text_plain):
                text_html = '<div dir="rtl">' + text_html
                if ctr < len(paragraphs):
                    text_html = \
                        text_html.replace('<br><br>', '</div><br><br>', 1)
                else:
                    text_html += '</div>'
                changed = True
            result += text_html
    if not changed:
        return content
    return result
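
# Illustrative example (comment only, assuming is_right_to_left_text
# recognises the Arabic paragraph): with language 'en',
#   '<p>Hello</p><p>مرحبا بالعالم</p>'
# becomes
#   '<p>Hello</p><p><div dir="rtl">مرحبا بالعالم</div></p>'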
					
						
def _load_auto_cw(base_dir: str, nickname: str, domain: str) -> []:
    """Loads the automatic CWs file and returns a list containing
    the lines of the file
    """
    auto_cw_filename = acct_dir(base_dir, nickname, domain) + '/autocw.txt'
    if not os.path.isfile(auto_cw_filename):
        return []
    try:
        with open(auto_cw_filename, 'r', encoding='utf-8') as fp_auto:
            return fp_auto.read().split('\n')
    except OSError:
        print('EX: unable to load auto cw file ' + auto_cw_filename)
    return []


def load_auto_cw_cache(base_dir: str) -> {}:
    """Returns a dictionary containing the automatic content warning lists
    for each account
    """
    auto_cw_cache = {}
    dir_str = data_dir(base_dir)
    for _, dirs, _ in os.walk(dir_str):
        for handle in dirs:
            if not is_account_dir(handle):
                continue
            nickname = handle.split('@')[0]
            domain = handle.split('@')[1]
            auto_cw_cache[nickname] = _load_auto_cw(base_dir, nickname, domain)
        break
    return auto_cw_cache


def add_auto_cw(base_dir: str, nickname: str, domain: str,
                subject: str, content: str,
                auto_cw_cache: {}) -> str:
    """Appends any automatic content warnings to the subject line
    and returns the new subject line
    """
    new_subject = subject
    if auto_cw_cache.get(nickname):
        auto_cw_list = auto_cw_cache[nickname]
    else:
        auto_cw_list = _load_auto_cw(base_dir, nickname, domain)
        auto_cw_cache[nickname] = auto_cw_list
    for cw_rule in auto_cw_list:
        if '->' not in cw_rule:
            continue
        sections = cw_rule.split('->')
        rulematch = sections[0].strip()
        if rulematch not in content:
            continue
        cw_str = sections[1].strip()
        if not cw_str:
            continue
        if new_subject:
            if cw_str not in new_subject and \
               cw_str.title() not in new_subject:
                new_subject += ', ' + cw_str
        else:
            new_subject = cw_str
    return new_subject
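
# Illustrative autocw.txt rule format (assumption based on the parsing
# in add_auto_cw above): each line is "match text -> content warning", eg.
#   spider -> arachnophobia
# so a post containing "spider" gains an "arachnophobia" content warning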