diff --git a/conversation.py b/conversation.py index 9802f1ec8..c42ef2293 100644 --- a/conversation.py +++ b/conversation.py @@ -14,6 +14,7 @@ from utils import remove_id_ending from utils import text_in_file from utils import locate_post from utils import load_json +from utils import harmless_markup from keys import get_instance_actor_key from session import get_json @@ -170,6 +171,10 @@ def download_conversation_posts(session, http_prefix: str, base_dir: str, post_json = wrapped_post if not post_json['object'].get('published'): break + + # render harmless any dangerous markup + harmless_markup(post_json) + conversation_view = [post_json] + conversation_view if not post_json['object'].get('inReplyTo'): if debug: diff --git a/desktop_client.py b/desktop_client.py index f1e8a4326..a2d4cad96 100644 --- a/desktop_client.py +++ b/desktop_client.py @@ -16,6 +16,7 @@ import webbrowser import urllib.parse from pathlib import Path from random import randint +from utils import remove_html from utils import safe_system_string from utils import text_in_file from utils import disallow_announce @@ -25,7 +26,6 @@ from utils import has_object_dict from utils import get_full_domain from utils import is_dm from utils import load_translations_from_file -from utils import remove_html from utils import get_nickname_from_actor from utils import get_domain_from_actor from utils import is_pgp_encrypted @@ -682,6 +682,7 @@ def _get_image_description(post_json_object: {}) -> str: message_str = img['name'] if message_str: message_str = message_str.strip() + message_str = remove_html(message_str) if not message_str.endswith('.'): image_description += message_str + '. ' else: diff --git a/epicyon.py b/epicyon.py index 7f04ccdfa..9bb1c7126 100644 --- a/epicyon.py +++ b/epicyon.py @@ -72,6 +72,7 @@ from tests import test_update_actor from tests import run_all_tests from auth import store_basic_credentials from auth import create_password +from utils import remove_html from utils import remove_eol from utils import text_in_file from utils import remove_domain_port @@ -1665,7 +1666,7 @@ def _command_options() -> None: cc_url = None send_message = argb.message # client_to_server = argb.client - attached_image_description = argb.imageDescription + attached_image_description = remove_html(argb.imageDescription) city = 'London, England' send_threads = [] post_log = [] diff --git a/speaker.py b/speaker.py index 3d4cc34cd..dc9e55235 100644 --- a/speaker.py +++ b/speaker.py @@ -473,7 +473,7 @@ def _post_to_speaker_json(base_dir: str, http_prefix: str, if img.get('name'): if isinstance(img['name'], str): image_description += \ - img['name'] + '. ' + remove_html(img['name']) + '. ' is_direct = is_dm(post_json_object) actor = local_actor_url(http_prefix, nickname, domain_full) diff --git a/utils.py b/utils.py index 3b8751a05..7b29e6970 100644 --- a/utils.py +++ b/utils.py @@ -4257,3 +4257,22 @@ def unescaped_text(txt: str) -> str: for orig, replacement in _get_escaped_chars().items(): txt = txt.replace(replacement, orig) return txt + + +def harmless_markup(post_json_object: {}) -> None: + """render harmless any dangerous markup + """ + for field_name in ('content', 'summary'): + if post_json_object['object'].get(field_name): + if dangerous_markup(post_json_object['object'][field_name], + False): + post_json_object['object'][field_name] = \ + remove_html(post_json_object['object'][field_name]) + map_name = field_name + 'Map' + if post_json_object['object'].get(map_name): + map_dict = post_json_object['object'][map_name].items() + for lang, content in map_dict: + if dangerous_markup(content, False): + content = remove_html(content) + post_json_object['object'][map_name][lang] = \ + content diff --git a/webapp_search.py b/webapp_search.py index 86dbf3de5..b75cca5ff 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -11,6 +11,7 @@ import os from shutil import copyfile import urllib.parse from datetime import datetime +from utils import harmless_markup from utils import remove_id_ending from utils import has_object_dict from utils import acct_handle_dir @@ -1191,6 +1192,10 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, if not is_public_post(post_json_object): print('Hashtag post is not public ' + post_id) continue + + # render harmless any dangerous markup + harmless_markup(post_json_object) + show_individual_post_icons = False allow_deletion = False show_repeats = show_individual_post_icons diff --git a/webapp_utils.py b/webapp_utils.py index 438548bca..984e8c78a 100644 --- a/webapp_utils.py +++ b/webapp_utils.py @@ -1238,6 +1238,7 @@ def get_post_attachments_as_html(base_dir: str, image_description = '' if attach.get('name'): image_description = attach['name'].replace('"', "'") + image_description = remove_html(image_description) if _is_image_mime_type(media_type): image_url = attach['url']