From cf8505c85a2b4c564c6f45e741dd29062f67dc3b Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 7 Jan 2023 11:07:34 +0000 Subject: [PATCH 1/4] Remove any dangerous markup from conversation view Here you may be reading posts from arbitrary untrusted instances --- conversation.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/conversation.py b/conversation.py index 9802f1ec8..f8bafb94c 100644 --- a/conversation.py +++ b/conversation.py @@ -14,6 +14,8 @@ from utils import remove_id_ending from utils import text_in_file from utils import locate_post from utils import load_json +from utils import dangerous_markup +from utils import remove_html from keys import get_instance_actor_key from session import get_json @@ -170,6 +172,20 @@ def download_conversation_posts(session, http_prefix: str, base_dir: str, post_json = wrapped_post if not post_json['object'].get('published'): break + + # remove any dangerous markup + for field_name in ('content', 'summary'): + if post_json['object'].get(field_name): + if dangerous_markup(post_json['object'][field_name], False): + post_json['object'][field_name] = \ + remove_html(post_json['object'][field_name]) + if post_json['object'].get(field_name + 'Map'): + map_dict = post_json['object'][field_name + 'Map'].items() + for lang, content in map_dict: + if dangerous_markup(content, False): + content = remove_html(content) + post_json['object'][field_name + 'Map'][lang] = content + conversation_view = [post_json] + conversation_view if not post_json['object'].get('inReplyTo'): if debug: From 179f4341ee87f80bc2856d3993db2c2692d28c0c Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 7 Jan 2023 11:19:57 +0000 Subject: [PATCH 2/4] Remove dangerous markup from remote hashtag posts --- conversation.py | 7 ++++--- webapp_search.py | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/conversation.py b/conversation.py index f8bafb94c..58b79ac94 100644 --- a/conversation.py +++ b/conversation.py @@ -179,12 +179,13 @@ def download_conversation_posts(session, http_prefix: str, base_dir: str, if dangerous_markup(post_json['object'][field_name], False): post_json['object'][field_name] = \ remove_html(post_json['object'][field_name]) - if post_json['object'].get(field_name + 'Map'): - map_dict = post_json['object'][field_name + 'Map'].items() + map_name = field_name + 'Map' + if post_json['object'].get(map_name): + map_dict = post_json['object'][map_name].items() for lang, content in map_dict: if dangerous_markup(content, False): content = remove_html(content) - post_json['object'][field_name + 'Map'][lang] = content + post_json['object'][map_name][lang] = content conversation_view = [post_json] + conversation_view if not post_json['object'].get('inReplyTo'): diff --git a/webapp_search.py b/webapp_search.py index 86dbf3de5..35d721e9a 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -11,6 +11,8 @@ import os from shutil import copyfile import urllib.parse from datetime import datetime +from utils import dangerous_markup +from utils import remove_html from utils import remove_id_ending from utils import has_object_dict from utils import acct_handle_dir @@ -1191,6 +1193,23 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, if not is_public_post(post_json_object): print('Hashtag post is not public ' + post_id) continue + + # remove any dangerous markup + for field_name in ('content', 'summary'): + if post_json_object['object'].get(field_name): + if dangerous_markup(post_json_object['object'][field_name], + False): + post_json_object['object'][field_name] = \ + remove_html(post_json_object['object'][field_name]) + map_name = field_name + 'Map' + if post_json_object['object'].get(map_name): + map_dict = post_json_object['object'][map_name].items() + for lang, content in map_dict: + if dangerous_markup(content, False): + content = remove_html(content) + post_json_object['object'][map_name][lang] = \ + content + show_individual_post_icons = False allow_deletion = False show_repeats = show_individual_post_icons From 1bc5f2515ec00a815ae3258fa532dcedb714e2a9 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 7 Jan 2023 11:45:19 +0000 Subject: [PATCH 3/4] Tidying --- conversation.py | 18 +++--------------- utils.py | 19 +++++++++++++++++++ webapp_search.py | 20 +++----------------- 3 files changed, 25 insertions(+), 32 deletions(-) diff --git a/conversation.py b/conversation.py index 58b79ac94..c42ef2293 100644 --- a/conversation.py +++ b/conversation.py @@ -14,8 +14,7 @@ from utils import remove_id_ending from utils import text_in_file from utils import locate_post from utils import load_json -from utils import dangerous_markup -from utils import remove_html +from utils import harmless_markup from keys import get_instance_actor_key from session import get_json @@ -173,19 +172,8 @@ def download_conversation_posts(session, http_prefix: str, base_dir: str, if not post_json['object'].get('published'): break - # remove any dangerous markup - for field_name in ('content', 'summary'): - if post_json['object'].get(field_name): - if dangerous_markup(post_json['object'][field_name], False): - post_json['object'][field_name] = \ - remove_html(post_json['object'][field_name]) - map_name = field_name + 'Map' - if post_json['object'].get(map_name): - map_dict = post_json['object'][map_name].items() - for lang, content in map_dict: - if dangerous_markup(content, False): - content = remove_html(content) - post_json['object'][map_name][lang] = content + # render harmless any dangerous markup + harmless_markup(post_json) conversation_view = [post_json] + conversation_view if not post_json['object'].get('inReplyTo'): diff --git a/utils.py b/utils.py index 3b8751a05..7b29e6970 100644 --- a/utils.py +++ b/utils.py @@ -4257,3 +4257,22 @@ def unescaped_text(txt: str) -> str: for orig, replacement in _get_escaped_chars().items(): txt = txt.replace(replacement, orig) return txt + + +def harmless_markup(post_json_object: {}) -> None: + """render harmless any dangerous markup + """ + for field_name in ('content', 'summary'): + if post_json_object['object'].get(field_name): + if dangerous_markup(post_json_object['object'][field_name], + False): + post_json_object['object'][field_name] = \ + remove_html(post_json_object['object'][field_name]) + map_name = field_name + 'Map' + if post_json_object['object'].get(map_name): + map_dict = post_json_object['object'][map_name].items() + for lang, content in map_dict: + if dangerous_markup(content, False): + content = remove_html(content) + post_json_object['object'][map_name][lang] = \ + content diff --git a/webapp_search.py b/webapp_search.py index 35d721e9a..b75cca5ff 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -11,8 +11,7 @@ import os from shutil import copyfile import urllib.parse from datetime import datetime -from utils import dangerous_markup -from utils import remove_html +from utils import harmless_markup from utils import remove_id_ending from utils import has_object_dict from utils import acct_handle_dir @@ -1194,21 +1193,8 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, print('Hashtag post is not public ' + post_id) continue - # remove any dangerous markup - for field_name in ('content', 'summary'): - if post_json_object['object'].get(field_name): - if dangerous_markup(post_json_object['object'][field_name], - False): - post_json_object['object'][field_name] = \ - remove_html(post_json_object['object'][field_name]) - map_name = field_name + 'Map' - if post_json_object['object'].get(map_name): - map_dict = post_json_object['object'][map_name].items() - for lang, content in map_dict: - if dangerous_markup(content, False): - content = remove_html(content) - post_json_object['object'][map_name][lang] = \ - content + # render harmless any dangerous markup + harmless_markup(post_json_object) show_individual_post_icons = False allow_deletion = False From 7657fe5e43b674316faf65739c24d35844389155 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 7 Jan 2023 13:56:52 +0000 Subject: [PATCH 4/4] Remove any html from image descriptions --- desktop_client.py | 3 ++- epicyon.py | 3 ++- speaker.py | 2 +- webapp_utils.py | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/desktop_client.py b/desktop_client.py index f1e8a4326..a2d4cad96 100644 --- a/desktop_client.py +++ b/desktop_client.py @@ -16,6 +16,7 @@ import webbrowser import urllib.parse from pathlib import Path from random import randint +from utils import remove_html from utils import safe_system_string from utils import text_in_file from utils import disallow_announce @@ -25,7 +26,6 @@ from utils import has_object_dict from utils import get_full_domain from utils import is_dm from utils import load_translations_from_file -from utils import remove_html from utils import get_nickname_from_actor from utils import get_domain_from_actor from utils import is_pgp_encrypted @@ -682,6 +682,7 @@ def _get_image_description(post_json_object: {}) -> str: message_str = img['name'] if message_str: message_str = message_str.strip() + message_str = remove_html(message_str) if not message_str.endswith('.'): image_description += message_str + '. ' else: diff --git a/epicyon.py b/epicyon.py index 7f04ccdfa..9bb1c7126 100644 --- a/epicyon.py +++ b/epicyon.py @@ -72,6 +72,7 @@ from tests import test_update_actor from tests import run_all_tests from auth import store_basic_credentials from auth import create_password +from utils import remove_html from utils import remove_eol from utils import text_in_file from utils import remove_domain_port @@ -1665,7 +1666,7 @@ def _command_options() -> None: cc_url = None send_message = argb.message # client_to_server = argb.client - attached_image_description = argb.imageDescription + attached_image_description = remove_html(argb.imageDescription) city = 'London, England' send_threads = [] post_log = [] diff --git a/speaker.py b/speaker.py index 3d4cc34cd..dc9e55235 100644 --- a/speaker.py +++ b/speaker.py @@ -473,7 +473,7 @@ def _post_to_speaker_json(base_dir: str, http_prefix: str, if img.get('name'): if isinstance(img['name'], str): image_description += \ - img['name'] + '. ' + remove_html(img['name']) + '. ' is_direct = is_dm(post_json_object) actor = local_actor_url(http_prefix, nickname, domain_full) diff --git a/webapp_utils.py b/webapp_utils.py index 438548bca..984e8c78a 100644 --- a/webapp_utils.py +++ b/webapp_utils.py @@ -1238,6 +1238,7 @@ def get_post_attachments_as_html(base_dir: str, image_description = '' if attach.get('name'): image_description = attach['name'].replace('"', "'") + image_description = remove_html(image_description) if _is_image_mime_type(media_type): image_url = attach['url']