From 0af6b098401d598b4670d275ee2e2baed7bb6c22 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Tue, 24 Sep 2024 20:40:30 +0100
Subject: [PATCH] Remove html headers from post content

---
Review notes (this section is ignored when the patch is applied):
- remove_header_tags now also strips 'h6', so that every HTML heading
  level (h1 to h6) is handled rather than only h1 to h5.
- The line structure of this patch was restored from a copy that had been
  collapsed onto a single line. NOTE(review): the indentation of the
  context lines in the webapp_post.py individual_post_as_html hunk is
  reconstructed and should be confirmed against the target file (or apply
  with --ignore-whitespace).
- NOTE(review): the utils.py "index" blob hash below is the one recorded
  for the original commit, which did not include 'h6'.

 utils.py       | 9 +++++++++
 webapp_post.py | 7 +++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/utils.py b/utils.py
index 32edf6cd7..2649cda64 100644
--- a/utils.py
+++ b/utils.py
@@ -450,6 +450,15 @@ def remove_markup_tag(html: str, tag: str) -> str:
     return result
 
 
+def remove_header_tags(html: str) -> str:
+    """Removes any header tags (h1 to h6) from the given html text
+    """
+    header_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
+    for tag_str in header_tags:
+        html = remove_markup_tag(html, tag_str)
+    return html
+
+
 def get_content_from_post(post_json_object: {}, system_language: str,
                           languages_understood: [],
                           content_type: str) -> str:
diff --git a/webapp_post.py b/webapp_post.py
index b859abbfb..0ad066f7b 100644
--- a/webapp_post.py
+++ b/webapp_post.py
@@ -34,6 +34,7 @@ from flags import is_news_post
 from flags import is_recent_post
 from flags import is_chat_message
 from flags import is_pgp_encrypted
+from utils import remove_header_tags
 from utils import get_actor_from_post_id
 from utils import contains_statuses
 from utils import data_dir
@@ -2871,9 +2872,11 @@ def individual_post_as_html(signing_priv_key_pem: str,
                not post_is_blog:
                 content_str = bold_reading_string(content_str)
 
-            object_content = remove_link_trackers_from_content(content_str)
+            object_content = remove_header_tags(content_str)
             object_content = \
-                remove_long_words(content_str, 40, [])
+                remove_link_trackers_from_content(object_content)
+            object_content = \
+                remove_long_words(object_content, 40, [])
             object_content = \
                 remove_text_formatting(object_content, bold_reading)
             object_content = limit_repeated_words(object_content, 6)