Remove html headers from post content

main
Bob Mottram 2024-09-24 20:40:30 +01:00
parent f404304794
commit 0af6b09840
2 changed files with 14 additions and 2 deletions

View File

@@ -450,6 +450,15 @@ def remove_markup_tag(html: str, tag: str) -> str:
return result return result
def remove_header_tags(html: str) -> str:
    """Removes any header tags from the given html text

    The text is passed through remove_markup_tag once for each
    heading tag name, h1 through h6, and the result of the final
    pass is returned. What happens to the text enclosed by a tag is
    decided by remove_markup_tag, not by this function.
    """
    # HTML defines six heading levels, so h6 is included as well
    header_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    for tag_str in header_tags:
        html = remove_markup_tag(html, tag_str)
    return html
def get_content_from_post(post_json_object: {}, system_language: str, def get_content_from_post(post_json_object: {}, system_language: str,
languages_understood: [], languages_understood: [],
content_type: str) -> str: content_type: str) -> str:

View File

@@ -34,6 +34,7 @@ from flags import is_news_post
from flags import is_recent_post from flags import is_recent_post
from flags import is_chat_message from flags import is_chat_message
from flags import is_pgp_encrypted from flags import is_pgp_encrypted
from utils import remove_header_tags
from utils import get_actor_from_post_id from utils import get_actor_from_post_id
from utils import contains_statuses from utils import contains_statuses
from utils import data_dir from utils import data_dir
@@ -2871,9 +2872,11 @@ def individual_post_as_html(signing_priv_key_pem: str,
not post_is_blog: not post_is_blog:
content_str = bold_reading_string(content_str) content_str = bold_reading_string(content_str)
object_content = remove_link_trackers_from_content(content_str) object_content = remove_header_tags(content_str)
object_content = \ object_content = \
remove_long_words(content_str, 40, []) remove_link_trackers_from_content(object_content)
object_content = \
remove_long_words(object_content, 40, [])
object_content = \ object_content = \
remove_text_formatting(object_content, bold_reading) remove_text_formatting(object_content, bold_reading)
object_content = limit_repeated_words(object_content, 6) object_content = limit_repeated_words(object_content, 6)