From c3e77ab909bb163742db6b2801f23ae98752dc8c Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Tue, 6 Feb 2024 23:38:39 +0000
Subject: [PATCH] Fix bug in checking for long words

---
 content.py |  2 +-
 tests.py   | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/content.py b/content.py
index 99e06a82d..89a08162c 100644
--- a/content.py
+++ b/content.py
@@ -1049,7 +1049,7 @@ def remove_long_words(content: str, max_word_length: int,
     if is_pgp_encrypted(content) or contains_pgp_public_key(content):
         return content
     content = replace_content_duplicates(content)
-    if ' ' not in content:
+    if ' ' not in content and '</p><p>' not in content:
         # handle a single very long string with no spaces
         content_str = content.replace('<p>', '').replace(r'<\p>', '')
         if '://' not in content_str:
diff --git a/tests.py b/tests.py
index e2252178b..ac71ea35c 100644
--- a/tests.py
+++ b/tests.py
@@ -8615,6 +8615,36 @@ def _test_uninvert2():
     assert uninverted_text == 'This is a test.'
 
 
+def _test_check_individual_post_content():
+    print('check_individual_post_content')
+    content = "<p>Unenshitification?</p><p>" + \
+        "Counter-enshitification?</p>" + \
+        "<p>Anti-enshitification?</p><p>Nonshitification?</p>"
+    content2 = remove_style_within_html(content)
+    if content2 != content:
+        print(content)
+        print(content2)
+    assert content2 == content
+
+    content3 = remove_long_words(content, 40, [])
+    if content3 != content:
+        print(content)
+        print(content3)
+    assert content3 == content
+
+    content4 = remove_text_formatting(content, False)
+    if content4 != content:
+        print(content)
+        print(content4)
+    assert content4 == content
+
+    content5 = limit_repeated_words(content, 6)
+    if content5 != content:
+        print(content)
+        print(content5)
+    assert content5 == content
+
+
 def run_all_tests():
     base_dir = os.getcwd()
     print('Running tests...')
@@ -8632,6 +8662,7 @@ def run_all_tests():
     _test_checkbox_names()
     _test_thread_functions()
     _test_functions()
+    _test_check_individual_post_content()
     _test_uninvert2()
     _test_book_link(base_dir)
     _test_dateformat()