From b8372aa02e5b4048585f09b2f85b91d9538e3162 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Wed, 7 Feb 2024 10:48:46 +0000 Subject: [PATCH] Another fix for removing long words --- content.py | 9 +++++++-- tests.py | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/content.py b/content.py index 89a08162c..5e1a99212 100644 --- a/content.py +++ b/content.py @@ -1049,7 +1049,7 @@ def remove_long_words(content: str, max_word_length: int, if is_pgp_encrypted(content) or contains_pgp_public_key(content): return content content = replace_content_duplicates(content) - if ' ' not in content and '

' not in content: + if ' ' not in content and '

' not in content: # handle a single very long string with no spaces content_str = content.replace('

', '').replace(r'<\p>', '') if '://' not in content_str: @@ -1068,6 +1068,7 @@ def remove_long_words(content: str, max_word_length: int, if word_str not in long_words_list: long_words_list.append(word_str) for word_str in long_words_list: + original_word_str = word_str if word_str.startswith('

'): word_str = word_str.replace('

', '') if word_str.startswith('<'): @@ -1112,8 +1113,12 @@ def remove_long_words(content: str, max_word_length: int, if '/' in word_str: continue if len(word_str[max_word_length:]) < max_word_length: + end_of_line_char = '\n' + if '
' in original_word_str: + end_of_line_char = '' content = content.replace(word_str, - word_str[:max_word_length] + '\n' + + word_str[:max_word_length] + + end_of_line_char + word_str[max_word_length:]) else: content = content.replace(word_str, diff --git a/tests.py b/tests.py index ac71ea35c..3db486be6 100644 --- a/tests.py +++ b/tests.py @@ -8644,6 +8644,32 @@ def _test_check_individual_post_content(): print(content5) assert content5 == content + content = "

D-A-N-G-E-R-O-U-S
A-N-I-M-A-L

" + \ + "

D-A-N-G-E-R-O-U-S
A-N-I-M-A-L

" + content2 = remove_style_within_html(content) + if content2 != content: + print(content) + print(content2) + assert content2 == content + + content3 = remove_long_words(content, 40, []) + if content3 != content: + print(content) + print(content3) + assert content3 == content + + content4 = remove_text_formatting(content, False) + if content4 != content: + print(content) + print(content4) + assert content4 == content + + content5 = limit_repeated_words(content, 6) + if content5 != content: + print(content) + print(content5) + assert content5 == content + def run_all_tests(): base_dir = os.getcwd()