From bdc52a1fceba0b6ca2301128e6978c85bf771a4a Mon Sep 17 00:00:00 2001
From: Bob Mottram ' not in content:
+ content = ' ' + content.replace('\n\n', ' ') + ' ' not in content:
+ content = ' ' + content.replace('\n', ' ') + '
' not in content: # handle a single very long string with no spaces content_str = content.replace('
', '').replace(r'<\p>', '') @@ -1166,6 +1176,16 @@ def remove_long_words(content: str, max_word_length: int, if not content.endswith('
'): content = content.strip() + '' content = content.replace('', '') + if non_html_list: + content = content.replace('
', '\n\n') + content = content.replace('
', '') + content = content.replace('
', '') + if non_html_list2: + content = content.replace('', '\n') + content = content.replace('
', '') + content = content.replace('
', '') + content = content.replace('', '
') + return content diff --git a/tests.py b/tests.py index 1862cac39..8b8e6241f 100644 --- a/tests.py +++ b/tests.py @@ -8855,6 +8855,61 @@ def _test_uninvert2(): def _test_check_individual_post_content(): print('check_individual_post_content') + + content = "Unenshitification?\n\n" + \ + "Counter-enshitification?\n\n" + \ + "Anti-enshitification?" + content2 = remove_style_within_html(content) + if content2 != content: + print(content) + print(content2) + assert content2 == content + + content3 = remove_long_words(content, 40, []) + if content3 != content: + print(content) + print(content3) + assert content3 == content + + content4 = remove_text_formatting(content, False) + if content4 != content: + print(content) + print(content4) + assert content4 == content + + content5 = limit_repeated_words(content, 6) + if content5 != content: + print(content) + print(content5) + assert content5 == content + + content = "Unenshitification?\n" + \ + "Counter-enshitification?\n" + \ + "Anti-enshitification?" + content2 = remove_style_within_html(content) + if content2 != content: + print(content) + print(content2) + assert content2 == content + + content3 = remove_long_words(content, 40, []) + if content3 != content: + print(content) + print(content3) + assert content3 == content + + content4 = remove_text_formatting(content, False) + if content4 != content: + print(content) + print(content4) + assert content4 == content + + content5 = limit_repeated_words(content, 6) + if content5 != content: + print(content) + print(content5) + assert content5 == content + content = "
Unenshitification?
" + \ "Counter-enshitification?
" + \ "Anti-enshitification?
Nonshitification?
"