Another fix for removing long words

main
Bob Mottram 2024-02-07 10:48:46 +00:00
parent 3e0e6d6cde
commit b8372aa02e
2 changed files with 33 additions and 2 deletions

View File

@@ -1049,7 +1049,7 @@ def remove_long_words(content: str, max_word_length: int,
if is_pgp_encrypted(content) or contains_pgp_public_key(content):
return content
content = replace_content_duplicates(content)
if ' ' not in content and '<p></p>' not in content:
if ' ' not in content and '</p><p>' not in content:
# handle a single very long string with no spaces
content_str = content.replace('<p>', '').replace(r'<\p>', '')
if '://' not in content_str:
@@ -1068,6 +1068,7 @@ def remove_long_words(content: str, max_word_length: int,
if word_str not in long_words_list:
long_words_list.append(word_str)
for word_str in long_words_list:
original_word_str = word_str
if word_str.startswith('<p>'):
word_str = word_str.replace('<p>', '')
if word_str.startswith('<'):
@@ -1112,8 +1113,12 @@ def remove_long_words(content: str, max_word_length: int,
if '/' in word_str:
continue
if len(word_str[max_word_length:]) < max_word_length:
end_of_line_char = '\n'
if '<br>' in original_word_str:
end_of_line_char = ''
content = content.replace(word_str,
word_str[:max_word_length] + '\n' +
word_str[:max_word_length] +
end_of_line_char +
word_str[max_word_length:])
else:
content = content.replace(word_str,

View File

@@ -8644,6 +8644,32 @@ def _test_check_individual_post_content():
print(content5)
assert content5 == content
content = "<p>D-A-N-G-E-R-O-U-S<br>A-N-I-M-A-L</p>" + \
"<p>D-A-N-G-E-R-O-U-S<br>A-N-I-M-A-L</p>"
content2 = remove_style_within_html(content)
if content2 != content:
print(content)
print(content2)
assert content2 == content
content3 = remove_long_words(content, 40, [])
if content3 != content:
print(content)
print(content3)
assert content3 == content
content4 = remove_text_formatting(content, False)
if content4 != content:
print(content)
print(content4)
assert content4 == content
content5 = limit_repeated_words(content, 6)
if content5 != content:
print(content)
print(content5)
assert content5 == content
def run_all_tests():
base_dir = os.getcwd()