mirror of https://gitlab.com/bashrc2/epicyon
Another fix for removing long words
parent 3e0e6d6cde
commit b8372aa02e
|
@ -1049,7 +1049,7 @@ def remove_long_words(content: str, max_word_length: int,
|
||||||
if is_pgp_encrypted(content) or contains_pgp_public_key(content):
|
if is_pgp_encrypted(content) or contains_pgp_public_key(content):
|
||||||
return content
|
return content
|
||||||
content = replace_content_duplicates(content)
|
content = replace_content_duplicates(content)
|
||||||
if ' ' not in content and '<p></p>' not in content:
|
if ' ' not in content and '</p><p>' not in content:
|
||||||
# handle a single very long string with no spaces
|
# handle a single very long string with no spaces
|
||||||
content_str = content.replace('<p>', '').replace(r'<\p>', '')
|
content_str = content.replace('<p>', '').replace(r'<\p>', '')
|
||||||
if '://' not in content_str:
|
if '://' not in content_str:
|
||||||
|
@ -1068,6 +1068,7 @@ def remove_long_words(content: str, max_word_length: int,
|
||||||
if word_str not in long_words_list:
|
if word_str not in long_words_list:
|
||||||
long_words_list.append(word_str)
|
long_words_list.append(word_str)
|
||||||
for word_str in long_words_list:
|
for word_str in long_words_list:
|
||||||
|
original_word_str = word_str
|
||||||
if word_str.startswith('<p>'):
|
if word_str.startswith('<p>'):
|
||||||
word_str = word_str.replace('<p>', '')
|
word_str = word_str.replace('<p>', '')
|
||||||
if word_str.startswith('<'):
|
if word_str.startswith('<'):
|
||||||
|
@ -1112,8 +1113,12 @@ def remove_long_words(content: str, max_word_length: int,
|
||||||
if '/' in word_str:
|
if '/' in word_str:
|
||||||
continue
|
continue
|
||||||
if len(word_str[max_word_length:]) < max_word_length:
|
if len(word_str[max_word_length:]) < max_word_length:
|
||||||
|
end_of_line_char = '\n'
|
||||||
|
if '<br>' in original_word_str:
|
||||||
|
end_of_line_char = ''
|
||||||
content = content.replace(word_str,
|
content = content.replace(word_str,
|
||||||
word_str[:max_word_length] + '\n' +
|
word_str[:max_word_length] +
|
||||||
|
end_of_line_char +
|
||||||
word_str[max_word_length:])
|
word_str[max_word_length:])
|
||||||
else:
|
else:
|
||||||
content = content.replace(word_str,
|
content = content.replace(word_str,
|
||||||
|
|
tests.py (26 lines changed)
|
@ -8644,6 +8644,32 @@ def _test_check_individual_post_content():
|
||||||
print(content5)
|
print(content5)
|
||||||
assert content5 == content
|
assert content5 == content
|
||||||
|
|
||||||
|
content = "<p>D-A-N-G-E-R-O-U-S<br>A-N-I-M-A-L</p>" + \
|
||||||
|
"<p>D-A-N-G-E-R-O-U-S<br>A-N-I-M-A-L</p>"
|
||||||
|
content2 = remove_style_within_html(content)
|
||||||
|
if content2 != content:
|
||||||
|
print(content)
|
||||||
|
print(content2)
|
||||||
|
assert content2 == content
|
||||||
|
|
||||||
|
content3 = remove_long_words(content, 40, [])
|
||||||
|
if content3 != content:
|
||||||
|
print(content)
|
||||||
|
print(content3)
|
||||||
|
assert content3 == content
|
||||||
|
|
||||||
|
content4 = remove_text_formatting(content, False)
|
||||||
|
if content4 != content:
|
||||||
|
print(content)
|
||||||
|
print(content4)
|
||||||
|
assert content4 == content
|
||||||
|
|
||||||
|
content5 = limit_repeated_words(content, 6)
|
||||||
|
if content5 != content:
|
||||||
|
print(content)
|
||||||
|
print(content5)
|
||||||
|
assert content5 == content
|
||||||
|
|
||||||
|
|
||||||
def run_all_tests():
|
def run_all_tests():
|
||||||
base_dir = os.getcwd()
|
base_dir = os.getcwd()
|
||||||
|
|
Loading…
Reference in New Issue