mirror of https://gitlab.com/bashrc2/epicyon
Fix for removing long words
parent
29d4f68fd5
commit
bdc52a1fce
20
content.py
20
content.py
|
@@ -1088,6 +1088,16 @@ def remove_long_words(content: str, max_word_length: int,
|
|||
if is_pgp_encrypted(content) or contains_pgp_public_key(content):
|
||||
return content
|
||||
content = replace_content_duplicates(content)
|
||||
|
||||
non_html_list = False
|
||||
if '\n\n' in content and '<p>' not in content:
|
||||
content = '<p>' + content.replace('\n\n', '</p> <p>') + '</p>'
|
||||
non_html_list = True
|
||||
non_html_list2 = False
|
||||
if '\n' in content and '<p>' not in content:
|
||||
content = '<p>' + content.replace('\n', '</p> <p>') + '</p>'
|
||||
non_html_list2 = True
|
||||
|
||||
if ' ' not in content and '</p><p>' not in content:
|
||||
# handle a single very long string with no spaces
|
||||
content_str = content.replace('<p>', '').replace(r'<\p>', '')
|
||||
|
@@ -1166,6 +1176,16 @@ def remove_long_words(content: str, max_word_length: int,
|
|||
if not content.endswith('</p>'):
|
||||
content = content.strip() + '</p>'
|
||||
content = content.replace('<p> </p>', '<p></p>')
|
||||
if non_html_list:
|
||||
content = content.replace('</p> <p>', '\n\n')
|
||||
content = content.replace('<p>', '')
|
||||
content = content.replace('</p>', '')
|
||||
if non_html_list2:
|
||||
content = content.replace('</p> <p>', '\n')
|
||||
content = content.replace('<p>', '')
|
||||
content = content.replace('</p>', '')
|
||||
content = content.replace('</p> <p>', '</p><p>')
|
||||
|
||||
return content
|
||||
|
||||
|
||||
|
|
55
tests.py
55
tests.py
|
@@ -8855,6 +8855,61 @@ def _test_uninvert2():
|
|||
|
||||
def _test_check_individual_post_content():
|
||||
print('check_individual_post_content')
|
||||
|
||||
content = "Unenshitification?\n\n" + \
|
||||
"Counter-enshitification?\n\n" + \
|
||||
"Anti-enshitification?"
|
||||
content2 = remove_style_within_html(content)
|
||||
if content2 != content:
|
||||
print(content)
|
||||
print(content2)
|
||||
assert content2 == content
|
||||
|
||||
content3 = remove_long_words(content, 40, [])
|
||||
if content3 != content:
|
||||
print(content)
|
||||
print(content3)
|
||||
assert content3 == content
|
||||
|
||||
content4 = remove_text_formatting(content, False)
|
||||
if content4 != content:
|
||||
print(content)
|
||||
print(content4)
|
||||
assert content4 == content
|
||||
|
||||
content5 = limit_repeated_words(content, 6)
|
||||
if content5 != content:
|
||||
print(content)
|
||||
print(content5)
|
||||
assert content5 == content
|
||||
|
||||
content = "Unenshitification?\n" + \
|
||||
"Counter-enshitification?\n" + \
|
||||
"Anti-enshitification?"
|
||||
content2 = remove_style_within_html(content)
|
||||
if content2 != content:
|
||||
print(content)
|
||||
print(content2)
|
||||
assert content2 == content
|
||||
|
||||
content3 = remove_long_words(content, 40, [])
|
||||
if content3 != content:
|
||||
print(content)
|
||||
print(content3)
|
||||
assert content3 == content
|
||||
|
||||
content4 = remove_text_formatting(content, False)
|
||||
if content4 != content:
|
||||
print(content)
|
||||
print(content4)
|
||||
assert content4 == content
|
||||
|
||||
content5 = limit_repeated_words(content, 6)
|
||||
if content5 != content:
|
||||
print(content)
|
||||
print(content5)
|
||||
assert content5 == content
|
||||
|
||||
content = "<p>Unenshitification?</p><p></p><p>" + \
|
||||
"Counter-enshitification?</p><p></p>" + \
|
||||
"<p>Anti-enshitification?</p><p></p><p>Nonshitification?</p>"
|
||||
|
|
Loading…
Reference in New Issue