From b8372aa02e5b4048585f09b2f85b91d9538e3162 Mon Sep 17 00:00:00 2001
From: Bob Mottram <bob@libreserver.org>
Date: Wed, 7 Feb 2024 10:48:46 +0000
Subject: [PATCH] Another fix for removing long words

---
 content.py |  9 +++++++--
 tests.py   | 26 ++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/content.py b/content.py
index 89a08162c..5e1a99212 100644
--- a/content.py
+++ b/content.py
@@ -1049,7 +1049,7 @@ def remove_long_words(content: str, max_word_length: int,
     if is_pgp_encrypted(content) or contains_pgp_public_key(content):
         return content
     content = replace_content_duplicates(content)
-    if ' ' not in content and '<p></p>' not in content:
+    if ' ' not in content and '</p><p>' not in content:
         # handle a single very long string with no spaces
         content_str = content.replace('<p>', '').replace(r'<\p>', '')
         if '://' not in content_str:
@@ -1068,6 +1068,7 @@ def remove_long_words(content: str, max_word_length: int,
                 if word_str not in long_words_list:
                     long_words_list.append(word_str)
     for word_str in long_words_list:
+        original_word_str = word_str
         if word_str.startswith('<p>'):
             word_str = word_str.replace('<p>', '')
         if word_str.startswith('<'):
@@ -1112,8 +1113,12 @@ def remove_long_words(content: str, max_word_length: int,
         if '/' in word_str:
             continue
         if len(word_str[max_word_length:]) < max_word_length:
+            end_of_line_char = '\n'
+            if '<br>' in original_word_str:
+                end_of_line_char = ''
             content = content.replace(word_str,
-                                      word_str[:max_word_length] + '\n' +
+                                      word_str[:max_word_length] +
+                                      end_of_line_char +
                                       word_str[max_word_length:])
         else:
             content = content.replace(word_str,
diff --git a/tests.py b/tests.py
index ac71ea35c..3db486be6 100644
--- a/tests.py
+++ b/tests.py
@@ -8644,6 +8644,32 @@ def _test_check_individual_post_content():
         print(content5)
     assert content5 == content
 
+    content = "<p>D-A-N-G-E-R-O-U-S<br>A-N-I-M-A-L</p>" + \
+        "<p>D-A-N-G-E-R-O-U-S<br>A-N-I-M-A-L</p>"
+    content2 = remove_style_within_html(content)
+    if content2 != content:
+        print(content)
+        print(content2)
+    assert content2 == content
+
+    content3 = remove_long_words(content, 40, [])
+    if content3 != content:
+        print(content)
+        print(content3)
+    assert content3 == content
+
+    content4 = remove_text_formatting(content, False)
+    if content4 != content:
+        print(content)
+        print(content4)
+    assert content4 == content
+
+    content5 = limit_repeated_words(content, 6)
+    if content5 != content:
+        print(content)
+        print(content5)
+    assert content5 == content
+
 
 def run_all_tests():
     base_dir = os.getcwd()