From d2251eb1732eb566d7e6849f839041aa54fdfe86 Mon Sep 17 00:00:00 2001
From: Bob Mottram <bob@libreserver.org>
Date: Thu, 8 Aug 2024 18:23:33 +0100
Subject: [PATCH] Replacing multiple strings

---
 blog.py           | 19 +++++++++++-----
 categories.py     | 14 +++++++++---
 content.py        | 56 ++++++++++++++++++++++++++++++++---------------
 daemon_post.py    | 16 +++++++++-----
 desktop_client.py | 10 ++++++---
 epicyon.py        | 22 +++++++++++--------
 happening.py      | 11 +++++++---
 posts.py          |  8 +++++--
 shares.py         | 22 ++++++++++++++-----
 utils.py          | 39 ++++++++++++++++++++-------------
 webapp_utils.py   | 34 ++++++++++++++++++++--------
 11 files changed, 171 insertions(+), 80 deletions(-)
diff --git a/blog.py b/blog.py
index b2bf66caa..3b590e6cf 100644
--- a/blog.py
+++ b/blog.py
@@ -16,6 +16,7 @@ from webapp_utils import html_footer
 from webapp_utils import get_post_attachments_as_html
 from webapp_utils import edit_text_area
 from webapp_media import add_embedded_elements
+from utils import replace_strings
 from utils import data_dir
 from utils import remove_link_tracking
 from utils import get_url_from_post
@@ -164,8 +165,11 @@ def _get_blog_replies(base_dir: str, http_prefix: str, translate: {},
         replies_str = ''
         for reply_post_id in lines:
             reply_post_id = remove_eol(reply_post_id)
-            reply_post_id = reply_post_id.replace('.json', '')
-            reply_post_id = reply_post_id.replace('.replies', '')
+            replacements = {
+                '.json': '',
+                '.replies': ''
+            }
+            reply_post_id = replace_strings(reply_post_id, replacements)
             post_filename = acct_dir(base_dir, nickname, domain) + \
                 '/postcache/' + \
                 reply_post_id.replace('/', '#') + '.html'
@@ -438,10 +442,13 @@ def _html_blog_remove_cw_button(blog_str: str, translate: {}) -> str:
     """Removes the CW button from blog posts, where the
     summary field is instead used as the blog title
     """
-    blog_str = blog_str.replace('<details>', '<b>')
-    blog_str = blog_str.replace('</details>', '</b>')
-    blog_str = blog_str.replace('<summary>', '')
-    blog_str = blog_str.replace('</summary>', '')
+    replacements = {
+        '<details>': '<b>',
+        '</details>': '</b>',
+        '<summary>': '',
+        '</summary>': ''
+    }
+    blog_str = replace_strings(blog_str, replacements)
     blog_str = blog_str.replace(translate['SHOW MORE'], '')
     return blog_str
 
diff --git a/categories.py b/categories.py
index ff9826f23..6e7a5774d 100644
--- a/categories.py
+++ b/categories.py
@@ -12,6 +12,7 @@ import datetime
 from utils import data_dir
 from utils import date_utcnow
 from utils import date_epoch
+from utils import replace_strings
 
 MAX_TAG_LENGTH = 42
 
@@ -51,6 +52,14 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None:
     if translate.get(category_str):
         category_str = translate[category_str]
 
+    replacements = {
+        ' & ': ' and ',
+        '/': ''
+    }
+    replacements2 = {
+        '-': '',
+        ' ': ''
+    }
     for _, _, files in os.walk(base_dir + '/data/cities'):
         for cities_file in files:
             if not cities_file.endswith('.txt'):
@@ -68,10 +77,9 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None:
                 continue
             for hashtag in cities:
                 hashtag = hashtag.lower().strip()
-                hashtag = hashtag.replace(' & ', ' and ')
-                hashtag = hashtag.replace('/', '')
+                hashtag = replace_strings(hashtag, replacements)
 
-                hashtag2 = hashtag.replace('-', '').replace(' ', '')
+                hashtag2 = replace_strings(hashtag, replacements2)
                 city_filename = base_dir + '/tags/' + hashtag2 + '.category'
                 if not os.path.isfile(city_filename):
                     try:
diff --git a/content.py b/content.py
index 6a5341830..36aae0278 100644
--- a/content.py
+++ b/content.py
@@ -15,6 +15,7 @@ import email.parser
 import urllib.parse
 from shutil import copyfile
 from dateutil.parser import parse
+from utils import replace_strings
 from utils import data_dir
 from utils import remove_link_tracking
 from utils import string_contains
@@ -143,13 +144,19 @@ def html_replace_email_quote(content: str) -> str:
     if '<p>&quot;' in content:
         if '&quot;</p>' in content:
             if content.count('<p>&quot;') == content.count('&quot;</p>'):
-                content = content.replace('<p>&quot;', '<p><blockquote>')
-                content = content.replace('&quot;</p>', '</blockquote></p>')
+                replacements = {
+                    '<p>&quot;': '<p><blockquote>',
+                    '&quot;</p>': '</blockquote></p>'
+                }
+                content = replace_strings(content, replacements)
     if '>\u201c' in content:
         if '\u201d<' in content:
             if content.count('>\u201c') == content.count('\u201d<'):
-                content = content.replace('>\u201c', '><blockquote>')
-                content = content.replace('\u201d<', '</blockquote><')
+                replacements = {
+                    '>\u201c': '><blockquote>',
+                    '\u201d<': '</blockquote><'
+                }
+                content = replace_strings(content, replacements)
     # replace email style quote
     if '>&gt; ' not in content:
         return content
@@ -161,8 +168,11 @@ def html_replace_email_quote(content: str) -> str:
             continue
         if '>&gt; ' not in line_str:
             if line_str.startswith('&gt; '):
-                line_str = line_str.replace('&gt; ', '<blockquote>')
-                line_str = line_str.replace('&gt;', '<br>')
+                replacements = {
+                    '&gt; ': '<blockquote>',
+                    '&gt;': '<br>'
+                }
+                line_str = replace_strings(line_str, replacements)
                 new_content += '<p>' + line_str + '</blockquote></p>'
             else:
                 new_content += '<p>' + line_str + '</p>'
@@ -1194,8 +1204,12 @@ def _get_simplified_content(content: str) -> str:
     """Returns a simplified version of the content suitable for
     splitting up into individual words
     """
-    content_simplified = \
-        content.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
+    replacements = {
+        ',': ' ',
+        ';': ' ',
+        '- ': ' '
+    }
+    content_simplified = replace_strings(content, replacements)
     content_simplified = content_simplified.replace('. ', ' ').strip()
     if content_simplified.endswith('.'):
         content_simplified = content_simplified[:len(content_simplified)-1]
@@ -1338,8 +1352,11 @@ def add_html_tags(base_dir: str, http_prefix: str,
         content = html_replace_email_quote(content)
         return html_replace_quote_marks(content)
     max_word_length = 40
-    content = content.replace('\r', '')
-    content = content.replace('\n', ' --linebreak-- ')
+    replacements = {
+        '\r': '',
+        '\n': ' --linebreak-- '
+    }
+    content = replace_strings(content, replacements)
     now_playing_str = 'NowPlaying'
     if translate.get(now_playing_str):
         now_playing_str = translate[now_playing_str]
@@ -1712,15 +1729,18 @@ def combine_textarea_lines(text: str) -> str:
     result = ''
     ctr = 0
     paragraphs = text.split('\n\n')
+    replacements = {
+        '\n* ': '***BULLET POINT*** ',
+        '\n * ': '***BULLET POINT*** ',
+        '\n- ': '***DASH POINT*** ',
+        '\n - ': '***DASH POINT*** ',
+        '\n': ' ',
+        '  ': ' ',
+        '***BULLET POINT*** ': '\n* ',
+        '***DASH POINT*** ': '\n- '
+    }
     for para in paragraphs:
-        para = para.replace('\n* ', '***BULLET POINT*** ')
-        para = para.replace('\n * ', '***BULLET POINT*** ')
-        para = para.replace('\n- ', '***DASH POINT*** ')
-        para = para.replace('\n - ', '***DASH POINT*** ')
-        para = para.replace('\n', ' ')
-        para = para.replace('  ', ' ')
-        para = para.replace('***BULLET POINT*** ', '\n* ')
-        para = para.replace('***DASH POINT*** ', '\n- ')
+        para = replace_strings(para, replacements)
         if ctr > 0:
             result += '</p><p>'
         result += para
diff --git a/daemon_post.py b/daemon_post.py
index 35e78fff0..f55dc47a4 100644
--- a/daemon_post.py
+++ b/daemon_post.py
@@ -11,6 +11,7 @@ import time
 import errno
 import json
 from socket import error as SocketError
+from utils import replace_strings
 from utils import corp_servers
 from utils import string_ends_with
 from utils import get_config_param
@@ -204,12 +205,15 @@ def daemon_http_post(self) -> None:
 
     # remove any trailing slashes from the path
     if not self.path.endswith('confirm'):
-        self.path = self.path.replace('/outbox/', '/outbox')
-        self.path = self.path.replace('/tlblogs/', '/tlblogs')
-        self.path = self.path.replace('/inbox/', '/inbox')
-        self.path = self.path.replace('/shares/', '/shares')
-        self.path = self.path.replace('/wanted/', '/wanted')
-        self.path = self.path.replace('/sharedInbox/', '/sharedInbox')
+        replacements = {
+            '/outbox/': '/outbox',
+            '/tlblogs/': '/tlblogs',
+            '/inbox/': '/inbox',
+            '/shares/': '/shares',
+            '/wanted/': '/wanted',
+            '/sharedInbox/': '/sharedInbox'
+        }
+        self.path = replace_strings(self.path, replacements)
 
     if self.path == '/inbox':
         if not self.server.enable_shared_inbox:
diff --git a/desktop_client.py b/desktop_client.py
index 86b8ec9ce..55e6597c7 100644
--- a/desktop_client.py
+++ b/desktop_client.py
@@ -16,6 +16,7 @@ import webbrowser
 import urllib.parse
 from pathlib import Path
 from random import randint
+from utils import replace_strings
 from utils import get_post_attachments
 from utils import get_url_from_post
 from utils import get_actor_languages_list
@@ -1996,9 +1997,12 @@ def run_desktop_client(base_dir: str, proxy_type: str, http_prefix: str,
                    command_str.startswith('direct message ') or \
                    command_str.startswith('post ') or \
                    command_str.startswith('send '):
-                    command_str = command_str.replace(' to ', ' ')
-                    command_str = command_str.replace(' dm ', ' ')
-                    command_str = command_str.replace(' DM ', ' ')
+                    replacements = {
+                        ' to ': ' ',
+                        ' dm ': ' ',
+                        ' DM ': ' '
+                    }
+                    command_str = replace_strings(command_str, replacements)
                     # direct message
                     to_handle = None
                     if command_str.startswith('post '):
diff --git a/epicyon.py b/epicyon.py
index 16b098908..35117c9b4 100644
--- a/epicyon.py
+++ b/epicyon.py
@@ -74,6 +74,7 @@ from tests import test_update_actor
 from tests import run_all_tests
 from auth import store_basic_credentials
 from auth import create_password
+from utils import replace_strings
 from utils import set_accounts_data_dir
 from utils import data_dir
 from utils import data_dir_testing
@@ -2914,15 +2915,18 @@ def _command_options() -> None:
         if not person_url:
             person_url = get_user_url(wf_request, 0, argb.debug)
         if nickname == domain:
-            person_url = person_url.replace('/users/', '/actor/')
-            person_url = person_url.replace('/accounts/', '/actor/')
-            person_url = person_url.replace('/channel/', '/actor/')
-            person_url = person_url.replace('/profile/', '/actor/')
-            person_url = person_url.replace('/author/', '/actor/')
-            person_url = person_url.replace('/u/', '/actor/')
-            person_url = person_url.replace('/fediverse/blog/', '/actor/')
-            person_url = person_url.replace('/c/', '/actor/')
-            person_url = person_url.replace('/m/', '/actor/')
+            replacements = {
+                '/users/': '/actor/',
+                '/accounts/': '/actor/',
+                '/channel/': '/actor/',
+                '/profile/': '/actor/',
+                '/author/': '/actor/',
+                '/u/': '/actor/',
+                '/fediverse/blog/': '/actor/',
+                '/c/': '/actor/',
+                '/m/': '/actor/'
+            }
+            person_url = replace_strings(person_url, replacements)
         if not person_url:
             # try single user instance
             person_url = http_prefix + '://' + domain
diff --git a/happening.py b/happening.py
index f4cb7c435..05a47fddf 100644
--- a/happening.py
+++ b/happening.py
@@ -12,6 +12,7 @@ from uuid import UUID
 from hashlib import md5
 from datetime import datetime
 from datetime import timedelta
+from utils import replace_strings
 from utils import date_from_numbers
 from utils import date_from_string_format
 from utils import acct_handle_dir
@@ -405,9 +406,13 @@ def get_todays_events(base_dir: str, nickname: str, domain: str,
 def _ical_date_string(date_str: str) -> str:
     """Returns an icalendar formatted date
     """
-    date_str = date_str.replace('-', '')
-    date_str = date_str.replace(':', '')
-    return date_str.replace(' ', '')
+    replacements = {
+        '-': '',
+        ':': '',
+        ' ': ''
+    }
+    date_str = replace_strings(date_str, replacements)
+    return date_str
 
 
 def _dav_encode_token(year: int, month_number: int,
diff --git a/posts.py b/posts.py
index 92dfd8f96..0808eb89b 100644
--- a/posts.py
+++ b/posts.py
@@ -34,6 +34,7 @@ from webfinger import webfinger_handle
 from httpsig import create_signed_header
 from siteactive import site_is_active
 from languages import understood_post_language
+from utils import replace_strings
 from utils import valid_content_warning
 from utils import get_actor_from_post_id
 from utils import string_contains
@@ -4684,8 +4685,11 @@ def _create_box_items(base_dir: str,
     # Why are url's hashed? Since storage is in the filesystem this avoids
     # confusion with directories by not using the / character
     if first_post_id:
-        first_post_id = first_post_id.replace('--', '#')
-        first_post_id = first_post_id.replace('/', '#')
+        replacements = {
+            '--': '#',
+            '/': '#'
+        }
+        first_post_id = replace_strings(first_post_id, replacements)
 
     try:
         with open(index_filename, 'r', encoding='utf-8') as fp_index:
diff --git a/shares.py b/shares.py
index e00e51e4c..530645d0e 100644
--- a/shares.py
+++ b/shares.py
@@ -23,6 +23,7 @@ from session import post_json
 from session import post_image
 from session import create_session
 from session import get_json_valid
+from utils import replace_strings
 from utils import data_dir
 from utils import resembles_url
 from utils import date_utcnow
@@ -125,10 +126,16 @@ def _get_valid_shared_item_id(actor: str, display_name: str) -> str:
     remove_chars2 = ('+', '/', '\\', '?', '&')
     for char in remove_chars2:
         display_name = display_name.replace(char, '-')
-    display_name = display_name.replace('.', '_')
-    display_name = display_name.replace("’", "'")
-    actor = actor.replace('://', '___')
-    actor = actor.replace('/', '--')
+    replacements = {
+        '.': '_',
+        "’": "'"
+    }
+    display_name = replace_strings(display_name, replacements)
+    replacements2 = {
+        '://': '___',
+        '/': '--'
+    }
+    actor = replace_strings(actor, replacements2)
     return actor + '--shareditems--' + display_name
 
 
@@ -227,8 +234,11 @@ def _getshare_dfc_id(base_dir: str, system_language: str,
     matched_product_type = \
         _dfc_product_type_from_category(base_dir, item_category, translate)
     if not matched_product_type:
-        item_type = item_type.replace(' ', '_')
-        item_type = item_type.replace('.', '')
+        replacements = {
+            ' ': '_',
+            '.': ''
+        }
+        item_type = replace_strings(item_type, replacements)
         return 'epicyon#' + item_type
     if not dfc_ids:
         dfc_ids = _load_dfc_ids(base_dir, system_language,
diff --git a/utils.py b/utils.py
index e3d775b4c..8f21e64d3 100644
--- a/utils.py
+++ b/utils.py
@@ -458,16 +458,18 @@ def get_content_from_post(post_json_object: {}, system_language: str,
        not has_contentmap_dict:
         return ''
     content = ''
+    replacements = {
+        '&amp;': '&',
+        '<u>': '',
+        '</u>': ''
+    }
     if has_contentmap_dict:
         if this_post_json[map_dict].get(system_language):
             sys_lang = this_post_json[map_dict][system_language]
             if isinstance(sys_lang, str):
                 content = sys_lang
                 content = remove_markup_tag(content, 'pre')
-                content = content.replace('&amp;', '&')
-                # remove underlines
-                content = content.replace('<u>', '')
-                content = content.replace('</u>', '')
+                content = replace_strings(content, replacements)
                 return standardize_text(content)
         else:
             # is there a contentMap/summaryMap entry for one of
@@ -480,18 +482,12 @@ def get_content_from_post(post_json_object: {}, system_language: str,
                     continue
                 content = map_lang
                 content = remove_markup_tag(content, 'pre')
-                content = content.replace('&amp;', '&')
-                # remove underlines
-                content = content.replace('<u>', '')
-                content = content.replace('</u>', '')
+                content = replace_strings(content, replacements)
                 return standardize_text(content)
     else:
         if isinstance(this_post_json[content_type], str):
             content = this_post_json[content_type]
-            content = content.replace('&amp;', '&')
-            # remove underlines
-            content = content.replace('<u>', '')
-            content = content.replace('</u>', '')
+            content = replace_strings(content, replacements)
             content = remove_markup_tag(content, 'pre')
     return standardize_text(content)
 
@@ -1010,9 +1006,14 @@ def remove_html(content: str) -> str:
     if '<' not in content:
         return content
     removing = False
-    content = content.replace('<a href', ' <a href')
-    content = content.replace('<q>', '"').replace('</q>', '"')
-    content = content.replace('</p>', '\n\n').replace('<br>', '\n')
+    replacements = {
+        '<a href': ' <a href',
+        '<q>': '"',
+        '</q>': '"',
+        '</p>': '\n\n',
+        '<br>': '\n'
+    }
+    content = replace_strings(content, replacements)
     result = ''
     for char in content:
         if char == '<':
@@ -5656,3 +5657,11 @@ def get_watermark_file(base_dir: str,
     watermark_file, watermark_filename = \
         get_image_file(base_dir, 'watermark_image', account_dir, '')
     return watermark_file, watermark_filename
+
+
+def replace_strings(text: str, replacements: {}) -> str:
+    """Applies the replacements one after another, in dict insertion order
+    """
+    for orig_str, new_str in replacements.items():
+        text = text.replace(orig_str, new_str)
+    return text
diff --git a/webapp_utils.py b/webapp_utils.py
index 677851600..31dea5b0f 100644
--- a/webapp_utils.py
+++ b/webapp_utils.py
@@ -12,6 +12,7 @@ from shutil import copyfile
 from collections import OrderedDict
 from session import get_json
 from session import get_json_valid
+from utils import replace_strings
 from utils import get_image_file
 from utils import data_dir
 from utils import string_contains
@@ -159,10 +160,13 @@ def csv_following_list(following_filename: str,
                                      following_address)
                 if person_notes:
                     # make notes suitable for csv file
-                    person_notes = person_notes.replace(',', ' ')
-                    person_notes = person_notes.replace('"', "'")
-                    person_notes = person_notes.replace('\n', '<br>')
-                    person_notes = person_notes.replace('  ', ' ')
+                    replacements = {
+                        ',': ' ',
+                        '"': "'",
+                        '\n': '<br>',
+                        '  ': ' '
+                    }
+                    person_notes = replace_strings(person_notes, replacements)
                 if not following_list_csv:
                     following_list_csv = \
                         'Account address,Show boosts,' + \
@@ -538,8 +542,12 @@ def shares_timeline_json(actor: str, page_number: int, items_per_page: int,
                         if '--shareditems--' not in item_id:
                             continue
                         share_actor = item_id.split('--shareditems--')[0]
-                        share_actor = share_actor.replace('___', '://')
-                        share_actor = share_actor.replace('--', '/')
+                        replacements = {
+                            '___': '://',
+                            '--': '/'
+                        }
+                        share_actor = \
+                            replace_strings(share_actor, replacements)
                         share_nickname = get_nickname_from_actor(share_actor)
                         if not share_nickname:
                             continue
@@ -1092,14 +1100,18 @@ def add_emoji_to_display_name(session, base_dir: str, http_prefix: str,
     if ':' not in display_name:
         return display_name
 
-    display_name = display_name.replace('<p>', '').replace('</p>', '')
+    replacements = {
+        '<p>': '',
+        '</p>': ''
+    }
+    display_name = replace_strings(display_name, replacements)
     emoji_tags = {}
 #    print('TAG: display_name before tags: ' + display_name)
     display_name = \
         add_html_tags(base_dir, http_prefix,
                       nickname, domain, display_name, [],
                       emoji_tags, translate)
-    display_name = display_name.replace('<p>', '').replace('</p>', '')
+    display_name = replace_strings(display_name, replacements)
 #    print('TAG: display_name after tags: ' + display_name)
     # convert the emoji dictionary to a list
     emoji_tags_list = []
@@ -2007,7 +2019,11 @@ def html_show_share(base_dir: str, domain: str, nickname: str,
     """
     shares_json = None
 
-    share_url = item_id.replace('___', '://').replace('--', '/')
+    replacements = {
+        '___': '://',
+        '--': '/'
+    }
+    share_url = replace_strings(item_id, replacements)
     contact_nickname = get_nickname_from_actor(share_url)
     if not contact_nickname:
         return None