Replacing multiple strings

2024-08-08 18:23:33 +01:00 · 2024-08-08 18:23:33 +01:00 · d2251eb173
parent 06673b61c7
commit d2251eb173
11 changed files with 171 additions and 80 deletions
--- a/blog.py
+++ b/blog.py
@ -16,6 +16,7 @@ from webapp_utils import html_footer
 from webapp_utils import get_post_attachments_as_html
 from webapp_utils import edit_text_area
 from webapp_media import add_embedded_elements
+from utils import replace_strings
 from utils import data_dir
 from utils import remove_link_tracking
 from utils import get_url_from_post
@ -164,8 +165,11 @@ def _get_blog_replies(base_dir: str, http_prefix: str, translate: {},
        replies_str = ''
        for reply_post_id in lines:
            reply_post_id = remove_eol(reply_post_id)
-            reply_post_id = reply_post_id.replace('.json', '')
-            reply_post_id = reply_post_id.replace('.replies', '')
+            replacements = {
+                '.json': '',
+                '.replies': ''
+            }
+            reply_post_id = replace_strings(reply_post_id, replacements)
            post_filename = acct_dir(base_dir, nickname, domain) + \
                '/postcache/' + \
                reply_post_id.replace('/', '#') + '.html'
@ -438,10 +442,13 @@ def _html_blog_remove_cw_button(blog_str: str, translate: {}) -> str:
    """Removes the CW button from blog posts, where the
    summary field is instead used as the blog title
    """
-    blog_str = blog_str.replace('<details>', '<b>')
-    blog_str = blog_str.replace('</details>', '</b>')
-    blog_str = blog_str.replace('<summary>', '')
-    blog_str = blog_str.replace('</summary>', '')
+    replacements = {
+        '<details>': '<b>',
+        '</details>': '</b>',
+        '<summary>': '',
+        '</summary>': ''
+    }
+    blog_str = replace_strings(blog_str, replacements)
    blog_str = blog_str.replace(translate['SHOW MORE'], '')
    return blog_str

--- a/categories.py
+++ b/categories.py
@ -12,6 +12,7 @@ import datetime
 from utils import data_dir
 from utils import date_utcnow
 from utils import date_epoch
+from utils import replace_strings

 MAX_TAG_LENGTH = 42

@ -51,6 +52,14 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None:
    if translate.get(category_str):
        category_str = translate[category_str]

+    replacements = {
+        ' & ': ' and ',
+        '/': ''
+    }
+    replacements2 = {
+        '-': '',
+        ' ': ''
+    }
    for _, _, files in os.walk(base_dir + '/data/cities'):
        for cities_file in files:
            if not cities_file.endswith('.txt'):
@ -68,10 +77,9 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None:
                continue
            for hashtag in cities:
                hashtag = hashtag.lower().strip()
-                hashtag = hashtag.replace(' & ', ' and ')
-                hashtag = hashtag.replace('/', '')
+                hashtag = replace_strings(hashtag, replacements)

-                hashtag2 = hashtag.replace('-', '').replace(' ', '')
+                hashtag2 = replace_strings(hashtag, replacements2)
                city_filename = base_dir + '/tags/' + hashtag2 + '.category'
                if not os.path.isfile(city_filename):
                    try:
--- a/content.py
+++ b/content.py
@ -15,6 +15,7 @@ import email.parser
 import urllib.parse
 from shutil import copyfile
 from dateutil.parser import parse
+from utils import replace_strings
 from utils import data_dir
 from utils import remove_link_tracking
 from utils import string_contains
@ -143,13 +144,19 @@ def html_replace_email_quote(content: str) -> str:
    if '<p>&quot;' in content:
        if '&quot;</p>' in content:
            if content.count('<p>&quot;') == content.count('&quot;</p>'):
-                content = content.replace('<p>&quot;', '<p><blockquote>')
-                content = content.replace('&quot;</p>', '</blockquote></p>')
+                replacements = {
+                    '<p>&quot;': '<p><blockquote>',
+                    '&quot;</p>': '</blockquote></p>'
+                }
+                content = replace_strings(content, replacements)
    if '>\u201c' in content:
        if '\u201d<' in content:
            if content.count('>\u201c') == content.count('\u201d<'):
-                content = content.replace('>\u201c', '><blockquote>')
-                content = content.replace('\u201d<', '</blockquote><')
+                replacements = {
+                    '>\u201c': '><blockquote>',
+                    '\u201d<': '</blockquote><'
+                }
+                content = replace_strings(content, replacements)
    # replace email style quote
    if '>&gt; ' not in content:
        return content
@ -161,8 +168,11 @@ def html_replace_email_quote(content: str) -> str:
            continue
        if '>&gt; ' not in line_str:
            if line_str.startswith('&gt; '):
-                line_str = line_str.replace('&gt; ', '<blockquote>')
-                line_str = line_str.replace('&gt;', '<br>')
+                replacements = {
+                    '&gt; ': '<blockquote>',
+                    '&gt;': '<br>'
+                }
+                line_str = replace_strings(line_str, replacements)
                new_content += '<p>' + line_str + '</blockquote></p>'
            else:
                new_content += '<p>' + line_str + '</p>'
@ -1194,8 +1204,12 @@ def _get_simplified_content(content: str) -> str:
    """Returns a simplified version of the content suitable for
    splitting up into individual words
    """
-    content_simplified = \
-        content.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
+    replacements = {
+        ',': ' ',
+        ';': ' ',
+        '- ': ' '
+    }
+    content_simplified = replace_strings(content, replacements)
    content_simplified = content_simplified.replace('. ', ' ').strip()
    if content_simplified.endswith('.'):
        content_simplified = content_simplified[:len(content_simplified)-1]
@ -1338,8 +1352,11 @@ def add_html_tags(base_dir: str, http_prefix: str,
        content = html_replace_email_quote(content)
        return html_replace_quote_marks(content)
    max_word_length = 40
-    content = content.replace('\r', '')
-    content = content.replace('\n', ' --linebreak-- ')
+    replacements = {
+        '\r': '',
+        '\n': ' --linebreak-- '
+    }
+    content = replace_strings(content, replacements)
    now_playing_str = 'NowPlaying'
    if translate.get(now_playing_str):
        now_playing_str = translate[now_playing_str]
@ -1712,15 +1729,18 @@ def combine_textarea_lines(text: str) -> str:
    result = ''
    ctr = 0
    paragraphs = text.split('\n\n')
+    replacements = {
+        '\n* ': '***BULLET POINT*** ',
+        '\n * ': '***BULLET POINT*** ',
+        '\n- ': '***DASH POINT*** ',
+        '\n - ': '***DASH POINT*** ',
+        '\n': ' ',
+        '  ': ' ',
+        '***BULLET POINT*** ': '\n* ',
+        '***DASH POINT*** ': '\n- '
+    }
    for para in paragraphs:
-        para = para.replace('\n* ', '***BULLET POINT*** ')
-        para = para.replace('\n * ', '***BULLET POINT*** ')
-        para = para.replace('\n- ', '***DASH POINT*** ')
-        para = para.replace('\n - ', '***DASH POINT*** ')
-        para = para.replace('\n', ' ')
-        para = para.replace('  ', ' ')
-        para = para.replace('***BULLET POINT*** ', '\n* ')
-        para = para.replace('***DASH POINT*** ', '\n- ')
+        para = replace_strings(para, replacements)
        if ctr > 0:
            result += '</p><p>'
        result += para
--- a/daemon_post.py
+++ b/daemon_post.py
@ -11,6 +11,7 @@ import time
 import errno
 import json
 from socket import error as SocketError
+from utils import replace_strings
 from utils import corp_servers
 from utils import string_ends_with
 from utils import get_config_param
@ -204,12 +205,15 @@ def daemon_http_post(self) -> None:

    # remove any trailing slashes from the path
    if not self.path.endswith('confirm'):
-        self.path = self.path.replace('/outbox/', '/outbox')
-        self.path = self.path.replace('/tlblogs/', '/tlblogs')
-        self.path = self.path.replace('/inbox/', '/inbox')
-        self.path = self.path.replace('/shares/', '/shares')
-        self.path = self.path.replace('/wanted/', '/wanted')
-        self.path = self.path.replace('/sharedInbox/', '/sharedInbox')
+        replacements = {
+            '/outbox/': '/outbox',
+            '/tlblogs/': '/tlblogs',
+            '/inbox/': '/inbox',
+            '/shares/': '/shares',
+            '/wanted/': '/wanted',
+            '/sharedInbox/': '/sharedInbox'
+        }
+        self.path = replace_strings(self.path, replacements)

    if self.path == '/inbox':
        if not self.server.enable_shared_inbox:
--- a/desktop_client.py
+++ b/desktop_client.py
@ -16,6 +16,7 @@ import webbrowser
 import urllib.parse
 from pathlib import Path
 from random import randint
+from utils import replace_strings
 from utils import get_post_attachments
 from utils import get_url_from_post
 from utils import get_actor_languages_list
@ -1996,9 +1997,12 @@ def run_desktop_client(base_dir: str, proxy_type: str, http_prefix: str,
                   command_str.startswith('direct message ') or \
                   command_str.startswith('post ') or \
                   command_str.startswith('send '):
-                    command_str = command_str.replace(' to ', ' ')
-                    command_str = command_str.replace(' dm ', ' ')
-                    command_str = command_str.replace(' DM ', ' ')
+                    replacements = {
+                        ' to ': ' ',
+                        ' dm ': ' ',
+                        ' DM ': ' '
+                    }
+                    command_str = replace_strings(command_str, replacements)
                    # direct message
                    to_handle = None
                    if command_str.startswith('post '):
--- a/epicyon.py
+++ b/epicyon.py
@ -74,6 +74,7 @@ from tests import test_update_actor
 from tests import run_all_tests
 from auth import store_basic_credentials
 from auth import create_password
+from utils import replace_strings
 from utils import set_accounts_data_dir
 from utils import data_dir
 from utils import data_dir_testing
@ -2914,15 +2915,18 @@ def _command_options() -> None:
        if not person_url:
            person_url = get_user_url(wf_request, 0, argb.debug)
        if nickname == domain:
-            person_url = person_url.replace('/users/', '/actor/')
-            person_url = person_url.replace('/accounts/', '/actor/')
-            person_url = person_url.replace('/channel/', '/actor/')
-            person_url = person_url.replace('/profile/', '/actor/')
-            person_url = person_url.replace('/author/', '/actor/')
-            person_url = person_url.replace('/u/', '/actor/')
-            person_url = person_url.replace('/fediverse/blog/', '/actor/')
-            person_url = person_url.replace('/c/', '/actor/')
-            person_url = person_url.replace('/m/', '/actor/')
+            replacements = {
+                '/users/': '/actor/',
+                '/accounts/': '/actor/',
+                '/channel/': '/actor/',
+                '/profile/': '/actor/',
+                '/author/': '/actor/',
+                '/u/': '/actor/',
+                '/fediverse/blog/': '/actor/',
+                '/c/': '/actor/',
+                '/m/': '/actor/'
+            }
+            person_url = replace_strings(person_url, replacements)
        if not person_url:
            # try single user instance
            person_url = http_prefix + '://' + domain
--- a/happening.py
+++ b/happening.py
@ -12,6 +12,7 @@ from uuid import UUID
 from hashlib import md5
 from datetime import datetime
 from datetime import timedelta
+from utils import replace_strings
 from utils import date_from_numbers
 from utils import date_from_string_format
 from utils import acct_handle_dir
@ -405,9 +406,13 @@ def get_todays_events(base_dir: str, nickname: str, domain: str,
 def _ical_date_string(date_str: str) -> str:
    """Returns an icalendar formatted date
    """
-    date_str = date_str.replace('-', '')
-    date_str = date_str.replace(':', '')
-    return date_str.replace(' ', '')
+    replacements = {
+        '-': '',
+        ':': '',
+        ' ': ''
+    }
+    date_str = replace_strings(date_str, replacements)
+    return date_str


 def _dav_encode_token(year: int, month_number: int,
--- a/posts.py
+++ b/posts.py
@ -34,6 +34,7 @@ from webfinger import webfinger_handle
 from httpsig import create_signed_header
 from siteactive import site_is_active
 from languages import understood_post_language
+from utils import replace_strings
 from utils import valid_content_warning
 from utils import get_actor_from_post_id
 from utils import string_contains
@ -4684,8 +4685,11 @@ def _create_box_items(base_dir: str,
    # Why are url's hashed? Since storage is in the filesystem this avoids
    # confusion with directories by not using the / character
    if first_post_id:
-        first_post_id = first_post_id.replace('--', '#')
-        first_post_id = first_post_id.replace('/', '#')
+        replacements = {
+            '--': '#',
+            '/': '#'
+        }
+        first_post_id = replace_strings(first_post_id, replacements)

    try:
        with open(index_filename, 'r', encoding='utf-8') as fp_index:
--- a/shares.py
+++ b/shares.py
@ -23,6 +23,7 @@ from session import post_json
 from session import post_image
 from session import create_session
 from session import get_json_valid
+from utils import replace_strings
 from utils import data_dir
 from utils import resembles_url
 from utils import date_utcnow
@ -125,10 +126,16 @@ def _get_valid_shared_item_id(actor: str, display_name: str) -> str:
    remove_chars2 = ('+', '/', '\\', '?', '&')
    for char in remove_chars2:
        display_name = display_name.replace(char, '-')
-    display_name = display_name.replace('.', '_')
-    display_name = display_name.replace("’", "'")
-    actor = actor.replace('://', '___')
-    actor = actor.replace('/', '--')
+    replacements = {
+        '.': '_',
+        "’": "'"
+    }
+    display_name = replace_strings(display_name, replacements)
+    replacements2 = {
+        '://': '___',
+        '/': '--'
+    }
+    actor = replace_strings(actor, replacements2)
    return actor + '--shareditems--' + display_name


@ -227,8 +234,11 @@ def _getshare_dfc_id(base_dir: str, system_language: str,
    matched_product_type = \
        _dfc_product_type_from_category(base_dir, item_category, translate)
    if not matched_product_type:
-        item_type = item_type.replace(' ', '_')
-        item_type = item_type.replace('.', '')
+        replacements = {
+            ' ': '_',
+            '.': ''
+        }
+        item_type = replace_strings(item_type, replacements)
        return 'epicyon#' + item_type
    if not dfc_ids:
        dfc_ids = _load_dfc_ids(base_dir, system_language,
--- a/utils.py
+++ b/utils.py
@ -458,16 +458,18 @@ def get_content_from_post(post_json_object: {}, system_language: str,
       not has_contentmap_dict:
        return ''
    content = ''
+    replacements = {
+        '&amp;': '&',
+        '<u>': '',
+        '</u>': ''
+    }
    if has_contentmap_dict:
        if this_post_json[map_dict].get(system_language):
            sys_lang = this_post_json[map_dict][system_language]
            if isinstance(sys_lang, str):
                content = sys_lang
                content = remove_markup_tag(content, 'pre')
-                content = content.replace('&amp;', '&')
-                # remove underlines
-                content = content.replace('<u>', '')
-                content = content.replace('</u>', '')
+                content = replace_strings(content, replacements)
                return standardize_text(content)
        else:
            # is there a contentMap/summaryMap entry for one of
@ -480,18 +482,12 @@ def get_content_from_post(post_json_object: {}, system_language: str,
                    continue
                content = map_lang
                content = remove_markup_tag(content, 'pre')
-                content = content.replace('&amp;', '&')
-                # remove underlines
-                content = content.replace('<u>', '')
-                content = content.replace('</u>', '')
+                content = replace_strings(content, replacements)
                return standardize_text(content)
    else:
        if isinstance(this_post_json[content_type], str):
            content = this_post_json[content_type]
-            content = content.replace('&amp;', '&')
-            # remove underlines
-            content = content.replace('<u>', '')
-            content = content.replace('</u>', '')
+            content = replace_strings(content, replacements)
            content = remove_markup_tag(content, 'pre')
    return standardize_text(content)

@ -1010,9 +1006,14 @@ def remove_html(content: str) -> str:
    if '<' not in content:
        return content
    removing = False
-    content = content.replace('<a href', ' <a href')
-    content = content.replace('<q>', '"').replace('</q>', '"')
-    content = content.replace('</p>', '\n\n').replace('<br>', '\n')
+    replacements = {
+        '<a href': ' <a href',
+        '<q>': '"',
+        '</q>': '"',
+        '</p>': '\n\n',
+        '<br>': '\n'
+    }
+    content = replace_strings(content, replacements)
    result = ''
    for char in content:
        if char == '<':
@ -5656,3 +5657,11 @@ def get_watermark_file(base_dir: str,
    watermark_file, watermark_filename = \
        get_image_file(base_dir, 'watermark_image', account_dir, '')
    return watermark_file, watermark_filename
+
+
+def replace_strings(text: str, replacements: {}) -> str:
+    """Replaces each key of replacements with its value, in dict order
+    """
+    for orig_str, new_str in replacements.items():
+        text = text.replace(orig_str, new_str)
+    return text
--- a/webapp_utils.py
+++ b/webapp_utils.py
@ -12,6 +12,7 @@ from shutil import copyfile
 from collections import OrderedDict
 from session import get_json
 from session import get_json_valid
+from utils import replace_strings
 from utils import get_image_file
 from utils import data_dir
 from utils import string_contains
@ -159,10 +160,13 @@ def csv_following_list(following_filename: str,
                                     following_address)
                if person_notes:
                    # make notes suitable for csv file
-                    person_notes = person_notes.replace(',', ' ')
-                    person_notes = person_notes.replace('"', "'")
-                    person_notes = person_notes.replace('\n', '<br>')
-                    person_notes = person_notes.replace('  ', ' ')
+                    replacements = {
+                        ',': ' ',
+                        '"': "'",
+                        '\n': '<br>',
+                        '  ': ' '
+                    }
+                    person_notes = replace_strings(person_notes, replacements)
                if not following_list_csv:
                    following_list_csv = \
                        'Account address,Show boosts,' + \
@ -538,8 +542,12 @@ def shares_timeline_json(actor: str, page_number: int, items_per_page: int,
                        if '--shareditems--' not in item_id:
                            continue
                        share_actor = item_id.split('--shareditems--')[0]
-                        share_actor = share_actor.replace('___', '://')
-                        share_actor = share_actor.replace('--', '/')
+                        replacements = {
+                            '___': '://',
+                            '--': '/'
+                        }
+                        share_actor = \
+                            replace_strings(share_actor, replacements)
                        share_nickname = get_nickname_from_actor(share_actor)
                        if not share_nickname:
                            continue
@ -1092,14 +1100,18 @@ def add_emoji_to_display_name(session, base_dir: str, http_prefix: str,
    if ':' not in display_name:
        return display_name

-    display_name = display_name.replace('<p>', '').replace('</p>', '')
+    replacements = {
+        '<p>': '',
+        '</p>': ''
+    }
+    display_name = replace_strings(display_name, replacements)
    emoji_tags = {}
 #    print('TAG: display_name before tags: ' + display_name)
    display_name = \
        add_html_tags(base_dir, http_prefix,
                      nickname, domain, display_name, [],
                      emoji_tags, translate)
-    display_name = display_name.replace('<p>', '').replace('</p>', '')
+    display_name = replace_strings(display_name, replacements)
 #    print('TAG: display_name after tags: ' + display_name)
    # convert the emoji dictionary to a list
    emoji_tags_list = []
@ -2007,7 +2019,11 @@ def html_show_share(base_dir: str, domain: str, nickname: str,
    """
    shares_json = None

-    share_url = item_id.replace('___', '://').replace('--', '/')
+    replacements = {
+        '___': '://',
+        '--': '/'
+    }
+    share_url = replace_strings(item_id, replacements)
    contact_nickname = get_nickname_from_actor(share_url)
    if not contact_nickname:
        return None