From d2251eb1732eb566d7e6849f839041aa54fdfe86 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 8 Aug 2024 18:23:33 +0100 Subject: [PATCH] Replacing multiple strings --- blog.py | 19 +++++++++++----- categories.py | 14 +++++++++--- content.py | 56 ++++++++++++++++++++++++++++++++--------------- daemon_post.py | 16 +++++++++----- desktop_client.py | 10 ++++++--- epicyon.py | 22 +++++++++++-------- happening.py | 11 +++++++--- posts.py | 8 +++++-- shares.py | 22 ++++++++++++++----- utils.py | 39 ++++++++++++++++++++------------- webapp_utils.py | 34 ++++++++++++++++++++-------- 11 files changed, 171 insertions(+), 80 deletions(-) diff --git a/blog.py b/blog.py index b2bf66caa..3b590e6cf 100644 --- a/blog.py +++ b/blog.py @@ -16,6 +16,7 @@ from webapp_utils import html_footer from webapp_utils import get_post_attachments_as_html from webapp_utils import edit_text_area from webapp_media import add_embedded_elements +from utils import replace_strings from utils import data_dir from utils import remove_link_tracking from utils import get_url_from_post @@ -164,8 +165,11 @@ def _get_blog_replies(base_dir: str, http_prefix: str, translate: {}, replies_str = '' for reply_post_id in lines: reply_post_id = remove_eol(reply_post_id) - reply_post_id = reply_post_id.replace('.json', '') - reply_post_id = reply_post_id.replace('.replies', '') + replacements = { + '.json': '', + '.replies': '' + } + reply_post_id = replace_strings(reply_post_id, replacements) post_filename = acct_dir(base_dir, nickname, domain) + \ '/postcache/' + \ reply_post_id.replace('/', '#') + '.html' @@ -438,10 +442,13 @@ def _html_blog_remove_cw_button(blog_str: str, translate: {}) -> str: """Removes the CW button from blog posts, where the summary field is instead used as the blog title """ - blog_str = blog_str.replace('
', '') - blog_str = blog_str.replace('
', '') - blog_str = blog_str.replace('', '') - blog_str = blog_str.replace('', '') + replacements = { + '
': '', + '
': '', + '': '', + '': '' + } + blog_str = replace_strings(blog_str, replacements) blog_str = blog_str.replace(translate['SHOW MORE'], '') return blog_str diff --git a/categories.py b/categories.py index ff9826f23..6e7a5774d 100644 --- a/categories.py +++ b/categories.py @@ -12,6 +12,7 @@ import datetime from utils import data_dir from utils import date_utcnow from utils import date_epoch +from utils import replace_strings MAX_TAG_LENGTH = 42 @@ -51,6 +52,14 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None: if translate.get(category_str): category_str = translate[category_str] + replacements = { + ' & ': ' and ', + '/': '' + } + replacements2 = { + '-': '', + ' ': '' + } for _, _, files in os.walk(base_dir + '/data/cities'): for cities_file in files: if not cities_file.endswith('.txt'): @@ -68,10 +77,9 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None: continue for hashtag in cities: hashtag = hashtag.lower().strip() - hashtag = hashtag.replace(' & ', ' and ') - hashtag = hashtag.replace('/', '') + hashtag = replace_strings(hashtag, replacements) - hashtag2 = hashtag.replace('-', '').replace(' ', '') + hashtag2 = replace_strings(hashtag, replacements2) city_filename = base_dir + '/tags/' + hashtag2 + '.category' if not os.path.isfile(city_filename): try: diff --git a/content.py b/content.py index 6a5341830..36aae0278 100644 --- a/content.py +++ b/content.py @@ -15,6 +15,7 @@ import email.parser import urllib.parse from shutil import copyfile from dateutil.parser import parse +from utils import replace_strings from utils import data_dir from utils import remove_link_tracking from utils import string_contains @@ -143,13 +144,19 @@ def html_replace_email_quote(content: str) -> str: if '

"' in content: if '"

' in content: if content.count('

"') == content.count('"

'): - content = content.replace('

"', '

') - content = content.replace('"

', '

') + replacements = { + '

"': '

', + '"

': '

' + } + content = replace_strings(content, replacements) if '>\u201c' in content: if '\u201d<' in content: if content.count('>\u201c') == content.count('\u201d<'): - content = content.replace('>\u201c', '>
') - content = content.replace('\u201d<', '
<') + replacements = { + '>\u201c': '>
', + '\u201d<': '
<' + } + content = replace_strings(content, replacements) # replace email style quote if '>> ' not in content: return content @@ -161,8 +168,11 @@ def html_replace_email_quote(content: str) -> str: continue if '>> ' not in line_str: if line_str.startswith('> '): - line_str = line_str.replace('> ', '
') - line_str = line_str.replace('>', '
') + replacements = { + '> ': '
', + '>': '
' + } + line_str = replace_strings(line_str, replacements) new_content += '

' + line_str + '

' else: new_content += '

' + line_str + '

' @@ -1194,8 +1204,12 @@ def _get_simplified_content(content: str) -> str: """Returns a simplified version of the content suitable for splitting up into individual words """ - content_simplified = \ - content.replace(',', ' ').replace(';', ' ').replace('- ', ' ') + replacements = { + ',': ' ', + ';': ' ', + '- ': ' ' + } + content_simplified = replace_strings(content, replacements) content_simplified = content_simplified.replace('. ', ' ').strip() if content_simplified.endswith('.'): content_simplified = content_simplified[:len(content_simplified)-1] @@ -1338,8 +1352,11 @@ def add_html_tags(base_dir: str, http_prefix: str, content = html_replace_email_quote(content) return html_replace_quote_marks(content) max_word_length = 40 - content = content.replace('\r', '') - content = content.replace('\n', ' --linebreak-- ') + replacements = { + '\r': '', + '\n': ' --linebreak-- ' + } + content = replace_strings(content, replacements) now_playing_str = 'NowPlaying' if translate.get(now_playing_str): now_playing_str = translate[now_playing_str] @@ -1712,15 +1729,18 @@ def combine_textarea_lines(text: str) -> str: result = '' ctr = 0 paragraphs = text.split('\n\n') + replacements = { + '\n* ': '***BULLET POINT*** ', + '\n * ': '***BULLET POINT*** ', + '\n- ': '***DASH POINT*** ', + '\n - ': '***DASH POINT*** ', + '\n': ' ', + ' ': ' ', + '***BULLET POINT*** ': '\n* ', + '***DASH POINT*** ': '\n- ' + } for para in paragraphs: - para = para.replace('\n* ', '***BULLET POINT*** ') - para = para.replace('\n * ', '***BULLET POINT*** ') - para = para.replace('\n- ', '***DASH POINT*** ') - para = para.replace('\n - ', '***DASH POINT*** ') - para = para.replace('\n', ' ') - para = para.replace(' ', ' ') - para = para.replace('***BULLET POINT*** ', '\n* ') - para = para.replace('***DASH POINT*** ', '\n- ') + para = replace_strings(para, replacements) if ctr > 0: result += '

' result += para diff --git a/daemon_post.py b/daemon_post.py index 35e78fff0..f55dc47a4 100644 --- a/daemon_post.py +++ b/daemon_post.py @@ -11,6 +11,7 @@ import time import errno import json from socket import error as SocketError +from utils import replace_strings from utils import corp_servers from utils import string_ends_with from utils import get_config_param @@ -204,12 +205,15 @@ def daemon_http_post(self) -> None: # remove any trailing slashes from the path if not self.path.endswith('confirm'): - self.path = self.path.replace('/outbox/', '/outbox') - self.path = self.path.replace('/tlblogs/', '/tlblogs') - self.path = self.path.replace('/inbox/', '/inbox') - self.path = self.path.replace('/shares/', '/shares') - self.path = self.path.replace('/wanted/', '/wanted') - self.path = self.path.replace('/sharedInbox/', '/sharedInbox') + replacements = { + '/outbox/': '/outbox', + '/tlblogs/': '/tlblogs', + '/inbox/': '/inbox', + '/shares/': '/shares', + '/wanted/': '/wanted', + '/sharedInbox/': '/sharedInbox' + } + self.path = replace_strings(self.path, replacements) if self.path == '/inbox': if not self.server.enable_shared_inbox: diff --git a/desktop_client.py b/desktop_client.py index 86b8ec9ce..55e6597c7 100644 --- a/desktop_client.py +++ b/desktop_client.py @@ -16,6 +16,7 @@ import webbrowser import urllib.parse from pathlib import Path from random import randint +from utils import replace_strings from utils import get_post_attachments from utils import get_url_from_post from utils import get_actor_languages_list @@ -1996,9 +1997,12 @@ def run_desktop_client(base_dir: str, proxy_type: str, http_prefix: str, command_str.startswith('direct message ') or \ command_str.startswith('post ') or \ command_str.startswith('send '): - command_str = command_str.replace(' to ', ' ') - command_str = command_str.replace(' dm ', ' ') - command_str = command_str.replace(' DM ', ' ') + replacements = { + ' to ': ' ', + ' dm ': ' ', + ' DM ': ' ' + } + command_str = replace_strings(command_str, replacements) # direct message to_handle = None if command_str.startswith('post '): diff --git a/epicyon.py b/epicyon.py index 16b098908..35117c9b4 100644 --- a/epicyon.py +++ b/epicyon.py @@ -74,6 +74,7 @@ from tests import test_update_actor from tests import run_all_tests from auth import store_basic_credentials from auth import create_password +from utils import replace_strings from utils import set_accounts_data_dir from utils import data_dir from utils import data_dir_testing @@ -2914,15 +2915,18 @@ def _command_options() -> None: if not person_url: person_url = get_user_url(wf_request, 0, argb.debug) if nickname == domain: - person_url = person_url.replace('/users/', '/actor/') - person_url = person_url.replace('/accounts/', '/actor/') - person_url = person_url.replace('/channel/', '/actor/') - person_url = person_url.replace('/profile/', '/actor/') - person_url = person_url.replace('/author/', '/actor/') - person_url = person_url.replace('/u/', '/actor/') - person_url = person_url.replace('/fediverse/blog/', '/actor/') - person_url = person_url.replace('/c/', '/actor/') - person_url = person_url.replace('/m/', '/actor/') + replacements = { + '/users/': '/actor/', + '/accounts/': '/actor/', + '/channel/': '/actor/', + '/profile/': '/actor/', + '/author/': '/actor/', + '/u/': '/actor/', + '/fediverse/blog/': '/actor/', + '/c/': '/actor/', + '/m/': '/actor/' + } + person_url = replace_strings(person_url, replacements) if not person_url: # try single user instance person_url = http_prefix + '://' + domain diff --git a/happening.py b/happening.py index f4cb7c435..05a47fddf 100644 --- a/happening.py +++ b/happening.py @@ -12,6 +12,7 @@ from uuid import UUID from hashlib import md5 from datetime import datetime from datetime import timedelta +from utils import replace_strings from utils import date_from_numbers from utils import date_from_string_format from utils import acct_handle_dir @@ -405,9 +406,13 @@ def get_todays_events(base_dir: str, nickname: str, domain: str, def _ical_date_string(date_str: str) -> str: """Returns an icalendar formatted date """ - date_str = date_str.replace('-', '') - date_str = date_str.replace(':', '') - return date_str.replace(' ', '') + replacements = { + '-': '', + ':': '', + ' ': '' + } + date_str = replace_strings(date_str, replacements) + return date_str def _dav_encode_token(year: int, month_number: int, diff --git a/posts.py b/posts.py index 92dfd8f96..0808eb89b 100644 --- a/posts.py +++ b/posts.py @@ -34,6 +34,7 @@ from webfinger import webfinger_handle from httpsig import create_signed_header from siteactive import site_is_active from languages import understood_post_language +from utils import replace_strings from utils import valid_content_warning from utils import get_actor_from_post_id from utils import string_contains @@ -4684,8 +4685,11 @@ def _create_box_items(base_dir: str, # Why are url's hashed? Since storage is in the filesystem this avoids # confusion with directories by not using the / character if first_post_id: - first_post_id = first_post_id.replace('--', '#') - first_post_id = first_post_id.replace('/', '#') + replacements = { + '--': '#', + '/': '#' + } + first_post_id = replace_strings(first_post_id, replacements) try: with open(index_filename, 'r', encoding='utf-8') as fp_index: diff --git a/shares.py b/shares.py index e00e51e4c..530645d0e 100644 --- a/shares.py +++ b/shares.py @@ -23,6 +23,7 @@ from session import post_json from session import post_image from session import create_session from session import get_json_valid +from utils import replace_strings from utils import data_dir from utils import resembles_url from utils import date_utcnow @@ -125,10 +126,16 @@ def _get_valid_shared_item_id(actor: str, display_name: str) -> str: remove_chars2 = ('+', '/', '\\', '?', '&') for char in remove_chars2: display_name = display_name.replace(char, '-') - display_name = display_name.replace('.', '_') - display_name = display_name.replace("’", "'") - actor = actor.replace('://', '___') - actor = actor.replace('/', '--') + replacements = { + '.': '_', + "’": "'" + } + display_name = replace_strings(display_name, replacements) + replacements2 = { + '://': '___', + '/': '--' + } + actor = replace_strings(actor, replacements2) return actor + '--shareditems--' + display_name @@ -227,8 +234,11 @@ def _getshare_dfc_id(base_dir: str, system_language: str, matched_product_type = \ _dfc_product_type_from_category(base_dir, item_category, translate) if not matched_product_type: - item_type = item_type.replace(' ', '_') - item_type = item_type.replace('.', '') + replacements = { + ' ': '_', + '.': '' + } + item_type = replace_strings(item_type, replacements) return 'epicyon#' + item_type if not dfc_ids: dfc_ids = _load_dfc_ids(base_dir, system_language, diff --git a/utils.py b/utils.py index e3d775b4c..8f21e64d3 100644 --- a/utils.py +++ b/utils.py @@ -458,16 +458,18 @@ def get_content_from_post(post_json_object: {}, system_language: str, not has_contentmap_dict: return '' content = '' + replacements = { + '&': '&', + '': '', + '': '' + } if has_contentmap_dict: if this_post_json[map_dict].get(system_language): sys_lang = this_post_json[map_dict][system_language] if isinstance(sys_lang, str): content = sys_lang content = remove_markup_tag(content, 'pre') - content = content.replace('&', '&') - # remove underlines - content = content.replace('', '') - content = content.replace('', '') + content = replace_strings(content, replacements) return standardize_text(content) else: # is there a contentMap/summaryMap entry for one of @@ -480,18 +482,12 @@ def get_content_from_post(post_json_object: {}, system_language: str, continue content = map_lang content = remove_markup_tag(content, 'pre') - content = content.replace('&', '&') - # remove underlines - content = content.replace('', '') - content = content.replace('', '') + content = replace_strings(content, replacements) return standardize_text(content) else: if isinstance(this_post_json[content_type], str): content = this_post_json[content_type] - content = content.replace('&', '&') - # remove underlines - content = content.replace('', '') - content = content.replace('', '') + content = replace_strings(content, replacements) content = remove_markup_tag(content, 'pre') return standardize_text(content) @@ -1010,9 +1006,14 @@ def remove_html(content: str) -> str: if '<' not in content: return content removing = False - content = content.replace('', '"').replace('', '"') - content = content.replace('

', '\n\n').replace('
', '\n') + replacements = { + '
': '"', + '': '"', + '

': '\n\n', + '
': '\n' + } + content = replace_strings(content, replacements) result = '' for char in content: if char == '<': @@ -5656,3 +5657,11 @@ def get_watermark_file(base_dir: str, watermark_file, watermark_filename = \ get_image_file(base_dir, 'watermark_image', account_dir, '') return watermark_file, watermark_filename + + +def replace_strings(text: str, replacements: {}) -> str: + """Does a series of string replacements + """ + for orig_str, new_str in replacements.items(): + text = text.replace(orig_str, new_str) + return text diff --git a/webapp_utils.py b/webapp_utils.py index 677851600..31dea5b0f 100644 --- a/webapp_utils.py +++ b/webapp_utils.py @@ -12,6 +12,7 @@ from shutil import copyfile from collections import OrderedDict from session import get_json from session import get_json_valid +from utils import replace_strings from utils import get_image_file from utils import data_dir from utils import string_contains @@ -159,10 +160,13 @@ def csv_following_list(following_filename: str, following_address) if person_notes: # make notes suitable for csv file - person_notes = person_notes.replace(',', ' ') - person_notes = person_notes.replace('"', "'") - person_notes = person_notes.replace('\n', '
') - person_notes = person_notes.replace(' ', ' ') + replacements = { + ',': ' ', + '"': "'", + '\n': '
', + ' ': ' ' + } + person_notes = replace_strings(person_notes, replacements) if not following_list_csv: following_list_csv = \ 'Account address,Show boosts,' + \ @@ -538,8 +542,12 @@ def shares_timeline_json(actor: str, page_number: int, items_per_page: int, if '--shareditems--' not in item_id: continue share_actor = item_id.split('--shareditems--')[0] - share_actor = share_actor.replace('___', '://') - share_actor = share_actor.replace('--', '/') + replacements = { + '___': '://', + '--': '/' + } + share_actor = \ + replace_strings(share_actor, replacements) share_nickname = get_nickname_from_actor(share_actor) if not share_nickname: continue @@ -1092,14 +1100,18 @@ def add_emoji_to_display_name(session, base_dir: str, http_prefix: str, if ':' not in display_name: return display_name - display_name = display_name.replace('

', '').replace('

', '') + replacements = { + '

': '', + '

': '' + } + display_name = replace_strings(display_name, replacements) emoji_tags = {} # print('TAG: display_name before tags: ' + display_name) display_name = \ add_html_tags(base_dir, http_prefix, nickname, domain, display_name, [], emoji_tags, translate) - display_name = display_name.replace('

', '').replace('

', '') + display_name = replace_strings(display_name, replacements) # print('TAG: display_name after tags: ' + display_name) # convert the emoji dictionary to a list emoji_tags_list = [] @@ -2007,7 +2019,11 @@ def html_show_share(base_dir: str, domain: str, nickname: str, """ shares_json = None - share_url = item_id.replace('___', '://').replace('--', '/') + replacements = { + '___': '://', + '--': '/' + } + share_url = replace_strings(item_id, replacements) contact_nickname = get_nickname_from_actor(share_url) if not contact_nickname: return None