Replacing multiple strings

main
Bob Mottram 2024-08-08 18:23:33 +01:00
parent 06673b61c7
commit d2251eb173
11 changed files with 171 additions and 80 deletions
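Every file in this commit follows the same pattern: a run of repeated str.replace() calls is collapsed into one dictionary of substitutions handed to the new replace_strings() helper added at the end of the utils module (see the final hunk of that file below). A minimal before/after sketch of the idea, reusing two of the path substitutions from the daemon hunk purely as an example:

    # before: one replace() call per substitution
    path = path.replace('/outbox/', '/outbox')
    path = path.replace('/inbox/', '/inbox')

    # after: one dictionary of substitutions, one helper call
    replacements = {
        '/outbox/': '/outbox',
        '/inbox/': '/inbox'
    }
    path = replace_strings(path, replacements)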

blog.py

@@ -16,6 +16,7 @@ from webapp_utils import html_footer
 from webapp_utils import get_post_attachments_as_html
 from webapp_utils import edit_text_area
 from webapp_media import add_embedded_elements
+from utils import replace_strings
 from utils import data_dir
 from utils import remove_link_tracking
 from utils import get_url_from_post
@@ -164,8 +165,11 @@ def _get_blog_replies(base_dir: str, http_prefix: str, translate: {},
     replies_str = ''
     for reply_post_id in lines:
         reply_post_id = remove_eol(reply_post_id)
-        reply_post_id = reply_post_id.replace('.json', '')
-        reply_post_id = reply_post_id.replace('.replies', '')
+        replacements = {
+            '.json': '',
+            '.replies': ''
+        }
+        reply_post_id = replace_strings(reply_post_id, replacements)
         post_filename = acct_dir(base_dir, nickname, domain) + \
             '/postcache/' + \
             reply_post_id.replace('/', '#') + '.html'
@@ -438,10 +442,13 @@ def _html_blog_remove_cw_button(blog_str: str, translate: {}) -> str:
     """Removes the CW button from blog posts, where the
     summary field is instead used as the blog title
     """
-    blog_str = blog_str.replace('<details>', '<b>')
-    blog_str = blog_str.replace('</details>', '</b>')
-    blog_str = blog_str.replace('<summary>', '')
-    blog_str = blog_str.replace('</summary>', '')
+    replacements = {
+        '<details>': '<b>',
+        '</details>': '</b>',
+        '<summary>': '',
+        '</summary>': ''
+    }
+    blog_str = replace_strings(blog_str, replacements)
     blog_str = blog_str.replace(translate['SHOW MORE'], '')
     return blog_str


@@ -12,6 +12,7 @@ import datetime
 from utils import data_dir
 from utils import date_utcnow
 from utils import date_epoch
+from utils import replace_strings


 MAX_TAG_LENGTH = 42
@@ -51,6 +52,14 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None:
     if translate.get(category_str):
         category_str = translate[category_str]
+    replacements = {
+        ' & ': ' and ',
+        '/': ''
+    }
+    replacements2 = {
+        '-': '',
+        ' ': ''
+    }
     for _, _, files in os.walk(base_dir + '/data/cities'):
         for cities_file in files:
             if not cities_file.endswith('.txt'):
@@ -68,10 +77,9 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None:
                 continue
             for hashtag in cities:
                 hashtag = hashtag.lower().strip()
-                hashtag = hashtag.replace(' & ', ' and ')
-                hashtag = hashtag.replace('/', '')
-                hashtag2 = hashtag.replace('-', '').replace(' ', '')
+                hashtag = replace_strings(hashtag, replacements)
+                hashtag2 = replace_strings(hashtag, replacements2)
                 city_filename = base_dir + '/tags/' + hashtag2 + '.category'
                 if not os.path.isfile(city_filename):
                     try:


@@ -15,6 +15,7 @@ import email.parser
 import urllib.parse
 from shutil import copyfile
 from dateutil.parser import parse
+from utils import replace_strings
 from utils import data_dir
 from utils import remove_link_tracking
 from utils import string_contains
@@ -143,13 +144,19 @@ def html_replace_email_quote(content: str) -> str:
     if '<p>&quot;' in content:
         if '&quot;</p>' in content:
             if content.count('<p>&quot;') == content.count('&quot;</p>'):
-                content = content.replace('<p>&quot;', '<p><blockquote>')
-                content = content.replace('&quot;</p>', '</blockquote></p>')
+                replacements = {
+                    '<p>&quot;': '<p><blockquote>',
+                    '&quot;</p>': '</blockquote></p>'
+                }
+                content = replace_strings(content, replacements)
     if '>\u201c' in content:
         if '\u201d<' in content:
             if content.count('>\u201c') == content.count('\u201d<'):
-                content = content.replace('>\u201c', '><blockquote>')
-                content = content.replace('\u201d<', '</blockquote><')
+                replacements = {
+                    '>\u201c': '><blockquote>',
+                    '\u201d<': '</blockquote><'
+                }
+                content = replace_strings(content, replacements)
     # replace email style quote
     if '>&gt; ' not in content:
         return content
@@ -161,8 +168,11 @@ def html_replace_email_quote(content: str) -> str:
             continue
         if '>&gt; ' not in line_str:
             if line_str.startswith('&gt; '):
-                line_str = line_str.replace('&gt; ', '<blockquote>')
-                line_str = line_str.replace('&gt;', '<br>')
+                replacements = {
+                    '&gt; ': '<blockquote>',
+                    '&gt;': '<br>'
+                }
+                line_str = replace_strings(line_str, replacements)
                 new_content += '<p>' + line_str + '</blockquote></p>'
             else:
                 new_content += '<p>' + line_str + '</p>'
@@ -1194,8 +1204,12 @@ def _get_simplified_content(content: str) -> str:
     """Returns a simplified version of the content suitable for
    splitting up into individual words
     """
-    content_simplified = \
-        content.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
+    replacements = {
+        ',': ' ',
+        ';': ' ',
+        '- ': ' '
+    }
+    content_simplified = replace_strings(content, replacements)
     content_simplified = content_simplified.replace('. ', ' ').strip()
     if content_simplified.endswith('.'):
         content_simplified = content_simplified[:len(content_simplified)-1]
@@ -1338,8 +1352,11 @@ def add_html_tags(base_dir: str, http_prefix: str,
         content = html_replace_email_quote(content)
         return html_replace_quote_marks(content)
     max_word_length = 40
-    content = content.replace('\r', '')
-    content = content.replace('\n', ' --linebreak-- ')
+    replacements = {
+        '\r': '',
+        '\n': ' --linebreak-- '
+    }
+    content = replace_strings(content, replacements)
     now_playing_str = 'NowPlaying'
     if translate.get(now_playing_str):
         now_playing_str = translate[now_playing_str]
@@ -1712,15 +1729,18 @@ def combine_textarea_lines(text: str) -> str:
     result = ''
     ctr = 0
     paragraphs = text.split('\n\n')
+    replacements = {
+        '\n* ': '***BULLET POINT*** ',
+        '\n * ': '***BULLET POINT*** ',
+        '\n- ': '***DASH POINT*** ',
+        '\n - ': '***DASH POINT*** ',
+        '\n': ' ',
+        '  ': ' ',
+        '***BULLET POINT*** ': '\n* ',
+        '***DASH POINT*** ': '\n- '
+    }
     for para in paragraphs:
-        para = para.replace('\n* ', '***BULLET POINT*** ')
-        para = para.replace('\n * ', '***BULLET POINT*** ')
-        para = para.replace('\n- ', '***DASH POINT*** ')
-        para = para.replace('\n - ', '***DASH POINT*** ')
-        para = para.replace('\n', ' ')
-        para = para.replace('  ', ' ')
-        para = para.replace('***BULLET POINT*** ', '\n* ')
-        para = para.replace('***DASH POINT*** ', '\n- ')
+        para = replace_strings(para, replacements)
         if ctr > 0:
             result += '</p><p>'
         result += para
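A detail worth noting in the combine_textarea_lines change above: the substitutions are order-sensitive. Bullet and dash markers are first swapped out to placeholder tokens, newlines are then collapsed, and the placeholders are finally restored, so replace_strings() must walk the dictionary in insertion order, which plain dicts guarantee from Python 3.7 onwards. A small illustrative check, assuming replace_strings is importable from utils:

    from utils import replace_strings

    para = 'item one\n* item two'
    replacements = {
        '\n* ': '***BULLET POINT*** ',   # protect the bullet first
        '\n': ' ',                       # then collapse remaining newlines
        '***BULLET POINT*** ': '\n* '    # finally restore the bullet
    }
    # the bullet survives because '\n* ' is handled before the bare '\n'
    print(repr(replace_strings(para, replacements)))  # 'item one\n* item two'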


@@ -11,6 +11,7 @@ import time
 import errno
 import json
 from socket import error as SocketError
+from utils import replace_strings
 from utils import corp_servers
 from utils import string_ends_with
 from utils import get_config_param
@@ -204,12 +205,15 @@ def daemon_http_post(self) -> None:
     # remove any trailing slashes from the path
     if not self.path.endswith('confirm'):
-        self.path = self.path.replace('/outbox/', '/outbox')
-        self.path = self.path.replace('/tlblogs/', '/tlblogs')
-        self.path = self.path.replace('/inbox/', '/inbox')
-        self.path = self.path.replace('/shares/', '/shares')
-        self.path = self.path.replace('/wanted/', '/wanted')
-        self.path = self.path.replace('/sharedInbox/', '/sharedInbox')
+        replacements = {
+            '/outbox/': '/outbox',
+            '/tlblogs/': '/tlblogs',
+            '/inbox/': '/inbox',
+            '/shares/': '/shares',
+            '/wanted/': '/wanted',
+            '/sharedInbox/': '/sharedInbox'
+        }
+        self.path = replace_strings(self.path, replacements)

     if self.path == '/inbox':
         if not self.server.enable_shared_inbox:


@@ -16,6 +16,7 @@ import webbrowser
 import urllib.parse
 from pathlib import Path
 from random import randint
+from utils import replace_strings
 from utils import get_post_attachments
 from utils import get_url_from_post
 from utils import get_actor_languages_list
@@ -1996,9 +1997,12 @@ def run_desktop_client(base_dir: str, proxy_type: str, http_prefix: str,
             command_str.startswith('direct message ') or \
             command_str.startswith('post ') or \
             command_str.startswith('send '):
-        command_str = command_str.replace(' to ', ' ')
-        command_str = command_str.replace(' dm ', ' ')
-        command_str = command_str.replace(' DM ', ' ')
+        replacements = {
+            ' to ': ' ',
+            ' dm ': ' ',
+            ' DM ': ' '
+        }
+        command_str = replace_strings(command_str, replacements)
         # direct message
         to_handle = None
         if command_str.startswith('post '):


@@ -74,6 +74,7 @@ from tests import test_update_actor
 from tests import run_all_tests
 from auth import store_basic_credentials
 from auth import create_password
+from utils import replace_strings
 from utils import set_accounts_data_dir
 from utils import data_dir
 from utils import data_dir_testing
@@ -2914,15 +2915,18 @@ def _command_options() -> None:
     if not person_url:
         person_url = get_user_url(wf_request, 0, argb.debug)
     if nickname == domain:
-        person_url = person_url.replace('/users/', '/actor/')
-        person_url = person_url.replace('/accounts/', '/actor/')
-        person_url = person_url.replace('/channel/', '/actor/')
-        person_url = person_url.replace('/profile/', '/actor/')
-        person_url = person_url.replace('/author/', '/actor/')
-        person_url = person_url.replace('/u/', '/actor/')
-        person_url = person_url.replace('/fediverse/blog/', '/actor/')
-        person_url = person_url.replace('/c/', '/actor/')
-        person_url = person_url.replace('/m/', '/actor/')
+        replacements = {
+            '/users/': '/actor/',
+            '/accounts/': '/actor/',
+            '/channel/': '/actor/',
+            '/profile/': '/actor/',
+            '/author/': '/actor/',
+            '/u/': '/actor/',
+            '/fediverse/blog/': '/actor/',
+            '/c/': '/actor/',
+            '/m/': '/actor/'
+        }
+        person_url = replace_strings(person_url, replacements)
     if not person_url:
         # try single user instance
         person_url = http_prefix + '://' + domain


@@ -12,6 +12,7 @@ from uuid import UUID
 from hashlib import md5
 from datetime import datetime
 from datetime import timedelta
+from utils import replace_strings
 from utils import date_from_numbers
 from utils import date_from_string_format
 from utils import acct_handle_dir
@@ -405,9 +406,13 @@ def get_todays_events(base_dir: str, nickname: str, domain: str,
 def _ical_date_string(date_str: str) -> str:
     """Returns an icalendar formatted date
     """
-    date_str = date_str.replace('-', '')
-    date_str = date_str.replace(':', '')
-    return date_str.replace(' ', '')
+    replacements = {
+        '-': '',
+        ':': '',
+        ' ': ''
+    }
+    date_str = replace_strings(date_str, replacements)
+    return date_str


 def _dav_encode_token(year: int, month_number: int,


@@ -34,6 +34,7 @@ from webfinger import webfinger_handle
 from httpsig import create_signed_header
 from siteactive import site_is_active
 from languages import understood_post_language
+from utils import replace_strings
 from utils import valid_content_warning
 from utils import get_actor_from_post_id
 from utils import string_contains
@@ -4684,8 +4685,11 @@ def _create_box_items(base_dir: str,
     # Why are url's hashed? Since storage is in the filesystem this avoids
     # confusion with directories by not using the / character
     if first_post_id:
-        first_post_id = first_post_id.replace('--', '#')
-        first_post_id = first_post_id.replace('/', '#')
+        replacements = {
+            '--': '#',
+            '/': '#'
+        }
+        first_post_id = replace_strings(first_post_id, replacements)

     try:
         with open(index_filename, 'r', encoding='utf-8') as fp_index:


@@ -23,6 +23,7 @@ from session import post_json
 from session import post_image
 from session import create_session
 from session import get_json_valid
+from utils import replace_strings
 from utils import data_dir
 from utils import resembles_url
 from utils import date_utcnow
@@ -125,10 +126,16 @@ def _get_valid_shared_item_id(actor: str, display_name: str) -> str:
     remove_chars2 = ('+', '/', '\\', '?', '&')
     for char in remove_chars2:
         display_name = display_name.replace(char, '-')
-    display_name = display_name.replace('.', '_')
-    display_name = display_name.replace("’", "'")
-    actor = actor.replace('://', '___')
-    actor = actor.replace('/', '--')
+    replacements = {
+        '.': '_',
+        "’": "'"
+    }
+    display_name = replace_strings(display_name, replacements)
+    replacements2 = {
+        '://': '___',
+        '/': '--'
+    }
+    actor = replace_strings(actor, replacements2)
     return actor + '--shareditems--' + display_name
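The '://' to '___' and '/' to '--' substitutions above make the actor URL filesystem-safe for use inside a shared item id; the shares_timeline_json and html_show_share hunks further down reverse them. A rough round-trip illustration with an invented actor URL:

    from utils import replace_strings

    actor = 'https://example.net/users/alice'   # hypothetical actor
    encoded = replace_strings(actor, {'://': '___', '/': '--'})
    # encoded == 'https___example.net--users--alice'
    decoded = replace_strings(encoded, {'___': '://', '--': '/'})
    # decoded == 'https://example.net/users/alice'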
@@ -227,8 +234,11 @@ def _getshare_dfc_id(base_dir: str, system_language: str,
     matched_product_type = \
         _dfc_product_type_from_category(base_dir, item_category, translate)
     if not matched_product_type:
-        item_type = item_type.replace(' ', '_')
-        item_type = item_type.replace('.', '')
+        replacements = {
+            ' ': '_',
+            '.': ''
+        }
+        item_type = replace_strings(item_type, replacements)
         return 'epicyon#' + item_type
     if not dfc_ids:
         dfc_ids = _load_dfc_ids(base_dir, system_language,


@@ -458,16 +458,18 @@ def get_content_from_post(post_json_object: {}, system_language: str,
             not has_contentmap_dict:
         return ''
     content = ''
+    replacements = {
+        '&amp;': '&',
+        '<u>': '',
+        '</u>': ''
+    }
     if has_contentmap_dict:
         if this_post_json[map_dict].get(system_language):
             sys_lang = this_post_json[map_dict][system_language]
             if isinstance(sys_lang, str):
                 content = sys_lang
                 content = remove_markup_tag(content, 'pre')
-                content = content.replace('&amp;', '&')
-                # remove underlines
-                content = content.replace('<u>', '')
-                content = content.replace('</u>', '')
+                content = replace_strings(content, replacements)
                 return standardize_text(content)
         else:
             # is there a contentMap/summaryMap entry for one of
@@ -480,18 +482,12 @@ def get_content_from_post(post_json_object: {}, system_language: str,
                     continue
                 content = map_lang
                 content = remove_markup_tag(content, 'pre')
-                content = content.replace('&amp;', '&')
-                # remove underlines
-                content = content.replace('<u>', '')
-                content = content.replace('</u>', '')
+                content = replace_strings(content, replacements)
                 return standardize_text(content)
     else:
         if isinstance(this_post_json[content_type], str):
             content = this_post_json[content_type]
-            content = content.replace('&amp;', '&')
-            # remove underlines
-            content = content.replace('<u>', '')
-            content = content.replace('</u>', '')
+            content = replace_strings(content, replacements)
             content = remove_markup_tag(content, 'pre')
             return standardize_text(content)
@@ -1010,9 +1006,14 @@ def remove_html(content: str) -> str:
     if '<' not in content:
         return content
     removing = False
-    content = content.replace('<a href', ' <a href')
-    content = content.replace('<q>', '"').replace('</q>', '"')
-    content = content.replace('</p>', '\n\n').replace('<br>', '\n')
+    replacements = {
+        '<a href': ' <a href',
+        '<q>': '"',
+        '</q>': '"',
+        '</p>': '\n\n',
+        '<br>': '\n'
+    }
+    content = replace_strings(content, replacements)
     result = ''
     for char in content:
         if char == '<':
@@ -5656,3 +5657,11 @@ def get_watermark_file(base_dir: str,
     watermark_file, watermark_filename = \
         get_image_file(base_dir, 'watermark_image', account_dir, '')
     return watermark_file, watermark_filename
+
+
+def replace_strings(text: str, replacements: {}) -> str:
+    """Does a series of string replacements
+    """
+    for orig_str, new_str in replacements.items():
+        text = text.replace(orig_str, new_str)
+    return text
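The helper simply applies text.replace() once per dictionary entry, in insertion order. A minimal usage sketch with an invented date string, matching what _ical_date_string now does:

    from utils import replace_strings

    # strips '-', ':' and spaces from the timestamp
    print(replace_strings('2024-08-08 18:23:33', {'-': '', ':': '', ' ': ''}))
    # -> 20240808182333

Because each substitution is applied to the result of the previous one, overlapping keys should be ordered from most to least specific, as the combine_textarea_lines and html_replace_email_quote changes do.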


@@ -12,6 +12,7 @@ from shutil import copyfile
 from collections import OrderedDict
 from session import get_json
 from session import get_json_valid
+from utils import replace_strings
 from utils import get_image_file
 from utils import data_dir
 from utils import string_contains
@@ -159,10 +160,13 @@ def csv_following_list(following_filename: str,
                                         following_address)
         if person_notes:
             # make notes suitable for csv file
-            person_notes = person_notes.replace(',', ' ')
-            person_notes = person_notes.replace('"', "'")
-            person_notes = person_notes.replace('\n', '<br>')
-            person_notes = person_notes.replace('  ', ' ')
+            replacements = {
+                ',': ' ',
+                '"': "'",
+                '\n': '<br>',
+                '  ': ' '
+            }
+            person_notes = replace_strings(person_notes, replacements)
         if not following_list_csv:
             following_list_csv = \
                 'Account address,Show boosts,' + \
@@ -538,8 +542,12 @@ def shares_timeline_json(actor: str, page_number: int, items_per_page: int,
         if '--shareditems--' not in item_id:
             continue
         share_actor = item_id.split('--shareditems--')[0]
-        share_actor = share_actor.replace('___', '://')
-        share_actor = share_actor.replace('--', '/')
+        replacements = {
+            '___': '://',
+            '--': '/'
+        }
+        share_actor = \
+            replace_strings(share_actor, replacements)
         share_nickname = get_nickname_from_actor(share_actor)
         if not share_nickname:
             continue
@@ -1092,14 +1100,18 @@ def add_emoji_to_display_name(session, base_dir: str, http_prefix: str,
     if ':' not in display_name:
         return display_name

-    display_name = display_name.replace('<p>', '').replace('</p>', '')
+    replacements = {
+        '<p>': '',
+        '</p>': ''
+    }
+    display_name = replace_strings(display_name, replacements)
     emoji_tags = {}
     # print('TAG: display_name before tags: ' + display_name)
     display_name = \
         add_html_tags(base_dir, http_prefix,
                       nickname, domain, display_name, [],
                       emoji_tags, translate)
-    display_name = display_name.replace('<p>', '').replace('</p>', '')
+    display_name = replace_strings(display_name, replacements)
     # print('TAG: display_name after tags: ' + display_name)
     # convert the emoji dictionary to a list
     emoji_tags_list = []
@@ -2007,7 +2019,11 @@ def html_show_share(base_dir: str, domain: str, nickname: str,
     """
     shares_json = None

-    share_url = item_id.replace('___', '://').replace('--', '/')
+    replacements = {
+        '___': '://',
+        '--': '/'
+    }
+    share_url = replace_strings(item_id, replacements)
     contact_nickname = get_nickname_from_actor(share_url)
     if not contact_nickname:
         return None