Replacing multiple strings

main
Bob Mottram 2024-08-08 18:23:33 +01:00
parent 06673b61c7
commit d2251eb173
11 changed files with 171 additions and 80 deletions

19
blog.py
View File

@ -16,6 +16,7 @@ from webapp_utils import html_footer
from webapp_utils import get_post_attachments_as_html
from webapp_utils import edit_text_area
from webapp_media import add_embedded_elements
from utils import replace_strings
from utils import data_dir
from utils import remove_link_tracking
from utils import get_url_from_post
@ -164,8 +165,11 @@ def _get_blog_replies(base_dir: str, http_prefix: str, translate: {},
replies_str = ''
for reply_post_id in lines:
reply_post_id = remove_eol(reply_post_id)
reply_post_id = reply_post_id.replace('.json', '')
reply_post_id = reply_post_id.replace('.replies', '')
replacements = {
'.json': '',
'.replies': ''
}
reply_post_id = replace_strings(reply_post_id, replacements)
post_filename = acct_dir(base_dir, nickname, domain) + \
'/postcache/' + \
reply_post_id.replace('/', '#') + '.html'
@ -438,10 +442,13 @@ def _html_blog_remove_cw_button(blog_str: str, translate: {}) -> str:
"""Removes the CW button from blog posts, where the
summary field is instead used as the blog title
"""
blog_str = blog_str.replace('<details>', '<b>')
blog_str = blog_str.replace('</details>', '</b>')
blog_str = blog_str.replace('<summary>', '')
blog_str = blog_str.replace('</summary>', '')
replacements = {
'<details>': '<b>',
'</details>': '</b>',
'<summary>': '',
'</summary>': ''
}
blog_str = replace_strings(blog_str, replacements)
blog_str = blog_str.replace(translate['SHOW MORE'], '')
return blog_str

View File

@ -12,6 +12,7 @@ import datetime
from utils import data_dir
from utils import date_utcnow
from utils import date_epoch
from utils import replace_strings
MAX_TAG_LENGTH = 42
@ -51,6 +52,14 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None:
if translate.get(category_str):
category_str = translate[category_str]
replacements = {
' & ': ' and ',
'/': ''
}
replacements2 = {
'-': '',
' ': ''
}
for _, _, files in os.walk(base_dir + '/data/cities'):
for cities_file in files:
if not cities_file.endswith('.txt'):
@ -68,10 +77,9 @@ def load_city_hashtags(base_dir: str, translate: {}) -> None:
continue
for hashtag in cities:
hashtag = hashtag.lower().strip()
hashtag = hashtag.replace(' & ', ' and ')
hashtag = hashtag.replace('/', '')
hashtag = replace_strings(hashtag, replacements)
hashtag2 = hashtag.replace('-', '').replace(' ', '')
hashtag2 = replace_strings(hashtag, replacements2)
city_filename = base_dir + '/tags/' + hashtag2 + '.category'
if not os.path.isfile(city_filename):
try:

View File

@ -15,6 +15,7 @@ import email.parser
import urllib.parse
from shutil import copyfile
from dateutil.parser import parse
from utils import replace_strings
from utils import data_dir
from utils import remove_link_tracking
from utils import string_contains
@ -143,13 +144,19 @@ def html_replace_email_quote(content: str) -> str:
if '<p>&quot;' in content:
if '&quot;</p>' in content:
if content.count('<p>&quot;') == content.count('&quot;</p>'):
content = content.replace('<p>&quot;', '<p><blockquote>')
content = content.replace('&quot;</p>', '</blockquote></p>')
replacements = {
'<p>&quot;': '<p><blockquote>',
'&quot;</p>': '</blockquote></p>'
}
content = replace_strings(content, replacements)
if '>\u201c' in content:
if '\u201d<' in content:
if content.count('>\u201c') == content.count('\u201d<'):
content = content.replace('>\u201c', '><blockquote>')
content = content.replace('\u201d<', '</blockquote><')
replacements = {
'>\u201c': '><blockquote>',
'\u201d<': '</blockquote><'
}
content = replace_strings(content, replacements)
# replace email style quote
if '>&gt; ' not in content:
return content
@ -161,8 +168,11 @@ def html_replace_email_quote(content: str) -> str:
continue
if '>&gt; ' not in line_str:
if line_str.startswith('&gt; '):
line_str = line_str.replace('&gt; ', '<blockquote>')
line_str = line_str.replace('&gt;', '<br>')
replacements = {
'&gt; ': '<blockquote>',
'&gt;': '<br>'
}
line_str = replace_strings(line_str, replacements)
new_content += '<p>' + line_str + '</blockquote></p>'
else:
new_content += '<p>' + line_str + '</p>'
@ -1194,8 +1204,12 @@ def _get_simplified_content(content: str) -> str:
"""Returns a simplified version of the content suitable for
splitting up into individual words
"""
content_simplified = \
content.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
replacements = {
',': ' ',
';': ' ',
'- ': ' '
}
content_simplified = replace_strings(content, replacements)
content_simplified = content_simplified.replace('. ', ' ').strip()
if content_simplified.endswith('.'):
content_simplified = content_simplified[:len(content_simplified)-1]
@ -1338,8 +1352,11 @@ def add_html_tags(base_dir: str, http_prefix: str,
content = html_replace_email_quote(content)
return html_replace_quote_marks(content)
max_word_length = 40
content = content.replace('\r', '')
content = content.replace('\n', ' --linebreak-- ')
replacements = {
'\r': '',
'\n': ' --linebreak-- '
}
content = replace_strings(content, replacements)
now_playing_str = 'NowPlaying'
if translate.get(now_playing_str):
now_playing_str = translate[now_playing_str]
@ -1712,15 +1729,18 @@ def combine_textarea_lines(text: str) -> str:
result = ''
ctr = 0
paragraphs = text.split('\n\n')
replacements = {
'\n* ': '***BULLET POINT*** ',
'\n * ': '***BULLET POINT*** ',
'\n- ': '***DASH POINT*** ',
'\n - ': '***DASH POINT*** ',
'\n': ' ',
' ': ' ',
'***BULLET POINT*** ': '\n* ',
'***DASH POINT*** ': '\n- '
}
for para in paragraphs:
para = para.replace('\n* ', '***BULLET POINT*** ')
para = para.replace('\n * ', '***BULLET POINT*** ')
para = para.replace('\n- ', '***DASH POINT*** ')
para = para.replace('\n - ', '***DASH POINT*** ')
para = para.replace('\n', ' ')
para = para.replace(' ', ' ')
para = para.replace('***BULLET POINT*** ', '\n* ')
para = para.replace('***DASH POINT*** ', '\n- ')
para = replace_strings(para, replacements)
if ctr > 0:
result += '</p><p>'
result += para

View File

@ -11,6 +11,7 @@ import time
import errno
import json
from socket import error as SocketError
from utils import replace_strings
from utils import corp_servers
from utils import string_ends_with
from utils import get_config_param
@ -204,12 +205,15 @@ def daemon_http_post(self) -> None:
# remove any trailing slashes from the path
if not self.path.endswith('confirm'):
self.path = self.path.replace('/outbox/', '/outbox')
self.path = self.path.replace('/tlblogs/', '/tlblogs')
self.path = self.path.replace('/inbox/', '/inbox')
self.path = self.path.replace('/shares/', '/shares')
self.path = self.path.replace('/wanted/', '/wanted')
self.path = self.path.replace('/sharedInbox/', '/sharedInbox')
replacements = {
'/outbox/': '/outbox',
'/tlblogs/': '/tlblogs',
'/inbox/': '/inbox',
'/shares/': '/shares',
'/wanted/': '/wanted',
'/sharedInbox/': '/sharedInbox'
}
self.path = replace_strings(self.path, replacements)
if self.path == '/inbox':
if not self.server.enable_shared_inbox:

View File

@ -16,6 +16,7 @@ import webbrowser
import urllib.parse
from pathlib import Path
from random import randint
from utils import replace_strings
from utils import get_post_attachments
from utils import get_url_from_post
from utils import get_actor_languages_list
@ -1996,9 +1997,12 @@ def run_desktop_client(base_dir: str, proxy_type: str, http_prefix: str,
command_str.startswith('direct message ') or \
command_str.startswith('post ') or \
command_str.startswith('send '):
command_str = command_str.replace(' to ', ' ')
command_str = command_str.replace(' dm ', ' ')
command_str = command_str.replace(' DM ', ' ')
replacements = {
' to ': ' ',
' dm ': ' ',
' DM ': ' '
}
command_str = replace_strings(command_str, replacements)
# direct message
to_handle = None
if command_str.startswith('post '):

View File

@ -74,6 +74,7 @@ from tests import test_update_actor
from tests import run_all_tests
from auth import store_basic_credentials
from auth import create_password
from utils import replace_strings
from utils import set_accounts_data_dir
from utils import data_dir
from utils import data_dir_testing
@ -2914,15 +2915,18 @@ def _command_options() -> None:
if not person_url:
person_url = get_user_url(wf_request, 0, argb.debug)
if nickname == domain:
person_url = person_url.replace('/users/', '/actor/')
person_url = person_url.replace('/accounts/', '/actor/')
person_url = person_url.replace('/channel/', '/actor/')
person_url = person_url.replace('/profile/', '/actor/')
person_url = person_url.replace('/author/', '/actor/')
person_url = person_url.replace('/u/', '/actor/')
person_url = person_url.replace('/fediverse/blog/', '/actor/')
person_url = person_url.replace('/c/', '/actor/')
person_url = person_url.replace('/m/', '/actor/')
replacements = {
'/users/': '/actor/',
'/accounts/': '/actor/',
'/channel/': '/actor/',
'/profile/': '/actor/',
'/author/': '/actor/',
'/u/': '/actor/',
'/fediverse/blog/': '/actor/',
'/c/': '/actor/',
'/m/': '/actor/'
}
person_url = replace_strings(person_url, replacements)
if not person_url:
# try single user instance
person_url = http_prefix + '://' + domain

View File

@ -12,6 +12,7 @@ from uuid import UUID
from hashlib import md5
from datetime import datetime
from datetime import timedelta
from utils import replace_strings
from utils import date_from_numbers
from utils import date_from_string_format
from utils import acct_handle_dir
@ -405,9 +406,13 @@ def get_todays_events(base_dir: str, nickname: str, domain: str,
def _ical_date_string(date_str: str) -> str:
"""Returns an icalendar formatted date
"""
date_str = date_str.replace('-', '')
date_str = date_str.replace(':', '')
return date_str.replace(' ', '')
replacements = {
'-': '',
':': '',
' ': ''
}
date_str = replace_strings(date_str, replacements)
return date_str
def _dav_encode_token(year: int, month_number: int,

View File

@ -34,6 +34,7 @@ from webfinger import webfinger_handle
from httpsig import create_signed_header
from siteactive import site_is_active
from languages import understood_post_language
from utils import replace_strings
from utils import valid_content_warning
from utils import get_actor_from_post_id
from utils import string_contains
@ -4684,8 +4685,11 @@ def _create_box_items(base_dir: str,
# Why are url's hashed? Since storage is in the filesystem this avoids
# confusion with directories by not using the / character
if first_post_id:
first_post_id = first_post_id.replace('--', '#')
first_post_id = first_post_id.replace('/', '#')
replacements = {
'--': '#',
'/': '#'
}
first_post_id = replace_strings(first_post_id, replacements)
try:
with open(index_filename, 'r', encoding='utf-8') as fp_index:

View File

@ -23,6 +23,7 @@ from session import post_json
from session import post_image
from session import create_session
from session import get_json_valid
from utils import replace_strings
from utils import data_dir
from utils import resembles_url
from utils import date_utcnow
@ -125,10 +126,16 @@ def _get_valid_shared_item_id(actor: str, display_name: str) -> str:
remove_chars2 = ('+', '/', '\\', '?', '&')
for char in remove_chars2:
display_name = display_name.replace(char, '-')
display_name = display_name.replace('.', '_')
display_name = display_name.replace("", "'")
actor = actor.replace('://', '___')
actor = actor.replace('/', '--')
replacements = {
'.': '_',
"": "'"
}
display_name = replace_strings(display_name, replacements)
replacements2 = {
'://': '___',
'/': '--'
}
actor = replace_strings(actor, replacements2)
return actor + '--shareditems--' + display_name
@ -227,8 +234,11 @@ def _getshare_dfc_id(base_dir: str, system_language: str,
matched_product_type = \
_dfc_product_type_from_category(base_dir, item_category, translate)
if not matched_product_type:
item_type = item_type.replace(' ', '_')
item_type = item_type.replace('.', '')
replacements = {
' ': '_',
'.': ''
}
item_type = replace_strings(item_type, replacements)
return 'epicyon#' + item_type
if not dfc_ids:
dfc_ids = _load_dfc_ids(base_dir, system_language,

View File

@ -458,16 +458,18 @@ def get_content_from_post(post_json_object: {}, system_language: str,
not has_contentmap_dict:
return ''
content = ''
replacements = {
'&amp;': '&',
'<u>': '',
'</u>': ''
}
if has_contentmap_dict:
if this_post_json[map_dict].get(system_language):
sys_lang = this_post_json[map_dict][system_language]
if isinstance(sys_lang, str):
content = sys_lang
content = remove_markup_tag(content, 'pre')
content = content.replace('&amp;', '&')
# remove underlines
content = content.replace('<u>', '')
content = content.replace('</u>', '')
content = replace_strings(content, replacements)
return standardize_text(content)
else:
# is there a contentMap/summaryMap entry for one of
@ -480,18 +482,12 @@ def get_content_from_post(post_json_object: {}, system_language: str,
continue
content = map_lang
content = remove_markup_tag(content, 'pre')
content = content.replace('&amp;', '&')
# remove underlines
content = content.replace('<u>', '')
content = content.replace('</u>', '')
content = replace_strings(content, replacements)
return standardize_text(content)
else:
if isinstance(this_post_json[content_type], str):
content = this_post_json[content_type]
content = content.replace('&amp;', '&')
# remove underlines
content = content.replace('<u>', '')
content = content.replace('</u>', '')
content = replace_strings(content, replacements)
content = remove_markup_tag(content, 'pre')
return standardize_text(content)
@ -1010,9 +1006,14 @@ def remove_html(content: str) -> str:
if '<' not in content:
return content
removing = False
content = content.replace('<a href', ' <a href')
content = content.replace('<q>', '"').replace('</q>', '"')
content = content.replace('</p>', '\n\n').replace('<br>', '\n')
replacements = {
'<a href': ' <a href',
'<q>': '"',
'</q>': '"',
'</p>': '\n\n',
'<br>': '\n'
}
content = replace_strings(content, replacements)
result = ''
for char in content:
if char == '<':
@ -5656,3 +5657,11 @@ def get_watermark_file(base_dir: str,
watermark_file, watermark_filename = \
get_image_file(base_dir, 'watermark_image', account_dir, '')
return watermark_file, watermark_filename
def replace_strings(text: str, replacements: {}) -> str:
    """Applies a series of substring replacements to text

    Each key of replacements is replaced by its value, one pair at a
    time, in the iteration order of the dictionary. Later replacements
    operate on the result of earlier ones, so the order of the entries
    matters whenever one key overlaps another or matches text produced
    by a previous replacement.

    Entries with an empty key are skipped, because str.replace('', new)
    would insert new before every character and at the end of the text
    rather than replacing anything.

    Returns the text with all of the replacements applied
    """
    for orig_str, new_str in replacements.items():
        if not orig_str:
            # an empty search string matches at every position
            continue
        text = text.replace(orig_str, new_str)
    return text

View File

@ -12,6 +12,7 @@ from shutil import copyfile
from collections import OrderedDict
from session import get_json
from session import get_json_valid
from utils import replace_strings
from utils import get_image_file
from utils import data_dir
from utils import string_contains
@ -159,10 +160,13 @@ def csv_following_list(following_filename: str,
following_address)
if person_notes:
# make notes suitable for csv file
person_notes = person_notes.replace(',', ' ')
person_notes = person_notes.replace('"', "'")
person_notes = person_notes.replace('\n', '<br>')
person_notes = person_notes.replace(' ', ' ')
replacements = {
',': ' ',
'"': "'",
'\n': '<br>',
' ': ' '
}
person_notes = replace_strings(person_notes, replacements)
if not following_list_csv:
following_list_csv = \
'Account address,Show boosts,' + \
@ -538,8 +542,12 @@ def shares_timeline_json(actor: str, page_number: int, items_per_page: int,
if '--shareditems--' not in item_id:
continue
share_actor = item_id.split('--shareditems--')[0]
share_actor = share_actor.replace('___', '://')
share_actor = share_actor.replace('--', '/')
replacements = {
'___': '://',
'--': '/'
}
share_actor = \
replace_strings(share_actor, replacements)
share_nickname = get_nickname_from_actor(share_actor)
if not share_nickname:
continue
@ -1092,14 +1100,18 @@ def add_emoji_to_display_name(session, base_dir: str, http_prefix: str,
if ':' not in display_name:
return display_name
display_name = display_name.replace('<p>', '').replace('</p>', '')
replacements = {
'<p>': '',
'</p>': ''
}
display_name = replace_strings(display_name, replacements)
emoji_tags = {}
# print('TAG: display_name before tags: ' + display_name)
display_name = \
add_html_tags(base_dir, http_prefix,
nickname, domain, display_name, [],
emoji_tags, translate)
display_name = display_name.replace('<p>', '').replace('</p>', '')
display_name = replace_strings(display_name, replacements)
# print('TAG: display_name after tags: ' + display_name)
# convert the emoji dictionary to a list
emoji_tags_list = []
@ -2007,7 +2019,11 @@ def html_show_share(base_dir: str, domain: str, nickname: str,
"""
shares_json = None
share_url = item_id.replace('___', '://').replace('--', '/')
replacements = {
'___': '://',
'--': '/'
}
share_url = replace_strings(item_id, replacements)
contact_nickname = get_nickname_from_actor(share_url)
if not contact_nickname:
return None