diff --git a/blog.py b/blog.py index 6c9462bbb..85358a644 100644 --- a/blog.py +++ b/blog.py @@ -311,9 +311,11 @@ def _html_blog_post_content(debug: bool, session, authorized: bool, continue if not tag_json.get('url'): continue + citation_url = remove_html(tag_json['url']) + citation_name = remove_html(tag_json['name']) citations_str += \ - '
' + translate['Citations'] + \ ':
' + \ @@ -475,7 +477,7 @@ def html_blog_post(session, authorized: bool, title = post_json_object['object']['summary'] url = '' if post_json_object['object'].get('url'): - url = post_json_object['object']['url'] + url = remove_html(post_json_object['object']['url']) snippet = _get_snippet_from_blog_content(post_json_object, system_language) blog_str = html_header_with_blog_markup(css_filename, instance_title, diff --git a/bookmarks.py b/bookmarks.py index 584afa02a..be7f6ddf0 100644 --- a/bookmarks.py +++ b/bookmarks.py @@ -30,6 +30,7 @@ from utils import has_actor from utils import has_object_string_type from utils import text_in_file from utils import remove_eol +from utils import remove_html from posts import get_person_box from session import post_json @@ -599,6 +600,7 @@ def outbox_bookmark(recent_posts_cache: {}, print('DEBUG: c2s bookmark Add request arrived in outbox') message_url = remove_id_ending(message_json['object']['url']) + message_url = remove_html(message_url) domain = remove_domain_port(domain) post_filename = locate_post(base_dir, nickname, domain, message_url) if not post_filename: @@ -656,6 +658,7 @@ def outbox_undo_bookmark(recent_posts_cache: {}, print('DEBUG: c2s unbookmark Remove request arrived in outbox') message_url = remove_id_ending(message_json['object']['url']) + message_url = remove_html(message_url) domain = remove_domain_port(domain) post_filename = locate_post(base_dir, nickname, domain, message_url) if not post_filename: diff --git a/content.py b/content.py index 0493ad087..500ac71c7 100644 --- a/content.py +++ b/content.py @@ -445,7 +445,8 @@ def replace_emoji_from_tags(session, base_dir: str, continue if tag_item['name'] not in content: continue - icon_name = tag_item['icon']['url'].split('/')[-1] + tag_url = remove_html(tag_item['icon']['url']) + icon_name = tag_url.split('/')[-1] if icon_name: if len(icon_name) > 1: if icon_name[0].isdigit(): @@ -472,14 +473,12 @@ def replace_emoji_from_tags(session, base_dir: str, 'no conversion of ' + str(icon_name) + ' to chr ' + tag_item['name'] + ' ' + - tag_item['icon']['url']) + tag_url) if not replaced: _save_custom_emoji(session, base_dir, tag_item['name'], - tag_item['icon']['url'], - debug) - _update_common_emoji(base_dir, - icon_name) + tag_url, debug) + _update_common_emoji(base_dir, icon_name) else: _update_common_emoji(base_dir, "0x" + icon_name) @@ -501,12 +500,11 @@ def replace_emoji_from_tags(session, base_dir: str, 'no conversion of ' + str(icode) + ' to chr ' + tag_item['name'] + ' ' + - tag_item['icon']['url']) + tag_url) if not replaced: _save_custom_emoji(session, base_dir, tag_item['name'], - tag_item['icon']['url'], - debug) + tag_url, debug) _update_common_emoji(base_dir, icon_name) else: @@ -529,7 +527,8 @@ def replace_emoji_from_tags(session, base_dir: str, emoji_tag_name = tag_item['name'].replace(':', '') else: emoji_tag_name = '' - emoji_html = "" content = content.replace(tag_item['name'], emoji_html) diff --git a/daemon.py b/daemon.py index d966c1b3a..9ea3248e1 100644 --- a/daemon.py +++ b/daemon.py @@ -6419,7 +6419,7 @@ class PubServer(BaseHTTPRequestHandler): for m_type, last_part in uploads: rep_str = '/' + last_part if m_type == 'avatar': - actor_url = actor_json['icon']['url'] + actor_url = remove_html(actor_json['icon']['url']) last_part_of_url = actor_url.split('/')[-1] srch_str = '/' + last_part_of_url actor_url = actor_url.replace(srch_str, rep_str) @@ -6432,15 +6432,14 @@ class PubServer(BaseHTTPRequestHandler): actor_json['icon']['mediaType'] = \ 'image/' + img_ext elif m_type == 'image': - last_part_of_url = \ - actor_json['image']['url'].split('/')[-1] + im_url = \ + remove_html(actor_json['image']['url']) + last_part_of_url = im_url.split('/')[-1] srch_str = '/' + last_part_of_url actor_json['image']['url'] = \ - actor_json['image']['url'].replace(srch_str, - rep_str) - if '.' in actor_json['image']['url']: - img_ext = \ - actor_json['image']['url'].split('.')[-1] + im_url.replace(srch_str, rep_str) + if '.' in im_url: + img_ext = im_url.split('.')[-1] if img_ext == 'jpg': img_ext = 'jpeg' actor_json['image']['mediaType'] = \ diff --git a/desktop_client.py b/desktop_client.py index 67175b4bb..79f27224e 100644 --- a/desktop_client.py +++ b/desktop_client.py @@ -742,7 +742,8 @@ def _show_replies_on_post(post_json_object: {}, max_replies: int) -> None: print('') ctr = 0 for item in object_replies['items']: - print(' ↰ ' + str(item['url'])) + item_url = remove_html(item['url']) + print(' ↰ ' + str(item_url)) ctr += 1 if ctr >= max_replies: break diff --git a/inbox.py b/inbox.py index 7957a1ad0..f52ef1d66 100644 --- a/inbox.py +++ b/inbox.py @@ -187,7 +187,7 @@ def cache_svg_images(session, base_dir: str, http_prefix: str, continue if attach['url'].endswith('.svg') or \ 'svg' in attach['mediaType']: - url = attach['url'] + url = remove_html(attach['url']) if not url_permitted(url, federation_list): continue # if this is a local image then it has already been @@ -1175,8 +1175,9 @@ def _person_receive_update(base_dir: str, debug: bool, http_prefix: str) -> bool: """Changes an actor. eg: avatar or display name change """ + person_url = remove_html(person_json['url']) if debug: - print('Receiving actor update for ' + person_json['url'] + + print('Receiving actor update for ' + person_url + ' ' + str(person_json)) domain_full = get_full_domain(domain, port) update_domain_full = get_full_domain(update_domain, update_port) @@ -2581,7 +2582,8 @@ def _receive_bookmark(recent_posts_cache: {}, if debug: print('DEBUG: c2s inbox bookmark Add request arrived in outbox') - message_url = remove_id_ending(message_json['object']['url']) + message_url2 = remove_html(message_json['object']['url']) + message_url = remove_id_ending(message_url2) domain = remove_domain_port(domain) post_filename = locate_post(base_dir, nickname, domain, message_url) if not post_filename: @@ -2591,7 +2593,7 @@ def _receive_bookmark(recent_posts_cache: {}, return True update_bookmarks_collection(recent_posts_cache, base_dir, post_filename, - message_json['object']['url'], + message_url2, message_json['actor'], domain, debug) # regenerate the html bookmarked_post_json = load_json(post_filename, 0, 1) @@ -2707,7 +2709,8 @@ def _receive_undo_bookmark(recent_posts_cache: {}, print('DEBUG: c2s inbox Remove bookmark ' + 'request arrived in outbox') - message_url = remove_id_ending(message_json['object']['url']) + message_url2 = remove_html(message_json['object']['url']) + message_url = remove_id_ending(message_url2) domain = remove_domain_port(domain) post_filename = locate_post(base_dir, nickname, domain, message_url) if not post_filename: diff --git a/maps.py b/maps.py index c9fe0a20b..48489db6f 100644 --- a/maps.py +++ b/maps.py @@ -15,6 +15,7 @@ from utils import acct_dir from utils import load_json from utils import save_json from utils import locate_post +from utils import remove_html def get_location_from_tags(tags: []) -> str: @@ -340,7 +341,7 @@ def get_map_preferences_url(base_dir: str, nickname: str, domain: str) -> str: if os.path.isfile(maps_filename): maps_json = load_json(maps_filename) if maps_json.get('url'): - return maps_json['url'] + return remove_html(maps_json['url']) return None diff --git a/mastoapiv1.py b/mastoapiv1.py index 3b6c42129..d165fe87a 100644 --- a/mastoapiv1.py +++ b/mastoapiv1.py @@ -11,6 +11,7 @@ import os from utils import load_json from utils import get_config_param from utils import acct_dir +from utils import remove_html from metadata import meta_data_instance @@ -62,6 +63,8 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str) -> {}: account_json = load_json(account_filename) if not account_json: return {} + avatar_url = remove_html(account_json['icon']['url']) + image_url = remove_html(account_json['image']['url']) masto_account_json = { "id": get_masto_api_v1id_from_nickname(nickname), "username": nickname, @@ -74,10 +77,10 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str) -> {}: "statuses_count": 0, "note": account_json['summary'], "url": account_json['id'], - "avatar": account_json['icon']['url'], - "avatar_static": account_json['icon']['url'], - "header": account_json['image']['url'], - "header_static": account_json['image']['url'] + "avatar": avatar_url, + "avatar_static": avatar_url, + "header": image_url, + "header_static": image_url } return masto_account_json diff --git a/metadata.py b/metadata.py index f42a00806..2c2b5f401 100644 --- a/metadata.py +++ b/metadata.py @@ -12,6 +12,7 @@ from utils import is_account_dir from utils import load_json from utils import no_of_accounts from utils import no_of_active_accounts_monthly +from utils import remove_html def _get_status_count(base_dir: str) -> int: @@ -152,6 +153,8 @@ def meta_data_instance(show_accounts: bool, if admin_actor.get('published'): created_at = admin_actor['published'] + icon_url = remove_html(admin_actor['icon']['url']) + image_url = remove_html(admin_actor['image']['url']) instance = { 'approval_required': False, 'invites_enabled': False, @@ -159,10 +162,10 @@ def meta_data_instance(show_accounts: bool, 'contact_account': { 'acct': admin_actor['preferredUsername'], 'created_at': created_at, - 'avatar': admin_actor['icon']['url'], - 'avatar_static': admin_actor['icon']['url'], - 'header': admin_actor['image']['url'], - 'header_static': admin_actor['image']['url'], + 'avatar': icon_url, + 'avatar_static': icon_url, + 'header': image_url, + 'header_static': image_url, 'bot': is_bot, 'discoverable': True, 'group': is_group, diff --git a/newswire.py b/newswire.py index d416e6237..8026c3fcb 100644 --- a/newswire.py +++ b/newswire.py @@ -492,9 +492,9 @@ def _valid_podcast_entry(base_dir: str, key: str, entry: {}) -> bool: if entry['protocol'].tolower() != 'activitypub': return False if entry.get('uri'): - post_url = entry['uri'] + post_url = remove_html(entry['uri']) elif entry.get('url'): - post_url = entry['uri'] + post_url = remove_html(entry['uri']) else: post_url = entry['text'] if '://' not in post_url: @@ -1133,7 +1133,7 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str, if tag_name not in description: description += ' ' + tag_name - link = json_feed_item['url'] + link = remove_html(json_feed_item['url']) if '://' not in link: continue if len(link) > max_bytes: @@ -1551,10 +1551,10 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str, description = remove_html(description) tags_from_post = _get_hashtags_from_post(post_json_object) summary = post_json_object['object']['summary'] + url2 = remove_html(post_json_object['object']['url']) _add_newswire_dict_entry(base_dir, domain, newswire, published, - summary, - post_json_object['object']['url'], + summary, url2, votes, full_post_filename, description, moderated, False, tags_from_post, diff --git a/person.py b/person.py index 3cba8d394..7d816b2c2 100644 --- a/person.py +++ b/person.py @@ -1789,7 +1789,7 @@ def get_person_avatar_url(base_dir: str, person_url: str, if person_json.get('icon'): if person_json['icon'].get('url'): if '.svg' not in person_json['icon']['url'].lower(): - return person_json['icon']['url'] + return remove_html(person_json['icon']['url']) return None @@ -1971,7 +1971,7 @@ def get_featured_hashtags(actor_json: {}) -> str: tag_name = tag_name[1:] if not tag_name: continue - tag_url = tag_dict['href'] + tag_url = remove_html(tag_dict['href']) if '://' not in tag_url: continue if not valid_hash_tag(tag_name): @@ -2019,13 +2019,13 @@ def get_featured_hashtags_as_html(actor_json: {}, continue if ' #' + tag_name in profile_description: continue - tag_url = tag_dict['href'] + tag_url = remove_html(tag_dict['href']) if '://' not in tag_url: continue if not valid_hash_tag(tag_name): continue result += \ - '#' + tag_name + ' ' ctr += 1 diff --git a/posts.py b/posts.py index fd8553d54..01e960b1b 100644 --- a/posts.py +++ b/posts.py @@ -216,10 +216,10 @@ def get_user_url(wf_request: {}, source_id: int, debug: bool) -> str: else: url = link['href'] if not contains_invalid_actor_url_chars(url): - return url + return remove_html(url) url = link['href'] if not contains_invalid_actor_url_chars(url): - return url + return remove_html(url) return None @@ -404,7 +404,7 @@ def get_person_box(signing_priv_key_pem: str, origin_domain: str, avatar_url = None if person_json.get('icon'): if person_json['icon'].get('url'): - avatar_url = person_json['icon']['url'] + avatar_url = remove_html(person_json['icon']['url']) display_name = None if person_json.get('name'): display_name = person_json['name'] @@ -628,7 +628,8 @@ def _get_posts(session, outbox_url: str, max_posts: int, if url_permitted(tag_item['icon']['url'], federation_list): emoji_name = tag_item['name'] - emoji_icon = tag_item['icon']['url'] + emoji_icon = \ + remove_html(tag_item['icon']['url']) emoji[emoji_name] = emoji_icon else: if debug: @@ -675,10 +676,11 @@ def _get_posts(session, outbox_url: str, max_posts: int, for attach in this_item['attachment']: if attach.get('name') and attach.get('url'): # no attachments from non-permitted domains - if url_permitted(attach['url'], + attach_url = remove_html(attach['url']) + if url_permitted(attach_url, federation_list): attachment.append([attach['name'], - attach['url']]) + attach_url]) else: if debug: print('url not permitted ' + @@ -820,8 +822,9 @@ def get_post_domains(session, outbox_url: str, max_posts: int, debug: bool, tag_type = tag_item['type'].lower() if tag_type == 'mention': if tag_item.get('href'): + tag_url = remove_html(tag_item['href']) post_domain, _ = \ - get_domain_from_actor(tag_item['href']) + get_domain_from_actor(tag_url) if post_domain: if post_domain not in post_domains: post_domains.append(post_domain) @@ -879,6 +882,7 @@ def _get_posts_for_blocked_domains(base_dir: str, url = item['object']['url'] else: url = item['object']['id'] + url = remove_html(url) if not blocked_posts.get(post_domain): blocked_posts[post_domain] = [url] else: @@ -891,8 +895,9 @@ def _get_posts_for_blocked_domains(base_dir: str, continue tag_type = tag_item['type'].lower() if tag_type == 'mention' and tag_item.get('href'): + tag_url = remove_html(tag_item['href']) post_domain, _ = \ - get_domain_from_actor(tag_item['href']) + get_domain_from_actor(tag_url) if not post_domain: continue if is_blocked_domain(base_dir, post_domain): @@ -900,6 +905,7 @@ def _get_posts_for_blocked_domains(base_dir: str, url = item['object']['url'] else: url = item['object']['id'] + url = remove_html(url) if not blocked_posts.get(post_domain): blocked_posts[post_domain] = [url] else: @@ -1496,7 +1502,8 @@ def _create_post_mentions(cc_url: str, new_post: {}, if tag['type'] != 'Mention': continue if tag['href'] not in to_cc: - new_post['object']['cc'].append(tag['href']) + tag_url = remove_html(tag['href']) + new_post['object']['cc'].append(tag_url) _consolidate_actors_list(new_post['object']['cc']) new_post['cc'] = new_post['object']['cc'] @@ -2099,9 +2106,9 @@ def create_blog_post(base_dir: str, low_bandwidth, content_license_url, media_license_url, media_creator, languages_understood, translate, buy_url, chat_url) - if '/@/' not in blog_json['object']['url']: - blog_json['object']['url'] = \ - blog_json['object']['url'].replace('/@', '/users/') + obj_url = remove_html(blog_json['object']['url']) + if '/@/' not in obj_url: + blog_json['object']['url'] = obj_url.replace('/@', '/users/') _append_citations_to_blog_post(base_dir, nickname, domain, blog_json) return blog_json diff --git a/utils.py b/utils.py index 9a4ab922d..ad8e1f1bf 100644 --- a/utils.py +++ b/utils.py @@ -1862,6 +1862,7 @@ def _remove_attachment(base_dir: str, http_prefix: str, domain: str, attachment_url = post_json['attachment'][0]['url'] if not attachment_url: return + attachment_url = remove_html(attachment_url) media_filename = base_dir + '/' + \ attachment_url.replace(http_prefix + '://' + domain + '/', '') if os.path.isfile(media_filename): diff --git a/video.py b/video.py index 77974f482..9e06ddb68 100644 --- a/video.py +++ b/video.py @@ -7,6 +7,7 @@ __email__ = "bob@libreserver.org" __status__ = "Production" __module_group__ = "Timeline" +from utils import remove_html from utils import get_full_domain from utils import get_nickname_from_actor from utils import get_domain_from_actor @@ -110,15 +111,15 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str, if not media_link.get('href'): continue if media_link['mediaType'] == 'application/x-bittorrent': - media_torrent = media_link['href'] + media_torrent = remove_html(media_link['href']) if media_link['href'].startswith('magnet:'): - media_magnet = media_link['href'] + media_magnet = remove_html(media_link['href']) if media_link['mediaType'] != 'video/mp4' and \ media_link['mediaType'] != 'video/ogv': continue if not media_url: media_type = media_link['mediaType'] - media_url = media_link['href'] + media_url = remove_html(media_link['href']) if not media_url: return None @@ -138,7 +139,8 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str, content += '🧲' content += '' - new_post_id = remove_id_ending(post_json_object['id']) + new_post_id2 = remove_html(post_json_object['id']) + new_post_id = remove_id_ending(new_post_id2) new_post = { '@context': post_json_object['@context'], 'id': new_post_id + '/activity', diff --git a/webapp_moderation.py b/webapp_moderation.py index 24829bba1..102285150 100644 --- a/webapp_moderation.py +++ b/webapp_moderation.py @@ -8,6 +8,7 @@ __status__ = "Production" __module_group__ = "Moderation" import os +from utils import remove_html from utils import is_artist from utils import is_account_dir from utils import get_full_domain @@ -387,7 +388,7 @@ def html_moderation_info(translate: {}, base_dir: str, ext = '' if actor_json.get('icon'): if actor_json['icon'].get('url'): - avatar_url = actor_json['icon']['url'] + avatar_url = remove_html(actor_json['icon']['url']) if '.' in avatar_url: ext = '.' + avatar_url.split('.')[-1] acct_url = \ diff --git a/webapp_podcast.py b/webapp_podcast.py index 4d396c94a..b9104918c 100644 --- a/webapp_podcast.py +++ b/webapp_podcast.py @@ -38,7 +38,7 @@ def _html_podcast_chapters(link_url: str, if not isinstance(podcast_properties[key], dict): return '' if podcast_properties[key].get('url'): - chapters_url = podcast_properties[key]['url'] + chapters_url = remove_html(podcast_properties[key]['url']) elif podcast_properties[key].get('uri'): chapters_url = podcast_properties[key]['uri'] else: @@ -79,7 +79,7 @@ def _html_podcast_chapters(link_url: str, chapter_title = chapter['title'] chapter_url = '' if chapter.get('url'): - chapter_url = chapter['url'] + chapter_url = remove_html(chapter['url']) chapter_title = \ '' + \ chapter['title'] + '<\a>' @@ -121,7 +121,7 @@ def _html_podcast_transcripts(podcast_properties: {}, translate: {}) -> str: for _ in podcast_properties[key]: transcript_url = None if podcast_properties[key].get('url'): - transcript_url = podcast_properties[key]['url'] + transcript_url = remove_html(podcast_properties[key]['url']) elif podcast_properties[key].get('uri'): transcript_url = podcast_properties[key]['uri'] if not transcript_url: @@ -154,7 +154,7 @@ def _html_podcast_social_interactions(podcast_properties: {}, if podcast_properties[key].get('uri'): episode_post_url = podcast_properties[key]['uri'] elif podcast_properties[key].get('url'): - episode_post_url = podcast_properties[key]['url'] + episode_post_url = remove_html(podcast_properties[key]['url']) elif podcast_properties[key].get('text'): episode_post_url = podcast_properties[key]['text'] else: @@ -218,7 +218,7 @@ def _html_podcast_performers(podcast_properties: {}) -> str: performer_url = '' if performer.get('href'): - performer_url = performer['href'] + performer_url = remove_html(performer['href']) performer_img = '' if performer.get('img'): @@ -431,7 +431,7 @@ def html_podcast_episode(translate: {}, # donate button if podcast_properties.get('funding'): if podcast_properties['funding'].get('url'): - donate_url = podcast_properties['funding']['url'] + donate_url = remove_html(podcast_properties['funding']['url']) podcast_str += \ '