Sanitise links to avoid injection attacks in rendered html

merge-requests/30/head
Bob Mottram 2023-07-12 12:08:02 +01:00
parent af04377d36
commit eca0fa1736
21 changed files with 132 additions and 89 deletions

View File

@ -311,9 +311,11 @@ def _html_blog_post_content(debug: bool, session, authorized: bool,
continue continue
if not tag_json.get('url'): if not tag_json.get('url'):
continue continue
citation_url = remove_html(tag_json['url'])
citation_name = remove_html(tag_json['name'])
citations_str += \ citations_str += \
'<li><a href="' + tag_json['url'] + '">' + \ '<li><a href="' + citation_url + '">' + \
'<cite>' + tag_json['name'] + '</cite></a></li>\n' '<cite>' + citation_name + '</cite></a></li>\n'
if citations_str: if citations_str:
citations_str = '<p><b>' + translate['Citations'] + \ citations_str = '<p><b>' + translate['Citations'] + \
':</b></p>' + \ ':</b></p>' + \
@ -475,7 +477,7 @@ def html_blog_post(session, authorized: bool,
title = post_json_object['object']['summary'] title = post_json_object['object']['summary']
url = '' url = ''
if post_json_object['object'].get('url'): if post_json_object['object'].get('url'):
url = post_json_object['object']['url'] url = remove_html(post_json_object['object']['url'])
snippet = _get_snippet_from_blog_content(post_json_object, snippet = _get_snippet_from_blog_content(post_json_object,
system_language) system_language)
blog_str = html_header_with_blog_markup(css_filename, instance_title, blog_str = html_header_with_blog_markup(css_filename, instance_title,

View File

@ -30,6 +30,7 @@ from utils import has_actor
from utils import has_object_string_type from utils import has_object_string_type
from utils import text_in_file from utils import text_in_file
from utils import remove_eol from utils import remove_eol
from utils import remove_html
from posts import get_person_box from posts import get_person_box
from session import post_json from session import post_json
@ -599,6 +600,7 @@ def outbox_bookmark(recent_posts_cache: {},
print('DEBUG: c2s bookmark Add request arrived in outbox') print('DEBUG: c2s bookmark Add request arrived in outbox')
message_url = remove_id_ending(message_json['object']['url']) message_url = remove_id_ending(message_json['object']['url'])
message_url = remove_html(message_url)
domain = remove_domain_port(domain) domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url) post_filename = locate_post(base_dir, nickname, domain, message_url)
if not post_filename: if not post_filename:
@ -656,6 +658,7 @@ def outbox_undo_bookmark(recent_posts_cache: {},
print('DEBUG: c2s unbookmark Remove request arrived in outbox') print('DEBUG: c2s unbookmark Remove request arrived in outbox')
message_url = remove_id_ending(message_json['object']['url']) message_url = remove_id_ending(message_json['object']['url'])
message_url = remove_html(message_url)
domain = remove_domain_port(domain) domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url) post_filename = locate_post(base_dir, nickname, domain, message_url)
if not post_filename: if not post_filename:

View File

@ -445,7 +445,8 @@ def replace_emoji_from_tags(session, base_dir: str,
continue continue
if tag_item['name'] not in content: if tag_item['name'] not in content:
continue continue
icon_name = tag_item['icon']['url'].split('/')[-1] tag_url = remove_html(tag_item['icon']['url'])
icon_name = tag_url.split('/')[-1]
if icon_name: if icon_name:
if len(icon_name) > 1: if len(icon_name) > 1:
if icon_name[0].isdigit(): if icon_name[0].isdigit():
@ -472,14 +473,12 @@ def replace_emoji_from_tags(session, base_dir: str,
'no conversion of ' + 'no conversion of ' +
str(icon_name) + ' to chr ' + str(icon_name) + ' to chr ' +
tag_item['name'] + ' ' + tag_item['name'] + ' ' +
tag_item['icon']['url']) tag_url)
if not replaced: if not replaced:
_save_custom_emoji(session, base_dir, _save_custom_emoji(session, base_dir,
tag_item['name'], tag_item['name'],
tag_item['icon']['url'], tag_url, debug)
debug) _update_common_emoji(base_dir, icon_name)
_update_common_emoji(base_dir,
icon_name)
else: else:
_update_common_emoji(base_dir, _update_common_emoji(base_dir,
"0x" + icon_name) "0x" + icon_name)
@ -501,12 +500,11 @@ def replace_emoji_from_tags(session, base_dir: str,
'no conversion of ' + 'no conversion of ' +
str(icode) + ' to chr ' + str(icode) + ' to chr ' +
tag_item['name'] + ' ' + tag_item['name'] + ' ' +
tag_item['icon']['url']) tag_url)
if not replaced: if not replaced:
_save_custom_emoji(session, base_dir, _save_custom_emoji(session, base_dir,
tag_item['name'], tag_item['name'],
tag_item['icon']['url'], tag_url, debug)
debug)
_update_common_emoji(base_dir, _update_common_emoji(base_dir,
icon_name) icon_name)
else: else:
@ -529,7 +527,8 @@ def replace_emoji_from_tags(session, base_dir: str,
emoji_tag_name = tag_item['name'].replace(':', '') emoji_tag_name = tag_item['name'].replace(':', '')
else: else:
emoji_tag_name = '' emoji_tag_name = ''
emoji_html = "<img src=\"" + tag_item['icon']['url'] + "\" alt=\"" + \ tag_url = remove_html(tag_item['icon']['url'])
emoji_html = "<img src=\"" + tag_url + "\" alt=\"" + \
emoji_tag_name + \ emoji_tag_name + \
"\" align=\"middle\" class=\"" + html_class + "\"/>" "\" align=\"middle\" class=\"" + html_class + "\"/>"
content = content.replace(tag_item['name'], emoji_html) content = content.replace(tag_item['name'], emoji_html)

View File

@ -6419,7 +6419,7 @@ class PubServer(BaseHTTPRequestHandler):
for m_type, last_part in uploads: for m_type, last_part in uploads:
rep_str = '/' + last_part rep_str = '/' + last_part
if m_type == 'avatar': if m_type == 'avatar':
actor_url = actor_json['icon']['url'] actor_url = remove_html(actor_json['icon']['url'])
last_part_of_url = actor_url.split('/')[-1] last_part_of_url = actor_url.split('/')[-1]
srch_str = '/' + last_part_of_url srch_str = '/' + last_part_of_url
actor_url = actor_url.replace(srch_str, rep_str) actor_url = actor_url.replace(srch_str, rep_str)
@ -6432,15 +6432,14 @@ class PubServer(BaseHTTPRequestHandler):
actor_json['icon']['mediaType'] = \ actor_json['icon']['mediaType'] = \
'image/' + img_ext 'image/' + img_ext
elif m_type == 'image': elif m_type == 'image':
last_part_of_url = \ im_url = \
actor_json['image']['url'].split('/')[-1] remove_html(actor_json['image']['url'])
last_part_of_url = im_url.split('/')[-1]
srch_str = '/' + last_part_of_url srch_str = '/' + last_part_of_url
actor_json['image']['url'] = \ actor_json['image']['url'] = \
actor_json['image']['url'].replace(srch_str, im_url.replace(srch_str, rep_str)
rep_str) if '.' in im_url:
if '.' in actor_json['image']['url']: img_ext = im_url.split('.')[-1]
img_ext = \
actor_json['image']['url'].split('.')[-1]
if img_ext == 'jpg': if img_ext == 'jpg':
img_ext = 'jpeg' img_ext = 'jpeg'
actor_json['image']['mediaType'] = \ actor_json['image']['mediaType'] = \

View File

@ -742,7 +742,8 @@ def _show_replies_on_post(post_json_object: {}, max_replies: int) -> None:
print('') print('')
ctr = 0 ctr = 0
for item in object_replies['items']: for item in object_replies['items']:
print('' + str(item['url'])) item_url = remove_html(item['url'])
print('' + str(item_url))
ctr += 1 ctr += 1
if ctr >= max_replies: if ctr >= max_replies:
break break

View File

@ -187,7 +187,7 @@ def cache_svg_images(session, base_dir: str, http_prefix: str,
continue continue
if attach['url'].endswith('.svg') or \ if attach['url'].endswith('.svg') or \
'svg' in attach['mediaType']: 'svg' in attach['mediaType']:
url = attach['url'] url = remove_html(attach['url'])
if not url_permitted(url, federation_list): if not url_permitted(url, federation_list):
continue continue
# if this is a local image then it has already been # if this is a local image then it has already been
@ -1175,8 +1175,9 @@ def _person_receive_update(base_dir: str,
debug: bool, http_prefix: str) -> bool: debug: bool, http_prefix: str) -> bool:
"""Changes an actor. eg: avatar or display name change """Changes an actor. eg: avatar or display name change
""" """
person_url = remove_html(person_json['url'])
if debug: if debug:
print('Receiving actor update for ' + person_json['url'] + print('Receiving actor update for ' + person_url +
' ' + str(person_json)) ' ' + str(person_json))
domain_full = get_full_domain(domain, port) domain_full = get_full_domain(domain, port)
update_domain_full = get_full_domain(update_domain, update_port) update_domain_full = get_full_domain(update_domain, update_port)
@ -2581,7 +2582,8 @@ def _receive_bookmark(recent_posts_cache: {},
if debug: if debug:
print('DEBUG: c2s inbox bookmark Add request arrived in outbox') print('DEBUG: c2s inbox bookmark Add request arrived in outbox')
message_url = remove_id_ending(message_json['object']['url']) message_url2 = remove_html(message_json['object']['url'])
message_url = remove_id_ending(message_url2)
domain = remove_domain_port(domain) domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url) post_filename = locate_post(base_dir, nickname, domain, message_url)
if not post_filename: if not post_filename:
@ -2591,7 +2593,7 @@ def _receive_bookmark(recent_posts_cache: {},
return True return True
update_bookmarks_collection(recent_posts_cache, base_dir, post_filename, update_bookmarks_collection(recent_posts_cache, base_dir, post_filename,
message_json['object']['url'], message_url2,
message_json['actor'], domain, debug) message_json['actor'], domain, debug)
# regenerate the html # regenerate the html
bookmarked_post_json = load_json(post_filename, 0, 1) bookmarked_post_json = load_json(post_filename, 0, 1)
@ -2707,7 +2709,8 @@ def _receive_undo_bookmark(recent_posts_cache: {},
print('DEBUG: c2s inbox Remove bookmark ' + print('DEBUG: c2s inbox Remove bookmark ' +
'request arrived in outbox') 'request arrived in outbox')
message_url = remove_id_ending(message_json['object']['url']) message_url2 = remove_html(message_json['object']['url'])
message_url = remove_id_ending(message_url2)
domain = remove_domain_port(domain) domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url) post_filename = locate_post(base_dir, nickname, domain, message_url)
if not post_filename: if not post_filename:

View File

@ -15,6 +15,7 @@ from utils import acct_dir
from utils import load_json from utils import load_json
from utils import save_json from utils import save_json
from utils import locate_post from utils import locate_post
from utils import remove_html
def get_location_from_tags(tags: []) -> str: def get_location_from_tags(tags: []) -> str:
@ -340,7 +341,7 @@ def get_map_preferences_url(base_dir: str, nickname: str, domain: str) -> str:
if os.path.isfile(maps_filename): if os.path.isfile(maps_filename):
maps_json = load_json(maps_filename) maps_json = load_json(maps_filename)
if maps_json.get('url'): if maps_json.get('url'):
return maps_json['url'] return remove_html(maps_json['url'])
return None return None

View File

@ -11,6 +11,7 @@ import os
from utils import load_json from utils import load_json
from utils import get_config_param from utils import get_config_param
from utils import acct_dir from utils import acct_dir
from utils import remove_html
from metadata import meta_data_instance from metadata import meta_data_instance
@ -62,6 +63,8 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str) -> {}:
account_json = load_json(account_filename) account_json = load_json(account_filename)
if not account_json: if not account_json:
return {} return {}
avatar_url = remove_html(account_json['icon']['url'])
image_url = remove_html(account_json['image']['url'])
masto_account_json = { masto_account_json = {
"id": get_masto_api_v1id_from_nickname(nickname), "id": get_masto_api_v1id_from_nickname(nickname),
"username": nickname, "username": nickname,
@ -74,10 +77,10 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str) -> {}:
"statuses_count": 0, "statuses_count": 0,
"note": account_json['summary'], "note": account_json['summary'],
"url": account_json['id'], "url": account_json['id'],
"avatar": account_json['icon']['url'], "avatar": avatar_url,
"avatar_static": account_json['icon']['url'], "avatar_static": avatar_url,
"header": account_json['image']['url'], "header": image_url,
"header_static": account_json['image']['url'] "header_static": image_url
} }
return masto_account_json return masto_account_json

View File

@ -12,6 +12,7 @@ from utils import is_account_dir
from utils import load_json from utils import load_json
from utils import no_of_accounts from utils import no_of_accounts
from utils import no_of_active_accounts_monthly from utils import no_of_active_accounts_monthly
from utils import remove_html
def _get_status_count(base_dir: str) -> int: def _get_status_count(base_dir: str) -> int:
@ -152,6 +153,8 @@ def meta_data_instance(show_accounts: bool,
if admin_actor.get('published'): if admin_actor.get('published'):
created_at = admin_actor['published'] created_at = admin_actor['published']
icon_url = remove_html(admin_actor['icon']['url'])
image_url = remove_html(admin_actor['image']['url'])
instance = { instance = {
'approval_required': False, 'approval_required': False,
'invites_enabled': False, 'invites_enabled': False,
@ -159,10 +162,10 @@ def meta_data_instance(show_accounts: bool,
'contact_account': { 'contact_account': {
'acct': admin_actor['preferredUsername'], 'acct': admin_actor['preferredUsername'],
'created_at': created_at, 'created_at': created_at,
'avatar': admin_actor['icon']['url'], 'avatar': icon_url,
'avatar_static': admin_actor['icon']['url'], 'avatar_static': icon_url,
'header': admin_actor['image']['url'], 'header': image_url,
'header_static': admin_actor['image']['url'], 'header_static': image_url,
'bot': is_bot, 'bot': is_bot,
'discoverable': True, 'discoverable': True,
'group': is_group, 'group': is_group,

View File

@ -492,9 +492,9 @@ def _valid_podcast_entry(base_dir: str, key: str, entry: {}) -> bool:
if entry['protocol'].tolower() != 'activitypub': if entry['protocol'].tolower() != 'activitypub':
return False return False
if entry.get('uri'): if entry.get('uri'):
post_url = entry['uri'] post_url = remove_html(entry['uri'])
elif entry.get('url'): elif entry.get('url'):
post_url = entry['uri'] post_url = remove_html(entry['uri'])
else: else:
post_url = entry['text'] post_url = entry['text']
if '://' not in post_url: if '://' not in post_url:
@ -1133,7 +1133,7 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
if tag_name not in description: if tag_name not in description:
description += ' ' + tag_name description += ' ' + tag_name
link = json_feed_item['url'] link = remove_html(json_feed_item['url'])
if '://' not in link: if '://' not in link:
continue continue
if len(link) > max_bytes: if len(link) > max_bytes:
@ -1551,10 +1551,10 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
description = remove_html(description) description = remove_html(description)
tags_from_post = _get_hashtags_from_post(post_json_object) tags_from_post = _get_hashtags_from_post(post_json_object)
summary = post_json_object['object']['summary'] summary = post_json_object['object']['summary']
url2 = remove_html(post_json_object['object']['url'])
_add_newswire_dict_entry(base_dir, domain, _add_newswire_dict_entry(base_dir, domain,
newswire, published, newswire, published,
summary, summary, url2,
post_json_object['object']['url'],
votes, full_post_filename, votes, full_post_filename,
description, moderated, False, description, moderated, False,
tags_from_post, tags_from_post,

View File

@ -1789,7 +1789,7 @@ def get_person_avatar_url(base_dir: str, person_url: str,
if person_json.get('icon'): if person_json.get('icon'):
if person_json['icon'].get('url'): if person_json['icon'].get('url'):
if '.svg' not in person_json['icon']['url'].lower(): if '.svg' not in person_json['icon']['url'].lower():
return person_json['icon']['url'] return remove_html(person_json['icon']['url'])
return None return None
@ -1971,7 +1971,7 @@ def get_featured_hashtags(actor_json: {}) -> str:
tag_name = tag_name[1:] tag_name = tag_name[1:]
if not tag_name: if not tag_name:
continue continue
tag_url = tag_dict['href'] tag_url = remove_html(tag_dict['href'])
if '://' not in tag_url: if '://' not in tag_url:
continue continue
if not valid_hash_tag(tag_name): if not valid_hash_tag(tag_name):
@ -2019,13 +2019,13 @@ def get_featured_hashtags_as_html(actor_json: {},
continue continue
if ' #' + tag_name in profile_description: if ' #' + tag_name in profile_description:
continue continue
tag_url = tag_dict['href'] tag_url = remove_html(tag_dict['href'])
if '://' not in tag_url: if '://' not in tag_url:
continue continue
if not valid_hash_tag(tag_name): if not valid_hash_tag(tag_name):
continue continue
result += \ result += \
'<a href="' + tag_dict['href'] + '" ' + \ '<a href="' + tag_url + '" ' + \
'class="mention hashtag" rel="tag" ' + \ 'class="mention hashtag" rel="tag" ' + \
'tabindex="10">#' + tag_name + '</a> ' 'tabindex="10">#' + tag_name + '</a> '
ctr += 1 ctr += 1

View File

@ -216,10 +216,10 @@ def get_user_url(wf_request: {}, source_id: int, debug: bool) -> str:
else: else:
url = link['href'] url = link['href']
if not contains_invalid_actor_url_chars(url): if not contains_invalid_actor_url_chars(url):
return url return remove_html(url)
url = link['href'] url = link['href']
if not contains_invalid_actor_url_chars(url): if not contains_invalid_actor_url_chars(url):
return url return remove_html(url)
return None return None
@ -404,7 +404,7 @@ def get_person_box(signing_priv_key_pem: str, origin_domain: str,
avatar_url = None avatar_url = None
if person_json.get('icon'): if person_json.get('icon'):
if person_json['icon'].get('url'): if person_json['icon'].get('url'):
avatar_url = person_json['icon']['url'] avatar_url = remove_html(person_json['icon']['url'])
display_name = None display_name = None
if person_json.get('name'): if person_json.get('name'):
display_name = person_json['name'] display_name = person_json['name']
@ -628,7 +628,8 @@ def _get_posts(session, outbox_url: str, max_posts: int,
if url_permitted(tag_item['icon']['url'], if url_permitted(tag_item['icon']['url'],
federation_list): federation_list):
emoji_name = tag_item['name'] emoji_name = tag_item['name']
emoji_icon = tag_item['icon']['url'] emoji_icon = \
remove_html(tag_item['icon']['url'])
emoji[emoji_name] = emoji_icon emoji[emoji_name] = emoji_icon
else: else:
if debug: if debug:
@ -675,10 +676,11 @@ def _get_posts(session, outbox_url: str, max_posts: int,
for attach in this_item['attachment']: for attach in this_item['attachment']:
if attach.get('name') and attach.get('url'): if attach.get('name') and attach.get('url'):
# no attachments from non-permitted domains # no attachments from non-permitted domains
if url_permitted(attach['url'], attach_url = remove_html(attach['url'])
if url_permitted(attach_url,
federation_list): federation_list):
attachment.append([attach['name'], attachment.append([attach['name'],
attach['url']]) attach_url])
else: else:
if debug: if debug:
print('url not permitted ' + print('url not permitted ' +
@ -820,8 +822,9 @@ def get_post_domains(session, outbox_url: str, max_posts: int, debug: bool,
tag_type = tag_item['type'].lower() tag_type = tag_item['type'].lower()
if tag_type == 'mention': if tag_type == 'mention':
if tag_item.get('href'): if tag_item.get('href'):
tag_url = remove_html(tag_item['href'])
post_domain, _ = \ post_domain, _ = \
get_domain_from_actor(tag_item['href']) get_domain_from_actor(tag_url)
if post_domain: if post_domain:
if post_domain not in post_domains: if post_domain not in post_domains:
post_domains.append(post_domain) post_domains.append(post_domain)
@ -879,6 +882,7 @@ def _get_posts_for_blocked_domains(base_dir: str,
url = item['object']['url'] url = item['object']['url']
else: else:
url = item['object']['id'] url = item['object']['id']
url = remove_html(url)
if not blocked_posts.get(post_domain): if not blocked_posts.get(post_domain):
blocked_posts[post_domain] = [url] blocked_posts[post_domain] = [url]
else: else:
@ -891,8 +895,9 @@ def _get_posts_for_blocked_domains(base_dir: str,
continue continue
tag_type = tag_item['type'].lower() tag_type = tag_item['type'].lower()
if tag_type == 'mention' and tag_item.get('href'): if tag_type == 'mention' and tag_item.get('href'):
tag_url = remove_html(tag_item['href'])
post_domain, _ = \ post_domain, _ = \
get_domain_from_actor(tag_item['href']) get_domain_from_actor(tag_url)
if not post_domain: if not post_domain:
continue continue
if is_blocked_domain(base_dir, post_domain): if is_blocked_domain(base_dir, post_domain):
@ -900,6 +905,7 @@ def _get_posts_for_blocked_domains(base_dir: str,
url = item['object']['url'] url = item['object']['url']
else: else:
url = item['object']['id'] url = item['object']['id']
url = remove_html(url)
if not blocked_posts.get(post_domain): if not blocked_posts.get(post_domain):
blocked_posts[post_domain] = [url] blocked_posts[post_domain] = [url]
else: else:
@ -1496,7 +1502,8 @@ def _create_post_mentions(cc_url: str, new_post: {},
if tag['type'] != 'Mention': if tag['type'] != 'Mention':
continue continue
if tag['href'] not in to_cc: if tag['href'] not in to_cc:
new_post['object']['cc'].append(tag['href']) tag_url = remove_html(tag['href'])
new_post['object']['cc'].append(tag_url)
_consolidate_actors_list(new_post['object']['cc']) _consolidate_actors_list(new_post['object']['cc'])
new_post['cc'] = new_post['object']['cc'] new_post['cc'] = new_post['object']['cc']
@ -2099,9 +2106,9 @@ def create_blog_post(base_dir: str,
low_bandwidth, content_license_url, low_bandwidth, content_license_url,
media_license_url, media_creator, media_license_url, media_creator,
languages_understood, translate, buy_url, chat_url) languages_understood, translate, buy_url, chat_url)
if '/@/' not in blog_json['object']['url']: obj_url = remove_html(blog_json['object']['url'])
blog_json['object']['url'] = \ if '/@/' not in obj_url:
blog_json['object']['url'].replace('/@', '/users/') blog_json['object']['url'] = obj_url.replace('/@', '/users/')
_append_citations_to_blog_post(base_dir, nickname, domain, blog_json) _append_citations_to_blog_post(base_dir, nickname, domain, blog_json)
return blog_json return blog_json

View File

@ -1862,6 +1862,7 @@ def _remove_attachment(base_dir: str, http_prefix: str, domain: str,
attachment_url = post_json['attachment'][0]['url'] attachment_url = post_json['attachment'][0]['url']
if not attachment_url: if not attachment_url:
return return
attachment_url = remove_html(attachment_url)
media_filename = base_dir + '/' + \ media_filename = base_dir + '/' + \
attachment_url.replace(http_prefix + '://' + domain + '/', '') attachment_url.replace(http_prefix + '://' + domain + '/', '')
if os.path.isfile(media_filename): if os.path.isfile(media_filename):

View File

@ -7,6 +7,7 @@ __email__ = "bob@libreserver.org"
__status__ = "Production" __status__ = "Production"
__module_group__ = "Timeline" __module_group__ = "Timeline"
from utils import remove_html
from utils import get_full_domain from utils import get_full_domain
from utils import get_nickname_from_actor from utils import get_nickname_from_actor
from utils import get_domain_from_actor from utils import get_domain_from_actor
@ -110,15 +111,15 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
if not media_link.get('href'): if not media_link.get('href'):
continue continue
if media_link['mediaType'] == 'application/x-bittorrent': if media_link['mediaType'] == 'application/x-bittorrent':
media_torrent = media_link['href'] media_torrent = remove_html(media_link['href'])
if media_link['href'].startswith('magnet:'): if media_link['href'].startswith('magnet:'):
media_magnet = media_link['href'] media_magnet = remove_html(media_link['href'])
if media_link['mediaType'] != 'video/mp4' and \ if media_link['mediaType'] != 'video/mp4' and \
media_link['mediaType'] != 'video/ogv': media_link['mediaType'] != 'video/ogv':
continue continue
if not media_url: if not media_url:
media_type = media_link['mediaType'] media_type = media_link['mediaType']
media_url = media_link['href'] media_url = remove_html(media_link['href'])
if not media_url: if not media_url:
return None return None
@ -138,7 +139,8 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
content += '<a href="' + media_magnet + '">🧲</a>' content += '<a href="' + media_magnet + '">🧲</a>'
content += '</p>' content += '</p>'
new_post_id = remove_id_ending(post_json_object['id']) new_post_id2 = remove_html(post_json_object['id'])
new_post_id = remove_id_ending(new_post_id2)
new_post = { new_post = {
'@context': post_json_object['@context'], '@context': post_json_object['@context'],
'id': new_post_id + '/activity', 'id': new_post_id + '/activity',

View File

@ -8,6 +8,7 @@ __status__ = "Production"
__module_group__ = "Moderation" __module_group__ = "Moderation"
import os import os
from utils import remove_html
from utils import is_artist from utils import is_artist
from utils import is_account_dir from utils import is_account_dir
from utils import get_full_domain from utils import get_full_domain
@ -387,7 +388,7 @@ def html_moderation_info(translate: {}, base_dir: str,
ext = '' ext = ''
if actor_json.get('icon'): if actor_json.get('icon'):
if actor_json['icon'].get('url'): if actor_json['icon'].get('url'):
avatar_url = actor_json['icon']['url'] avatar_url = remove_html(actor_json['icon']['url'])
if '.' in avatar_url: if '.' in avatar_url:
ext = '.' + avatar_url.split('.')[-1] ext = '.' + avatar_url.split('.')[-1]
acct_url = \ acct_url = \

View File

@ -38,7 +38,7 @@ def _html_podcast_chapters(link_url: str,
if not isinstance(podcast_properties[key], dict): if not isinstance(podcast_properties[key], dict):
return '' return ''
if podcast_properties[key].get('url'): if podcast_properties[key].get('url'):
chapters_url = podcast_properties[key]['url'] chapters_url = remove_html(podcast_properties[key]['url'])
elif podcast_properties[key].get('uri'): elif podcast_properties[key].get('uri'):
chapters_url = podcast_properties[key]['uri'] chapters_url = podcast_properties[key]['uri']
else: else:
@ -79,7 +79,7 @@ def _html_podcast_chapters(link_url: str,
chapter_title = chapter['title'] chapter_title = chapter['title']
chapter_url = '' chapter_url = ''
if chapter.get('url'): if chapter.get('url'):
chapter_url = chapter['url'] chapter_url = remove_html(chapter['url'])
chapter_title = \ chapter_title = \
'<a href="' + chapter_url + '">' + \ '<a href="' + chapter_url + '">' + \
chapter['title'] + '<\a>' chapter['title'] + '<\a>'
@ -121,7 +121,7 @@ def _html_podcast_transcripts(podcast_properties: {}, translate: {}) -> str:
for _ in podcast_properties[key]: for _ in podcast_properties[key]:
transcript_url = None transcript_url = None
if podcast_properties[key].get('url'): if podcast_properties[key].get('url'):
transcript_url = podcast_properties[key]['url'] transcript_url = remove_html(podcast_properties[key]['url'])
elif podcast_properties[key].get('uri'): elif podcast_properties[key].get('uri'):
transcript_url = podcast_properties[key]['uri'] transcript_url = podcast_properties[key]['uri']
if not transcript_url: if not transcript_url:
@ -154,7 +154,7 @@ def _html_podcast_social_interactions(podcast_properties: {},
if podcast_properties[key].get('uri'): if podcast_properties[key].get('uri'):
episode_post_url = podcast_properties[key]['uri'] episode_post_url = podcast_properties[key]['uri']
elif podcast_properties[key].get('url'): elif podcast_properties[key].get('url'):
episode_post_url = podcast_properties[key]['url'] episode_post_url = remove_html(podcast_properties[key]['url'])
elif podcast_properties[key].get('text'): elif podcast_properties[key].get('text'):
episode_post_url = podcast_properties[key]['text'] episode_post_url = podcast_properties[key]['text']
else: else:
@ -218,7 +218,7 @@ def _html_podcast_performers(podcast_properties: {}) -> str:
performer_url = '' performer_url = ''
if performer.get('href'): if performer.get('href'):
performer_url = performer['href'] performer_url = remove_html(performer['href'])
performer_img = '' performer_img = ''
if performer.get('img'): if performer.get('img'):
@ -431,7 +431,7 @@ def html_podcast_episode(translate: {},
# donate button # donate button
if podcast_properties.get('funding'): if podcast_properties.get('funding'):
if podcast_properties['funding'].get('url'): if podcast_properties['funding'].get('url'):
donate_url = podcast_properties['funding']['url'] donate_url = remove_html(podcast_properties['funding']['url'])
podcast_str += \ podcast_str += \
'<p><span itemprop="funding"><a href="' + donate_url + \ '<p><span itemprop="funding"><a href="' + donate_url + \
'" rel="donation"><button class="donateButton">' + \ '" rel="donation"><button class="donateButton">' + \

View File

@ -149,8 +149,9 @@ def _html_post_metadata_open_graph(domain: str, post_json_object: {},
" <meta content=\"@" + actor_handle + \ " <meta content=\"@" + actor_handle + \
"\" property=\"og:title\" />\n" "\" property=\"og:title\" />\n"
if obj_json.get('url'): if obj_json.get('url'):
obj_url = remove_html(obj_json['url'])
metadata += \ metadata += \
" <meta content=\"" + obj_json['url'] + \ " <meta content=\"" + obj_url + \
"\" property=\"og:url\" />\n" "\" property=\"og:url\" />\n"
if obj_json.get('published'): if obj_json.get('published'):
metadata += " <meta name=\"DC.date\" " + \ metadata += " <meta name=\"DC.date\" " + \
@ -204,8 +205,9 @@ def _html_post_metadata_open_graph(domain: str, post_json_object: {},
metadata += \ metadata += \
" <meta content=\"" + description + \ " <meta content=\"" + description + \
"\" name=\"og:description\">\n" "\" name=\"og:description\">\n"
attach_url = remove_html(attach_json['url'])
metadata += \ metadata += \
" <meta content=\"" + attach_json['url'] + \ " <meta content=\"" + attach_url + \
"\" property=\"og:image\" />\n" "\" property=\"og:image\" />\n"
metadata += \ metadata += \
" <meta content=\"" + attach_json['mediaType'] + \ " <meta content=\"" + attach_json['mediaType'] + \
@ -1188,9 +1190,11 @@ def _get_blog_citations_html(box_name: str,
continue continue
if not tag_json.get('url'): if not tag_json.get('url'):
continue continue
citation_url = remove_html(tag_json['url'])
citation_name = remove_html(tag_json['name'])
citations_str += \ citations_str += \
'<li><a href="' + tag_json['url'] + '" tabindex="10">' + \ '<li><a href="' + citation_url + '" tabindex="10">' + \
'<cite>' + tag_json['name'] + '</cite></a></li>\n' '<cite>' + citation_name + '</cite></a></li>\n'
if citations_str: if citations_str:
translated_citations_str = 'Citations' translated_citations_str = 'Citations'
@ -1844,9 +1848,9 @@ def _get_content_license(post_json_object: {}) -> str:
'licence' not in name_lower: 'licence' not in name_lower:
continue continue
if item.get('value'): if item.get('value'):
value = item['value'] value = remove_html(item['value'])
elif item.get('href'): elif item.get('href'):
value = item['href'] value = remove_html(item['href'])
else: else:
continue continue
if '://' not in value: if '://' not in value:

View File

@ -238,7 +238,7 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
avatar_url = '' avatar_url = ''
if profile_json.get('icon'): if profile_json.get('icon'):
if profile_json['icon'].get('url'): if profile_json['icon'].get('url'):
avatar_url = profile_json['icon']['url'] avatar_url = remove_html(profile_json['icon']['url'])
if not avatar_url: if not avatar_url:
avatar_url = get_person_avatar_url(base_dir, person_url, person_cache) avatar_url = get_person_avatar_url(base_dir, person_url, person_cache)
display_name = search_nickname display_name = search_nickname
@ -286,7 +286,8 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
# profileBackgroundImage = '' # profileBackgroundImage = ''
# if profile_json.get('image'): # if profile_json.get('image'):
# if profile_json['image'].get('url'): # if profile_json['image'].get('url'):
# profileBackgroundImage = profile_json['image']['url'] # profileBackgroundImage = \
# remove_html(profile_json['image']['url'])
# url to return to # url to return to
back_url = path back_url = path
@ -318,7 +319,7 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
image_url = '' image_url = ''
if profile_json.get('image'): if profile_json.get('image'):
if profile_json['image'].get('url'): if profile_json['image'].get('url'):
image_url = profile_json['image']['url'] image_url = remove_html(profile_json['image']['url'])
also_known_as = None also_known_as = None
if profile_json.get('alsoKnownAs'): if profile_json.get('alsoKnownAs'):
@ -1096,7 +1097,7 @@ def html_profile(signing_priv_key_pem: str,
if profile_json.get('hasOccupation'): if profile_json.get('hasOccupation'):
occupation_name = get_occupation_name(profile_json) occupation_name = get_occupation_name(profile_json)
avatar_url = profile_json['icon']['url'] avatar_url = remove_html(profile_json['icon']['url'])
# use alternate path for local avatars to avoid any caching issues # use alternate path for local avatars to avoid any caching issues
if '://' + domain_full + '/system/accounts/avatars/' in avatar_url: if '://' + domain_full + '/system/accounts/avatars/' in avatar_url:
avatar_url = \ avatar_url = \

View File

@ -11,6 +11,7 @@ import os
from shutil import copyfile from shutil import copyfile
import urllib.parse import urllib.parse
from datetime import datetime from datetime import datetime
from utils import remove_html
from utils import harmless_markup from utils import harmless_markup
from utils import remove_id_ending from utils import remove_id_ending
from utils import has_object_dict from utils import has_object_dict
@ -567,10 +568,11 @@ def html_skills_search(actor: str, translate: {}, base_dir: str,
skill_level_str = '0' + skill_level_str skill_level_str = '0' + skill_level_str
if skill_level < 10: if skill_level < 10:
skill_level_str = '0' + skill_level_str skill_level_str = '0' + skill_level_str
icon_url = remove_html(actor_json['icon']['url'])
index_str = \ index_str = \
skill_level_str + ';' + actor + ';' + \ skill_level_str + ';' + actor + ';' + \
actor_json['name'] + \ actor_json['name'] + \
';' + actor_json['icon']['url'] ';' + icon_url
if index_str not in results: if index_str not in results:
results.append(index_str) results.append(index_str)
break break
@ -606,10 +608,11 @@ def html_skills_search(actor: str, translate: {}, base_dir: str,
skill_level_str = '0' + skill_level_str skill_level_str = '0' + skill_level_str
if skill_level < 10: if skill_level < 10:
skill_level_str = '0' + skill_level_str skill_level_str = '0' + skill_level_str
icon_url = remove_html(actor_json['icon']['url'])
index_str = \ index_str = \
skill_level_str + ';' + actor + ';' + \ skill_level_str + ';' + actor + ';' + \
actor_json['name'] + \ actor_json['name'] + \
';' + actor_json['icon']['url'] ';' + icon_url
if index_str not in results: if index_str not in results:
results.append(index_str) results.append(index_str)
break break
@ -1369,8 +1372,9 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
for attach in post_json_object['object']['attachment']: for attach in post_json_object['object']['attachment']:
if not attach.get('url'): if not attach.get('url'):
continue continue
attach_url = remove_html(attach['url'])
hashtag_feed += \ hashtag_feed += \
' <link>' + attach['url'] + '</link>' ' <link>' + attach_url + '</link>'
hashtag_feed += ' </item>' hashtag_feed += ' </item>'
index += 1 index += 1
if index >= max_feed_length: if index >= max_feed_length:

View File

@ -627,11 +627,12 @@ def get_shares_collection(actor: str, page_number: int, items_per_page: int,
file_extension = mtype file_extension = mtype
if file_extension: if file_extension:
media_type = 'image/' + file_extension media_type = 'image/' + file_extension
shared_item_url = remove_html(shared_item['imageUrl'])
offer_item['object']['attachment'].append({ offer_item['object']['attachment'].append({
'mediaType': media_type, 'mediaType': media_type,
'name': shared_item['displayName'], 'name': shared_item['displayName'],
'type': 'Document', 'type': 'Document',
'url': shared_item['imageUrl'] 'url': shared_item_url
}) })
if shared_item['itemPrice'] and shared_item['itemCurrency']: if shared_item['itemPrice'] and shared_item['itemCurrency']:
offer_item['object']['attachment'].append({ offer_item['object']['attachment'].append({
@ -939,11 +940,12 @@ def html_header_with_person_markup(css_filename: str, instance_title: str,
domain_full = actor_json['id'].split('://')[1].split('/')[0] domain_full = actor_json['id'].split('://')[1].split('/')[0]
handle = actor_json['preferredUsername'] + '@' + domain_full handle = actor_json['preferredUsername'] + '@' + domain_full
icon_url = remove_html(actor_json['icon']['url'])
person_markup = \ person_markup = \
' "about": {\n' + \ ' "about": {\n' + \
' "@type" : "Person",\n' + \ ' "@type" : "Person",\n' + \
' "name": "' + name_str + '",\n' + \ ' "name": "' + name_str + '",\n' + \
' "image": "' + actor_json['icon']['url'] + '",\n' + \ ' "image": "' + icon_url + '",\n' + \
' "description": "' + description + '",\n' + \ ' "description": "' + description + '",\n' + \
city_markup + skills_markup + \ city_markup + skills_markup + \
' "url": "' + actor_json['id'] + '"\n' + \ ' "url": "' + actor_json['id'] + '"\n' + \
@ -967,18 +969,19 @@ def html_header_with_person_markup(css_filename: str, instance_title: str,
' "name": "' + name_str + '"\n' + \ ' "name": "' + name_str + '"\n' + \
' },\n' + \ ' },\n' + \
' "name": "' + name_str + '",\n' + \ ' "name": "' + name_str + '",\n' + \
' "image": "' + actor_json['icon']['url'] + '",\n' + \ ' "image": "' + icon_url + '",\n' + \
' "description": "' + description + '",\n' + \ ' "description": "' + description + '",\n' + \
' "license": "' + content_license_url + '"\n' + \ ' "license": "' + content_license_url + '"\n' + \
' }\n' + \ ' }\n' + \
' </script>\n' ' </script>\n'
description = remove_html(description) description = remove_html(description)
actor2_url = remove_html(actor_json['url'])
og_metadata = \ og_metadata = \
" <meta content=\"profile\" property=\"og:type\" />\n" + \ " <meta content=\"profile\" property=\"og:type\" />\n" + \
" <meta content=\"" + description + \ " <meta content=\"" + description + \
"\" name='description'>\n" + \ "\" name='description'>\n" + \
" <meta content=\"" + actor_json['url'] + \ " <meta content=\"" + actor2_url + \
"\" property=\"og:url\" />\n" + \ "\" property=\"og:url\" />\n" + \
" <meta content=\"" + domain_full + \ " <meta content=\"" + domain_full + \
"\" property=\"og:site_name\" />\n" + \ "\" property=\"og:site_name\" />\n" + \
@ -986,7 +989,7 @@ def html_header_with_person_markup(css_filename: str, instance_title: str,
")\" property=\"og:title\" />\n" + \ ")\" property=\"og:title\" />\n" + \
" <meta content=\"" + description + \ " <meta content=\"" + description + \
"\" property=\"og:description\" />\n" + \ "\" property=\"og:description\" />\n" + \
" <meta content=\"" + actor_json['icon']['url'] + \ " <meta content=\"" + icon_url + \
"\" property=\"og:image\" />\n" + \ "\" property=\"og:image\" />\n" + \
" <meta content=\"400\" property=\"og:image:width\" />\n" + \ " <meta content=\"400\" property=\"og:image:width\" />\n" + \
" <meta content=\"400\" property=\"og:image:height\" />\n" + \ " <meta content=\"400\" property=\"og:image:height\" />\n" + \
@ -1362,7 +1365,8 @@ def get_post_attachments_as_html(base_dir: str,
continue continue
# get the domain for the chat link # get the domain for the chat link
chat_domain_str = '' chat_domain_str = ''
chat_domain, _ = get_domain_from_actor(attach['href']) attach_url = remove_html(attach['href'])
chat_domain, _ = get_domain_from_actor(attach_url)
if chat_domain: if chat_domain:
if local_network_host(chat_domain): if local_network_host(chat_domain):
print('REJECT: local network chat link ' + attach['href']) print('REJECT: local network chat link ' + attach['href'])
@ -1505,6 +1509,7 @@ def get_post_attachments_as_html(base_dir: str,
image_post_url = post_json_object['object']['url'] image_post_url = post_json_object['object']['url']
else: else:
image_post_url = post_json_object['object']['id'] image_post_url = post_json_object['object']['id']
image_post_url = remove_html(image_post_url)
if image_description and not is_muted: if image_description and not is_muted:
gallery_str += \ gallery_str += \
' <a href="' + image_post_url + \ ' <a href="' + image_post_url + \
@ -1632,6 +1637,7 @@ def get_post_attachments_as_html(base_dir: str,
video_post_url = post_json_object['object']['url'] video_post_url = post_json_object['object']['url']
else: else:
video_post_url = post_json_object['object']['id'] video_post_url = post_json_object['object']['id']
video_post_url = remove_html(video_post_url)
if image_description and not is_muted: if image_description and not is_muted:
gallery_str += \ gallery_str += \
' <a href="' + video_post_url + \ ' <a href="' + video_post_url + \
@ -1709,6 +1715,7 @@ def get_post_attachments_as_html(base_dir: str,
audio_post_url = post_json_object['object']['url'] audio_post_url = post_json_object['object']['url']
else: else:
audio_post_url = post_json_object['object']['id'] audio_post_url = post_json_object['object']['id']
audio_post_url = remove_html(audio_post_url)
if image_description and not is_muted: if image_description and not is_muted:
gallery_str += \ gallery_str += \
' <a href="' + audio_post_url + \ ' <a href="' + audio_post_url + \

View File

@ -12,6 +12,7 @@ import urllib.parse
from session import get_json from session import get_json
from cache import store_webfinger_in_cache from cache import store_webfinger_in_cache
from cache import get_webfinger_from_cache from cache import get_webfinger_from_cache
from utils import remove_html
from utils import acct_handle_dir from utils import acct_handle_dir
from utils import get_attachment_property_value from utils import get_attachment_property_value
from utils import get_full_domain from utils import get_full_domain
@ -425,7 +426,7 @@ def _webfinger_update_avatar(wf_json: {}, actor_json: {}) -> bool:
"""Updates the avatar image link """Updates the avatar image link
""" """
found = False found = False
avatar_url = actor_json['icon']['url'] avatar_url = remove_html(actor_json['icon']['url'])
media_type = actor_json['icon']['mediaType'] media_type = actor_json['icon']['mediaType']
for link in wf_json['links']: for link in wf_json['links']:
if not link.get('rel'): if not link.get('rel'):
@ -455,8 +456,9 @@ def _webfinger_update_vcard(wf_json: {}, actor_json: {}) -> bool:
if link.get('type'): if link.get('type'):
if link['type'] == 'text/vcard': if link['type'] == 'text/vcard':
return False return False
actor_url = remove_html(actor_json['url'])
wf_json['links'].append({ wf_json['links'].append({
"href": actor_json['url'], "href": actor_url,
"rel": "http://webfinger.net/rel/profile-page", "rel": "http://webfinger.net/rel/profile-page",
"type": "text/vcard" "type": "text/vcard"
}) })