From bedb68f7055da33a5d867f6f927702d847477557 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 26 May 2022 11:26:19 +0100 Subject: [PATCH] Cache svg image attachments so that any scripts can be removed --- inbox.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/inbox.py b/inbox.py index c1e70a3f7..8128c3620 100644 --- a/inbox.py +++ b/inbox.py @@ -70,6 +70,7 @@ from categories import set_hashtag_category from httpsig import get_digest_algorithm_from_headers from httpsig import verify_post_headers from session import create_session +from session import download_image from follow import follower_approval_active from follow import is_following_actor from follow import get_followers_of_actor @@ -127,6 +128,80 @@ from webapp_hashtagswarm import html_hash_tag_swarm from person import valid_sending_actor from fitnessFunctions import fitness_performance from content import valid_url_lengths +from content import remove_script + + +def _cache_svg_images(session, base_dir: str, http_prefix: str, + nickname: str, domain: str, domain_full: str, + onion_domain: str, i2p_domain: str, + post_json_object: {}, + federation_list: [], debug: bool) -> bool: + """Creates a local copy of a remote svg file + """ + if has_object_dict(post_json_object): + obj = post_json_object['object'] + else: + obj = post_json_object + if not obj.get('id'): + return False + if not obj.get('attachment'): + return False + if not isinstance(obj['attachment'], list): + return False + cached = False + post_id = remove_id_ending(obj['id']).replace('/', '--') + for index in range(len(obj['attachment'])): + attach = obj['attachment'][index] + if not attach.get('mediaType'): + continue + if not attach.get('url'): + continue + if attach['url'].endswith('.svg') or \ + 'svg' in attach['mediaType']: + url = attach['url'] + if not url_permitted(url, federation_list): + continue + # if this is a local image then it has already been + # validated on upload + if '://' + domain in url: + continue + if '://' + onion_domain in url: + continue + if '://' + i2p_domain in url: + continue + filename = url.split('/')[-1] + image_filename = \ + base_dir + '/media/' + post_id + '_' + filename + if not download_image(session, base_dir, url, + image_filename, debug): + continue + image_data = None + try: + with open(image_filename, 'rb') as fp_svg: + image_data = fp_svg.read() + except OSError: + print('EX: unable to read svg file data') + if image_data: + image_data = image_data.decode() + cleaned_up = remove_script(image_data) + if cleaned_up != image_data: + svg_written = False + cleaned_up = cleaned_up.encode('utf-8') + try: + with open(image_filename, 'wb') as im_file: + im_file.write(cleaned_up) + svg_written = True + except OSError: + print('EX: unable to write cleaned up svg ' + url) + if svg_written: + # change the url to be the local version + obj['attachment'][index]['url'] = \ + http_prefix + '://' + domain_full + '/media/' + \ + post_id + '_' + filename + cached = True + else: + cached = True + return cached def _store_last_post_id(base_dir: str, nickname: str, domain: str, @@ -4020,6 +4095,15 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_obtain_avatar_for_reply_post', debug) + + # cache any svg image attachments locally + # This is so that any scripts can be removed + _cache_svg_images(session, base_dir, http_prefix, + nickname, domain, domain_full, + onion_domain, i2p_domain, + post_json_object, + federation_list, debug) + inbox_start_time = time.time() # save the post to file