From 080b2ca352ad2e58c7daa06ba19a008abc257bb1 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 9 Dec 2023 14:18:24 +0000 Subject: [PATCH] Handle situations where urls are lists --- blog.py | 7 +++++-- bookmarks.py | 7 +++++-- content.py | 11 ++++++++--- daemon.py | 21 ++++++++++++++++++--- desktop_client.py | 4 +++- inbox.py | 21 +++++++++++++-------- maps.py | 4 +++- mastoapiv1.py | 13 +++++++++---- mastoapiv2.py | 7 +++++-- newswire.py | 10 +++++++--- person.py | 21 ++++++++++++++------- pgp.py | 17 +++++++++++------ posts.py | 31 +++++++++++++++++-------------- tests.py | 16 +++++++++------- utils.py | 29 +++++++++++++++++++++++++++-- video.py | 8 +++++--- webapp_moderation.py | 4 +++- webapp_podcast.py | 16 +++++++++++----- webapp_post.py | 10 +++++++--- webapp_profile.py | 14 +++++++++----- webapp_search.py | 11 ++++++++--- webapp_utils.py | 24 ++++++++++++++++-------- webfinger.py | 7 +++++-- 23 files changed, 218 insertions(+), 95 deletions(-) diff --git a/blog.py b/blog.py index 63e98488f..065be2805 100644 --- a/blog.py +++ b/blog.py @@ -16,6 +16,7 @@ from webapp_utils import html_footer from webapp_utils import get_post_attachments_as_html from webapp_utils import edit_text_area from webapp_media import add_embedded_elements +from utils import get_url_from_post from utils import date_from_string_format from utils import get_attributed_to from utils import remove_eol @@ -314,7 +315,8 @@ def _html_blog_post_content(debug: bool, session, authorized: bool, continue if not tag_json.get('url'): continue - citation_url = remove_html(tag_json['url']) + url_str = get_url_from_post(tag_json['url']) + citation_url = remove_html(url_str) citation_name = remove_html(tag_json['name']) citations_str += \ '
  • ' + \ @@ -482,7 +484,8 @@ def html_blog_post(session, authorized: bool, title = post_json_object['object']['summary'] url = '' if post_json_object['object'].get('url'): - url = remove_html(post_json_object['object']['url']) + url_str = get_url_from_post(post_json_object['object']['url']) + url = remove_html(url_str) snippet = _get_snippet_from_blog_content(post_json_object, system_language) blog_str = html_header_with_blog_markup(css_filename, instance_title, diff --git a/bookmarks.py b/bookmarks.py index f786a6a21..f514451e8 100644 --- a/bookmarks.py +++ b/bookmarks.py @@ -11,6 +11,7 @@ import os from pprint import pprint from webfinger import webfinger_handle from auth import create_basic_auth_header +from utils import get_url_from_post from utils import remove_domain_port from utils import has_users_path from utils import get_full_domain @@ -603,7 +604,8 @@ def outbox_bookmark(recent_posts_cache: {}, if debug: print('DEBUG: c2s bookmark Add request arrived in outbox') - message_url = remove_id_ending(message_json['object']['url']) + url_str = get_url_from_post(message_json['object']['url']) + message_url = remove_id_ending(url_str) message_url = remove_html(message_url) domain = remove_domain_port(domain) post_filename = locate_post(base_dir, nickname, domain, message_url) @@ -661,7 +663,8 @@ def outbox_undo_bookmark(recent_posts_cache: {}, if debug: print('DEBUG: c2s unbookmark Remove request arrived in outbox') - message_url = remove_id_ending(message_json['object']['url']) + url_str = get_url_from_post(message_json['object']['url']) + message_url = remove_id_ending(url_str) message_url = remove_html(message_url) domain = remove_domain_port(domain) post_filename = locate_post(base_dir, nickname, domain, message_url) diff --git a/content.py b/content.py index 68f5819b2..effe0e36d 100644 --- a/content.py +++ b/content.py @@ -15,6 +15,7 @@ import email.parser import urllib.parse from shutil import copyfile from dateutil.parser import parse +from utils import get_url_from_post from utils import is_right_to_left_text from utils import language_right_to_left from utils import binary_is_image @@ -446,11 +447,14 @@ def replace_emoji_from_tags(session, base_dir: str, continue if not tag_item['icon'].get('url'): continue - if '/' not in tag_item['icon']['url']: + url_str = get_url_from_post(tag_item['icon']['url']) + if '/' not in url_str: continue if tag_item['name'] not in content: continue - tag_url = remove_html(tag_item['icon']['url']) + tag_url = remove_html(url_str) + if not tag_url: + continue icon_name = tag_url.split('/')[-1] if icon_name: if len(icon_name) > 1: @@ -532,7 +536,8 @@ def replace_emoji_from_tags(session, base_dir: str, emoji_tag_name = tag_item['name'].replace(':', '') else: emoji_tag_name = '' - tag_url = remove_html(tag_item['icon']['url']) + url_str = get_url_from_post(tag_item['icon']['url']) + tag_url = remove_html(url_str) emoji_html = "\""" diff --git a/daemon.py b/daemon.py index 7d0706ba0..43ba56023 100644 --- a/daemon.py +++ b/daemon.py @@ -300,6 +300,7 @@ from languages import set_actor_languages from languages import get_understood_languages from like import update_likes_collection from reaction import update_reaction_collection +from utils import get_url_from_post from utils import date_from_string_format from utils import corp_servers from utils import get_attributed_to @@ -2209,9 +2210,19 @@ class PubServer(BaseHTTPRequestHandler): if has_object_dict(message_json): if debug: print('INBOX: checking object fields') + # check that url is a string or list + if message_json['object'].get('url'): + if not isinstance(message_json['object']['url'], str) and \ + not isinstance(message_json['object']['url'], list): + print('INBOX: url should be a string or list ' + + str(message_json['object']['url'])) + self._400() + self.server.postreq_busy = False + return 3 + # check that some fields are strings string_fields = ( 'id', 'actor', 'type', 'content', 'published', - 'summary', 'url' + 'summary' ) for check_field in string_fields: if not message_json['object'].get(check_field): @@ -6696,7 +6707,9 @@ class PubServer(BaseHTTPRequestHandler): for m_type, last_part in uploads: rep_str = '/' + last_part if m_type == 'avatar': - actor_url = remove_html(actor_json['icon']['url']) + url_str = \ + get_url_from_post(actor_json['icon']['url']) + actor_url = remove_html(url_str) last_part_of_url = actor_url.split('/')[-1] srch_str = '/' + last_part_of_url actor_url = actor_url.replace(srch_str, rep_str) @@ -6709,8 +6722,10 @@ class PubServer(BaseHTTPRequestHandler): actor_json['icon']['mediaType'] = \ 'image/' + img_ext elif m_type == 'image': + url_str = \ + get_url_from_post(actor_json['image']['url']) im_url = \ - remove_html(actor_json['image']['url']) + remove_html(url_str) last_part_of_url = im_url.split('/')[-1] srch_str = '/' + last_part_of_url actor_json['image']['url'] = \ diff --git a/desktop_client.py b/desktop_client.py index 37805df4a..1d789badd 100644 --- a/desktop_client.py +++ b/desktop_client.py @@ -16,6 +16,7 @@ import webbrowser import urllib.parse from pathlib import Path from random import randint +from utils import get_url_from_post from utils import get_actor_languages_list from utils import get_attributed_to from utils import remove_html @@ -761,7 +762,8 @@ def _show_replies_on_post(post_json_object: {}, max_replies: int) -> None: print('') ctr = 0 for item in object_replies['items']: - item_url = remove_html(item['url']) + url_str = get_url_from_post(item['url']) + item_url = remove_html(url_str) print(' ↰ ' + str(item_url)) ctr += 1 if ctr >= max_replies: diff --git a/inbox.py b/inbox.py index 2491a0d0e..24de733da 100644 --- a/inbox.py +++ b/inbox.py @@ -18,6 +18,7 @@ from languages import understood_post_language from like import update_likes_collection from reaction import update_reaction_collection from reaction import valid_emoji_content +from utils import get_url_from_post from utils import date_from_string_format from utils import date_epoch from utils import date_utcnow @@ -192,9 +193,10 @@ def cache_svg_images(session, base_dir: str, http_prefix: str, continue if not attach.get('url'): continue - if attach['url'].endswith('.svg') or \ + url_str = get_url_from_post(attach['url']) + if url_str.endswith('.svg') or \ 'svg' in attach['mediaType']: - url = remove_html(attach['url']) + url = remove_html(url_str) if not url_permitted(url, federation_list): continue # if this is a local image then it has already been @@ -1209,7 +1211,8 @@ def _person_receive_update(base_dir: str, debug: bool, http_prefix: str) -> bool: """Changes an actor. eg: avatar or display name change """ - person_url = remove_html(person_json['url']) + url_str = get_url_from_post(person_json['url']) + person_url = remove_html(url_str) if debug: print('Receiving actor update for ' + person_url + ' ' + str(person_json)) @@ -1901,7 +1904,7 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str, print('Person Update: ' + str(message_json)) if debug: print('DEBUG: Profile update was received for ' + - message_json['object']['url']) + str(message_json['object']['url'])) return True return False @@ -2714,14 +2717,15 @@ def _receive_bookmark(recent_posts_cache: {}, if debug: print('DEBUG: inbox bookmark Add missing url') return False - if '/statuses/' not in message_json['object']['url']: + url_str = get_url_from_post(message_json['object']['url']) + if '/statuses/' not in url_str: if debug: print('DEBUG: inbox bookmark Add missing statuses un url') return False if debug: print('DEBUG: c2s inbox bookmark Add request arrived in outbox') - message_url2 = remove_html(message_json['object']['url']) + message_url2 = remove_html(url_str) message_url = remove_id_ending(message_url2) domain = remove_domain_port(domain) post_filename = locate_post(base_dir, nickname, domain, message_url) @@ -2840,7 +2844,8 @@ def _receive_undo_bookmark(recent_posts_cache: {}, if debug: print('DEBUG: inbox undo bookmark Remove missing url') return False - if '/statuses/' not in message_json['object']['url']: + url_str = get_url_from_post(message_json['object']['url']) + if '/statuses/' not in url_str: if debug: print('DEBUG: inbox undo bookmark Remove missing statuses un url') return False @@ -2848,7 +2853,7 @@ def _receive_undo_bookmark(recent_posts_cache: {}, print('DEBUG: c2s inbox Remove bookmark ' + 'request arrived in outbox') - message_url2 = remove_html(message_json['object']['url']) + message_url2 = remove_html(url_str) message_url = remove_id_ending(message_url2) domain = remove_domain_port(domain) post_filename = locate_post(base_dir, nickname, domain, message_url) diff --git a/maps.py b/maps.py index 0ca78041a..edc02c339 100644 --- a/maps.py +++ b/maps.py @@ -9,6 +9,7 @@ __module_group__ = "Core" import os +from utils import get_url_from_post from utils import is_float from utils import acct_dir from utils import load_json @@ -400,7 +401,8 @@ def get_map_preferences_url(base_dir: str, nickname: str, domain: str) -> str: if os.path.isfile(maps_filename): maps_json = load_json(maps_filename) if maps_json.get('url'): - return remove_html(maps_json['url']) + url_str = get_url_from_post(maps_json['url']) + return remove_html(url_str) return None diff --git a/mastoapiv1.py b/mastoapiv1.py index ab8e8baf1..6da3d1682 100644 --- a/mastoapiv1.py +++ b/mastoapiv1.py @@ -8,6 +8,7 @@ __status__ = "Production" __module_group__ = "API" import os +from utils import get_url_from_post from utils import load_json from utils import get_config_param from utils import acct_dir @@ -79,8 +80,10 @@ def _meta_data_instance_v1(show_accounts: bool, if admin_actor.get('published'): created_at = admin_actor['published'] - icon_url = remove_html(admin_actor['icon']['url']) - image_url = remove_html(admin_actor['image']['url']) + url_str = get_url_from_post(admin_actor['icon']['url']) + icon_url = remove_html(url_str) + url_str = get_url_from_post(admin_actor['image']['url']) + image_url = remove_html(url_str) instance = { 'approval_required': False, 'invites_enabled': False, @@ -205,8 +208,10 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str, account_json = load_json(account_filename) if not account_json: return {} - avatar_url = remove_html(account_json['icon']['url']) - image_url = remove_html(account_json['image']['url']) + url_str = get_url_from_post(account_json['icon']['url']) + avatar_url = remove_html(url_str) + url_str = get_url_from_post(account_json['image']['url']) + image_url = remove_html(url_str) joined_date = "2016-10-05T10:30:00Z" if account_json.get('published'): joined_date = account_json['published'] diff --git a/mastoapiv2.py b/mastoapiv2.py index 07ee2663c..06c8212ec 100644 --- a/mastoapiv2.py +++ b/mastoapiv2.py @@ -8,6 +8,7 @@ __status__ = "Production" __module_group__ = "API" import os +from utils import get_url_from_post from utils import load_json from utils import get_config_param from utils import acct_dir @@ -85,8 +86,10 @@ def _meta_data_instance_v2(show_accounts: bool, if admin_actor.get('published'): created_at = admin_actor['published'] - icon_url = remove_html(admin_actor['icon']['url']) - image_url = remove_html(admin_actor['image']['url']) + url_str = get_url_from_post(admin_actor['icon']['url']) + icon_url = remove_html(url_str) + url_str = get_url_from_post(admin_actor['image']['url']) + image_url = remove_html(url_str) thumbnail_url = http_prefix + '://' + domain_full + '/login.png' admin_email = None noindex = True diff --git a/newswire.py b/newswire.py index fbffdcc53..31b8ff4e8 100644 --- a/newswire.py +++ b/newswire.py @@ -19,6 +19,7 @@ from datetime import timezone from collections import OrderedDict from utils import valid_post_date from categories import set_hashtag_category +from utils import get_url_from_post from utils import remove_zero_length_strings from utils import date_from_string_format from utils import acct_handle_dir @@ -1143,7 +1144,8 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str, continue if not json_feed_item.get('url'): continue - if not isinstance(json_feed_item['url'], str): + url_str = get_url_from_post(json_feed_item['url']) + if not url_str: continue if not json_feed_item.get('date_published'): if not json_feed_item.get('date_modified'): @@ -1182,7 +1184,7 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str, if tag_name not in description: description += ' ' + tag_name - link = remove_html(json_feed_item['url']) + link = remove_html(url_str) if '://' not in link: continue if len(link) > max_bytes: @@ -1602,7 +1604,9 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str, description = remove_html(description) tags_from_post = _get_hashtags_from_post(post_json_object) summary = post_json_object['object']['summary'] - url2 = remove_html(post_json_object['object']['url']) + url_str = \ + get_url_from_post(post_json_object['object']['url']) + url2 = remove_html(url_str) _add_newswire_dict_entry(base_dir, domain, newswire, published, summary, url2, diff --git a/person.py b/person.py index 29ca9c9da..95e66adb4 100644 --- a/person.py +++ b/person.py @@ -37,6 +37,7 @@ from roles import set_role from roles import actor_roles_from_list from roles import get_actor_roles_list from media import process_meta_data +from utils import get_url_from_post from utils import date_utcnow from utils import get_memorials from utils import is_account_dir @@ -200,7 +201,8 @@ def randomize_actor_images(person_json: {}) -> None: This causes other instances to update their cached avatar image """ person_id = person_json['id'] - last_part_of_filename = person_json['icon']['url'].split('/')[-1] + url_str = get_url_from_post(person_json['icon']['url']) + last_part_of_filename = url_str.split('/')[-1] existing_extension = last_part_of_filename.split('.')[1] # NOTE: these files don't need to have cryptographically # secure names @@ -210,7 +212,8 @@ def randomize_actor_images(person_json: {}) -> None: person_json['icon']['url'] = \ base_url + '/system/accounts/avatars/' + nickname + \ '/avatar' + rand_str + '.' + existing_extension - last_part_of_filename = person_json['image']['url'].split('/')[-1] + url_str = get_url_from_post(person_json['image']['url']) + last_part_of_filename = url_str.split('/')[-1] existing_extension = last_part_of_filename.split('.')[1] rand_str = str(randint(10000000000000, 99999999999999)) # nosec person_json['image']['url'] = \ @@ -229,6 +232,9 @@ def get_actor_update_json(actor_json: {}) -> {}: indexable = False if actor_json.get('indexable'): indexable = True + actor_url = get_url_from_post(actor_json['url']) + icon_url = get_url_from_post(actor_json['icon']['url']) + image_url = get_url_from_post(actor_json['image']['url']) return { '@context': [ "https://www.w3.org/ns/activitystreams", @@ -311,11 +317,11 @@ def get_actor_update_json(actor_json: {}) -> {}: 'type': actor_json['type'], 'icon': { 'type': 'Image', - 'url': actor_json['icon']['url'] + 'url': icon_url }, 'image': { 'type': 'Image', - 'url': actor_json['image']['url'] + 'url': image_url }, 'attachment': actor_json['attachment'], 'following': actor_json['id'] + '/following', @@ -327,7 +333,7 @@ def get_actor_update_json(actor_json: {}) -> {}: 'preferredUsername': actor_json['preferredUsername'], 'name': actor_json['name'], 'summary': actor_json['summary'], - 'url': actor_json['url'], + 'url': actor_url, 'manuallyApprovesFollowers': manually_approves_followers, 'discoverable': actor_json['discoverable'], 'memorial': memorial, @@ -1844,8 +1850,9 @@ def get_person_avatar_url(base_dir: str, person_url: str, if person_json.get('icon'): if person_json['icon'].get('url'): - if '.svg' not in person_json['icon']['url'].lower(): - return remove_html(person_json['icon']['url']) + url_str = get_url_from_post(person_json['icon']['url']) + if '.svg' not in url_str.lower(): + return remove_html(url_str) return None diff --git a/pgp.py b/pgp.py index b4a6a522f..5955b2cd6 100644 --- a/pgp.py +++ b/pgp.py @@ -12,6 +12,7 @@ import base64 import subprocess from pathlib import Path from person import get_actor_json +from utils import get_url_from_post from utils import safe_system_string from utils import contains_pgp_public_key from utils import is_pgp_encrypted @@ -712,18 +713,20 @@ def pgp_public_key_upload(base_dir: str, session, def actor_to_vcard(actor: {}, domain: str) -> str: """Returns a vcard for a given actor """ + actor_url_str = get_url_from_post(actor['url']) vcard_str = 'BEGIN:VCARD\n' vcard_str += 'VERSION:4.0\n' vcard_str += 'REV:' + actor['published'] + '\n' vcard_str += 'FN:' + remove_html(actor['name']) + '\n' vcard_str += 'NICKNAME:' + actor['preferredUsername'] + '\n' - vcard_str += 'URL;TYPE=profile:' + actor['url'] + '\n' + vcard_str += 'URL;TYPE=profile:' + actor_url_str + '\n' blog_address = get_blog_address(actor) if blog_address: vcard_str += 'URL;TYPE=blog:' + blog_address + '\n' vcard_str += 'NOTE:' + remove_html(actor['summary']) + '\n' - if actor['icon']['url']: - vcard_str += 'PHOTO:' + actor['icon']['url'] + '\n' + url_str = get_url_from_post(actor['icon']['url']) + if url_str: + vcard_str += 'PHOTO:' + url_str + '\n' pgp_key = get_pgp_pub_key(actor) if pgp_key: vcard_str += 'KEY:data:application/pgp-keys;base64,' + \ @@ -801,18 +804,20 @@ def actor_to_vcard_xml(actor: {}, domain: str) -> str: vcard_str += ' ' + \ 'cwtch' + \ '' + cwtch_address + '\n' + url_str = get_url_from_post(actor['url']) vcard_str += ' ' + \ 'profile' + \ - '' + actor['url'] + '\n' + '' + url_str + '\n' blog_address = get_blog_address(actor) if blog_address: vcard_str += ' ' + \ 'blog' + \ '' + blog_address + '\n' vcard_str += ' ' + actor['published'] + '\n' - if actor['icon']['url']: + url_str = get_url_from_post(actor['icon']['url']) + if url_str: vcard_str += \ - ' ' + actor['icon']['url'] + '\n' + ' ' + url_str + '\n' pgp_key = get_pgp_pub_key(actor) if pgp_key: pgp_key_encoded = \ diff --git a/posts.py b/posts.py index 9a992f86e..461fa8632 100644 --- a/posts.py +++ b/posts.py @@ -34,6 +34,7 @@ from webfinger import webfinger_handle from httpsig import create_signed_header from siteactive import site_is_active from languages import understood_post_language +from utils import get_url_from_post from utils import date_from_string_format from utils import date_epoch from utils import date_utcnow @@ -406,7 +407,8 @@ def get_person_box(signing_priv_key_pem: str, origin_domain: str, avatar_url = None if person_json.get('icon'): if person_json['icon'].get('url'): - avatar_url = remove_html(person_json['icon']['url']) + url_str = get_url_from_post(person_json['icon']['url']) + avatar_url = remove_html(url_str) display_name = None possible_display_name = None if person_json.get('name'): @@ -652,16 +654,16 @@ def _get_posts(session, outbox_url: str, max_posts: int, if tag_item.get('name') and tag_item.get('icon'): if tag_item['icon'].get('url'): # No emoji from non-permitted domains - if url_permitted(tag_item['icon']['url'], + url_str = \ + get_url_from_post(tag_item['icon']['url']) + if url_permitted(url_str, federation_list): emoji_name = tag_item['name'] - emoji_icon = \ - remove_html(tag_item['icon']['url']) + emoji_icon = remove_html(url_str) emoji[emoji_name] = emoji_icon else: if debug: - print('url not permitted ' + - tag_item['icon']['url']) + print('url not permitted ' + url_str) if tag_type == 'mention': if tag_item.get('name'): if tag_item['name'] not in mentions: @@ -703,15 +705,15 @@ def _get_posts(session, outbox_url: str, max_posts: int, for attach in this_item['attachment']: if attach.get('name') and attach.get('url'): # no attachments from non-permitted domains - attach_url = remove_html(attach['url']) + url_str = get_url_from_post(attach['url']) + attach_url = remove_html(url_str) if url_permitted(attach_url, federation_list): attachment.append([attach['name'], attach_url]) else: if debug: - print('url not permitted ' + - attach['url']) + print('url not permitted ' + url_str) sensitive = False if this_item.get('sensitive'): @@ -906,9 +908,9 @@ def _get_posts_for_blocked_domains(base_dir: str, continue if is_blocked_domain(base_dir, post_domain): if item['object'].get('url'): - url = item['object']['url'] + url = get_url_from_post(item['object']['url']) else: - url = item['object']['id'] + url = get_url_from_post(item['object']['id']) url = remove_html(url) if not blocked_posts.get(post_domain): blocked_posts[post_domain] = [url] @@ -929,9 +931,9 @@ def _get_posts_for_blocked_domains(base_dir: str, continue if is_blocked_domain(base_dir, post_domain): if item['object'].get('url'): - url = item['object']['url'] + url = get_url_from_post(item['object']['url']) else: - url = item['object']['id'] + url = get_url_from_post(item['object']['id']) url = remove_html(url) if not blocked_posts.get(post_domain): blocked_posts[post_domain] = [url] @@ -2206,7 +2208,8 @@ def create_blog_post(base_dir: str, low_bandwidth, content_license_url, media_license_url, media_creator, languages_understood, translate, buy_url, chat_url) - obj_url = remove_html(blog_json['object']['url']) + url_str = get_url_from_post(blog_json['object']['url']) + obj_url = remove_html(url_str) if '/@/' not in obj_url: blog_json['object']['url'] = obj_url.replace('/@', '/users/') _append_citations_to_blog_post(base_dir, nickname, domain, blog_json) diff --git a/tests.py b/tests.py index fb6161f11..90581b2f6 100644 --- a/tests.py +++ b/tests.py @@ -56,6 +56,7 @@ from follow import clear_followers from follow import send_follow_request_via_server from follow import send_unfollow_request_via_server from siteactive import site_is_active +from utils import get_url_from_post from utils import date_from_string_format from utils import date_utcnow from utils import is_right_to_left_text @@ -1470,10 +1471,11 @@ def test_post_message_between_servers(base_dir: str) -> None: assert attached.get('type') assert attached.get('url') assert attached['mediaType'] == 'image/png' - if '/system/media_attachments/files/' not in attached['url']: - print(attached['url']) - assert '/system/media_attachments/files/' in attached['url'] - assert attached['url'].endswith('.png') + url_str = get_url_from_post(attached['url']) + if '/system/media_attachments/files/' not in url_str: + print(str(attached['url'])) + assert '/system/media_attachments/files/' in url_str + assert url_str.endswith('.png') assert attached.get('width') assert attached.get('height') assert attached['width'] > 0 @@ -4255,7 +4257,7 @@ def _test_danger_svg(base_dir: str) -> None: federation_list, debug, svg_image_filename) - url = post_json_object['object']['attachment'][0]['url'] + url = get_url_from_post(post_json_object['object']['attachment'][0]['url']) assert url == 'https://ratsratsrats.live/media/1234_wibble.svg' with open(svg_image_filename, 'rb') as fp_svg: @@ -7317,8 +7319,8 @@ def _test_xml_podcast_dict(base_dir: str) -> None: assert podcast_properties.get('funding') assert int(podcast_properties['episode']) == 5 assert podcast_properties['funding']['text'] == "Support the show" - assert podcast_properties['funding']['url'] == \ - "https://whoframed.rodger/donate" + url_str = get_url_from_post(podcast_properties['funding']['url']) + assert url_str == "https://whoframed.rodger/donate" assert len(podcast_properties['transcripts']) == 3 assert len(podcast_properties['valueRecipients']) == 2 assert len(podcast_properties['persons']) == 5 diff --git a/utils.py b/utils.py index 705168ab5..9785a1b00 100644 --- a/utils.py +++ b/utils.py @@ -110,6 +110,31 @@ def date_epoch(): return date_from_numbers(1970, 1, 1, 0, 0) +def get_url_from_post(url_field) -> str: + """Returns a url from a post object + """ + if isinstance(url_field, str): + return url_field + if isinstance(url_field, list): + for url_dict in url_field: + if not isinstance(url_dict, dict): + continue + if 'href' not in url_dict: + continue + if 'mediaType' not in url_dict: + continue + if not isinstance(url_dict['href'], str): + continue + if not isinstance(url_dict['mediaType'], str): + continue + if url_dict['mediaType'] != 'text/html': + continue + if '://' not in url_dict['href']: + continue + return url_dict['href'] + return '' + + def get_attributed_to(field) -> str: """Returns the actor """ @@ -404,7 +429,7 @@ def get_media_descriptions_from_post(post_json_object: {}) -> str: continue descriptions += attach['name'] + ' ' if attach.get('url'): - descriptions += attach['url'] + ' ' + descriptions += get_url_from_post(attach['url']) + ' ' return descriptions.strip() @@ -2056,7 +2081,7 @@ def _remove_attachment(base_dir: str, http_prefix: str, domain: str, return if not post_json['attachment'][0].get('url'): return - attachment_url = post_json['attachment'][0]['url'] + attachment_url = get_url_from_post(post_json['attachment'][0]['url']) if not attachment_url: return attachment_url = remove_html(attachment_url) diff --git a/video.py b/video.py index 38edfdb7a..e2814e0ff 100644 --- a/video.py +++ b/video.py @@ -7,6 +7,7 @@ __email__ = "bob@libreserver.org" __status__ = "Production" __module_group__ = "Timeline" +from utils import get_url_from_post from utils import remove_html from utils import get_full_domain from utils import get_nickname_from_actor @@ -204,9 +205,10 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str, continue if not lang.get('url'): continue - if not isinstance(lang['url'], str): + url_str = get_url_from_post(lang['url']) + if not url_str: continue - if not lang['url'].endswith('.vtt'): + if not url_str.endswith('.vtt'): continue for understood in languages_understood: if understood in lang['identifier']: @@ -214,7 +216,7 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str, "type": "Document", "name": understood, "mediaType": "text/vtt", - "url": lang['url'] + "url": url_str }) break diff --git a/webapp_moderation.py b/webapp_moderation.py index 102285150..3fb7a3eed 100644 --- a/webapp_moderation.py +++ b/webapp_moderation.py @@ -8,6 +8,7 @@ __status__ = "Production" __module_group__ = "Moderation" import os +from utils import get_url_from_post from utils import remove_html from utils import is_artist from utils import is_account_dir @@ -388,7 +389,8 @@ def html_moderation_info(translate: {}, base_dir: str, ext = '' if actor_json.get('icon'): if actor_json['icon'].get('url'): - avatar_url = remove_html(actor_json['icon']['url']) + url_str = get_url_from_post(actor_json['icon']['url']) + avatar_url = remove_html(url_str) if '.' in avatar_url: ext = '.' + avatar_url.split('.')[-1] acct_url = \ diff --git a/webapp_podcast.py b/webapp_podcast.py index dbe932a4f..13ebe5c2e 100644 --- a/webapp_podcast.py +++ b/webapp_podcast.py @@ -12,6 +12,7 @@ import html import datetime import urllib.parse from shutil import copyfile +from utils import get_url_from_post from utils import get_config_param from utils import remove_html from media import path_is_audio @@ -39,7 +40,8 @@ def _html_podcast_chapters(link_url: str, if not isinstance(podcast_properties[key], dict): return '' if podcast_properties[key].get('url'): - chapters_url = remove_html(podcast_properties[key]['url']) + url_str = get_url_from_post(podcast_properties[key]['url']) + chapters_url = remove_html(url_str) elif podcast_properties[key].get('uri'): chapters_url = podcast_properties[key]['uri'] else: @@ -80,7 +82,8 @@ def _html_podcast_chapters(link_url: str, chapter_title = chapter['title'] chapter_url = '' if chapter.get('url'): - chapter_url = remove_html(chapter['url']) + url_str = get_url_from_post(chapter['url']) + chapter_url = remove_html(url_str) chapter_title = \ '' + \ chapter['title'] + '<\a>' @@ -122,7 +125,8 @@ def _html_podcast_transcripts(podcast_properties: {}, translate: {}) -> str: for _ in podcast_properties[key]: transcript_url = None if podcast_properties[key].get('url'): - transcript_url = remove_html(podcast_properties[key]['url']) + url_str = get_url_from_post(podcast_properties[key]['url']) + transcript_url = remove_html(url_str) elif podcast_properties[key].get('uri'): transcript_url = podcast_properties[key]['uri'] if not transcript_url: @@ -155,7 +159,8 @@ def _html_podcast_social_interactions(podcast_properties: {}, if podcast_properties[key].get('uri'): episode_post_url = podcast_properties[key]['uri'] elif podcast_properties[key].get('url'): - episode_post_url = remove_html(podcast_properties[key]['url']) + url_str = get_url_from_post(podcast_properties[key]['url']) + episode_post_url = remove_html(url_str) elif podcast_properties[key].get('text'): episode_post_url = podcast_properties[key]['text'] else: @@ -439,7 +444,8 @@ def html_podcast_episode(translate: {}, # donate button if podcast_properties.get('funding'): if podcast_properties['funding'].get('url'): - donate_url = remove_html(podcast_properties['funding']['url']) + url_str = get_url_from_post(podcast_properties['funding']['url']) + donate_url = remove_html(url_str) podcast_str += \ '