From 775bb198b589c6e80678ca5dc4115ba7a0d0d1b8 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 26 Dec 2024 14:26:20 +0000 Subject: [PATCH] Handle extra case for image attachments --- webapp_utils.py | 67 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 13 deletions(-) diff --git a/webapp_utils.py b/webapp_utils.py index 43b929695..a8f9d6a68 100644 --- a/webapp_utils.py +++ b/webapp_utils.py @@ -13,6 +13,7 @@ from collections import OrderedDict from session import get_json from session import get_json_valid from flags import is_float +from utils import media_file_mime_type from utils import replace_strings from utils import get_image_file from utils import data_dir @@ -1314,38 +1315,44 @@ def get_post_attachments_as_html(base_dir: str, # https://codeberg.org/fediverse/fep/src/branch/main/fep/1970/fep-1970.md attached_urls: list[str] = [] for attach in attachment_dict: + url = None + if attach.get('url'): + url = get_url_from_post(attach['url']) + elif attach.get('href'): + url = attach['href'] + if not attach.get('type') or \ not attach.get('name') or \ - not attach.get('href') or \ + not url or \ not attach.get('rel'): continue if not isinstance(attach['type'], str) or \ not isinstance(attach['name'], str) or \ - not isinstance(attach['href'], str) or \ + not isinstance(url, str) or \ not isinstance(attach['rel'], str): continue if attach['type'] != 'Link' or \ attach['name'] != 'Chat' or \ attach['rel'] != 'discussion' or \ '://' not in attach['href'] or \ - '.' not in attach['href']: + '.' not in url: continue # get the domain for the chat link chat_domain_str = '' - attach_url = remove_html(attach['href']) + attach_url = remove_html(url) if attach_url in attached_urls: continue attached_urls.append(attach_url) chat_domain, _ = get_domain_from_actor(attach_url) if chat_domain: if local_network_host(chat_domain): - print('REJECT: local network chat link ' + attach['href']) + print('REJECT: local network chat link ' + url) continue chat_domain_str = ' (' + chat_domain + ')' # avoid displaying very long domains if len(chat_domain_str) > 50: chat_domain_str = '' - chat_url = remove_html(attach['href']) + chat_url = remove_html(url) attachment_str += \ '

' + \ @@ -1378,8 +1385,45 @@ def get_post_attachments_as_html(base_dir: str, transcripts[name] = remove_html(url) for attach in attachment_dict: - if not (attach.get('mediaType') and attach.get('url')): + # get the image/video/audio url + url = None + if attach.get('url'): + url = get_url_from_post(attach['url']) + elif attach.get('href'): + url = attach['href'] + + if not url: + # this is not an image/video/audio attachment continue + if not isinstance(url, str): + continue + + # get the media type + media_type = None + if attach.get('mediaType'): + media_type = attach['mediaType'] + else: + # See https://data.funfedi.dev/0.1.12/mastodon__v4.3.2/ + # image_attachments/#example-13 + if url and attach.get('type'): + if attach['type'] == 'Image': + if attach.get('url'): + if isinstance(attach['url'], dict): + if attach['url'].get('mediaType'): + media_type = attach['url']['mediaType'] + if not media_type: + url_ending = url + if '/' in url: + url_ending = url.split('/')[-1] + if '.' in url_ending: + media_type = media_file_mime_type(url) + + if not media_type: + # this is not an image/video/audio attachment + continue + if not isinstance(media_type, str): + continue + media_license = '' if attach.get('schema:license'): if not dangerous_markup(attach['schema:license'], False, []): @@ -1418,14 +1462,12 @@ def get_post_attachments_as_html(base_dir: str, attrib_str, system_language): media_creator = attrib_str - media_type = attach['mediaType'] image_description = '' if attach.get('name'): image_description = attach['name'].replace('"', "'") image_description = remove_html(image_description) if _is_image_mime_type(media_type): - url_str = get_url_from_post(attach['url']) - image_url = remove_html(url_str) + image_url = remove_html(url) if image_url in attached_urls: continue attached_urls.append(image_url) @@ -1572,7 +1614,7 @@ def get_post_attachments_as_html(base_dir: str, attachment_ctr += 1 elif _is_video_mime_type(media_type): - video_url = remove_html(attach['url']) + video_url = remove_html(url) if video_url in attached_urls: continue attached_urls.append(video_url) @@ -1659,8 +1701,7 @@ def get_post_attachments_as_html(base_dir: str, attachment_ctr += 1 elif _is_audio_mime_type(media_type): extension = '.mp3' - url_str = get_url_from_post(attach['url']) - audio_url = remove_html(url_str) + audio_url = remove_html(url) if audio_url in attached_urls: continue attached_urls.append(audio_url)