From 080b2ca352ad2e58c7daa06ba19a008abc257bb1 Mon Sep 17 00:00:00 2001
From: Bob Mottram <bob@libreserver.org>
Date: Sat, 9 Dec 2023 14:18:24 +0000
Subject: [PATCH] Handle situations where urls are lists

---
 blog.py              |  7 +++++--
 bookmarks.py         |  7 +++++--
 content.py           | 11 ++++++++---
 daemon.py            | 21 ++++++++++++++++++---
 desktop_client.py    |  4 +++-
 inbox.py             | 21 +++++++++++++--------
 maps.py              |  4 +++-
 mastoapiv1.py        | 13 +++++++++----
 mastoapiv2.py        |  7 +++++--
 newswire.py          | 10 +++++++---
 person.py            | 21 ++++++++++++++-------
 pgp.py               | 17 +++++++++++------
 posts.py             | 31 +++++++++++++++++--------------
 tests.py             | 16 +++++++++-------
 utils.py             | 29 +++++++++++++++++++++++++++--
 video.py             |  8 +++++---
 webapp_moderation.py |  4 +++-
 webapp_podcast.py    | 16 +++++++++++-----
 webapp_post.py       | 10 +++++++---
 webapp_profile.py    | 14 +++++++++-----
 webapp_search.py     | 11 ++++++++---
 webapp_utils.py      | 24 ++++++++++++++++--------
 webfinger.py         |  7 +++++--
 23 files changed, 218 insertions(+), 95 deletions(-)
diff --git a/blog.py b/blog.py
index 63e98488f..065be2805 100644
--- a/blog.py
+++ b/blog.py
@@ -16,6 +16,7 @@ from webapp_utils import html_footer
 from webapp_utils import get_post_attachments_as_html
 from webapp_utils import edit_text_area
 from webapp_media import add_embedded_elements
+from utils import get_url_from_post
 from utils import date_from_string_format
 from utils import get_attributed_to
 from utils import remove_eol
@@ -314,7 +315,8 @@ def _html_blog_post_content(debug: bool, session, authorized: bool,
                 continue
             if not tag_json.get('url'):
                 continue
-            citation_url = remove_html(tag_json['url'])
+            url_str = get_url_from_post(tag_json['url'])
+            citation_url = remove_html(url_str)
             citation_name = remove_html(tag_json['name'])
             citations_str += \
                 '<li><a href="' + citation_url + '">' + \
@@ -482,7 +484,8 @@ def html_blog_post(session, authorized: bool,
     title = post_json_object['object']['summary']
     url = ''
     if post_json_object['object'].get('url'):
-        url = remove_html(post_json_object['object']['url'])
+        url_str = get_url_from_post(post_json_object['object']['url'])
+        url = remove_html(url_str)
     snippet = _get_snippet_from_blog_content(post_json_object,
                                              system_language)
     blog_str = html_header_with_blog_markup(css_filename, instance_title,
diff --git a/bookmarks.py b/bookmarks.py
index f786a6a21..f514451e8 100644
--- a/bookmarks.py
+++ b/bookmarks.py
@@ -11,6 +11,7 @@ import os
 from pprint import pprint
 from webfinger import webfinger_handle
 from auth import create_basic_auth_header
+from utils import get_url_from_post
 from utils import remove_domain_port
 from utils import has_users_path
 from utils import get_full_domain
@@ -603,7 +604,8 @@ def outbox_bookmark(recent_posts_cache: {},
     if debug:
         print('DEBUG: c2s bookmark Add request arrived in outbox')
 
-    message_url = remove_id_ending(message_json['object']['url'])
+    url_str = get_url_from_post(message_json['object']['url'])
+    message_url = remove_id_ending(url_str)
     message_url = remove_html(message_url)
     domain = remove_domain_port(domain)
     post_filename = locate_post(base_dir, nickname, domain, message_url)
@@ -661,7 +663,8 @@ def outbox_undo_bookmark(recent_posts_cache: {},
     if debug:
         print('DEBUG: c2s unbookmark Remove request arrived in outbox')
 
-    message_url = remove_id_ending(message_json['object']['url'])
+    url_str = get_url_from_post(message_json['object']['url'])
+    message_url = remove_id_ending(url_str)
     message_url = remove_html(message_url)
     domain = remove_domain_port(domain)
     post_filename = locate_post(base_dir, nickname, domain, message_url)
diff --git a/content.py b/content.py
index 68f5819b2..effe0e36d 100644
--- a/content.py
+++ b/content.py
@@ -15,6 +15,7 @@ import email.parser
 import urllib.parse
 from shutil import copyfile
 from dateutil.parser import parse
+from utils import get_url_from_post
 from utils import is_right_to_left_text
 from utils import language_right_to_left
 from utils import binary_is_image
@@ -446,11 +447,14 @@ def replace_emoji_from_tags(session, base_dir: str,
             continue
         if not tag_item['icon'].get('url'):
             continue
-        if '/' not in tag_item['icon']['url']:
+        url_str = get_url_from_post(tag_item['icon']['url'])
+        if '/' not in url_str:
             continue
         if tag_item['name'] not in content:
             continue
-        tag_url = remove_html(tag_item['icon']['url'])
+        tag_url = remove_html(url_str)
+        if not tag_url:
+            continue
         icon_name = tag_url.split('/')[-1]
         if icon_name:
             if len(icon_name) > 1:
@@ -532,7 +536,8 @@ def replace_emoji_from_tags(session, base_dir: str,
             emoji_tag_name = tag_item['name'].replace(':', '')
         else:
             emoji_tag_name = ''
-        tag_url = remove_html(tag_item['icon']['url'])
+        url_str = get_url_from_post(tag_item['icon']['url'])
+        tag_url = remove_html(url_str)
         emoji_html = "<img src=\"" + tag_url + "\" alt=\"" + \
             emoji_tag_name + \
             "\" align=\"middle\" class=\"" + html_class + "\"/>"
diff --git a/daemon.py b/daemon.py
index 7d0706ba0..43ba56023 100644
--- a/daemon.py
+++ b/daemon.py
@@ -300,6 +300,7 @@ from languages import set_actor_languages
 from languages import get_understood_languages
 from like import update_likes_collection
 from reaction import update_reaction_collection
+from utils import get_url_from_post
 from utils import date_from_string_format
 from utils import corp_servers
 from utils import get_attributed_to
@@ -2209,9 +2210,19 @@ class PubServer(BaseHTTPRequestHandler):
         if has_object_dict(message_json):
             if debug:
                 print('INBOX: checking object fields')
+            # check that url is a string or list
+            if message_json['object'].get('url'):
+                if not isinstance(message_json['object']['url'], str) and \
+                   not isinstance(message_json['object']['url'], list):
+                    print('INBOX: url should be a string or list ' +
+                          str(message_json['object']['url']))
+                    self._400()
+                    self.server.postreq_busy = False
+                    return 3
+            # check that some fields are strings
             string_fields = (
                 'id', 'actor', 'type', 'content', 'published',
-                'summary', 'url'
+                'summary'
             )
             for check_field in string_fields:
                 if not message_json['object'].get(check_field):
@@ -6696,7 +6707,9 @@ class PubServer(BaseHTTPRequestHandler):
                     for m_type, last_part in uploads:
                         rep_str = '/' + last_part
                         if m_type == 'avatar':
-                            actor_url = remove_html(actor_json['icon']['url'])
+                            url_str = \
+                                get_url_from_post(actor_json['icon']['url'])
+                            actor_url = remove_html(url_str)
                             last_part_of_url = actor_url.split('/')[-1]
                             srch_str = '/' + last_part_of_url
                             actor_url = actor_url.replace(srch_str, rep_str)
@@ -6709,8 +6722,10 @@ class PubServer(BaseHTTPRequestHandler):
                                 actor_json['icon']['mediaType'] = \
                                     'image/' + img_ext
                         elif m_type == 'image':
+                            url_str = \
+                                get_url_from_post(actor_json['image']['url'])
                             im_url = \
-                                remove_html(actor_json['image']['url'])
+                                remove_html(url_str)
                             last_part_of_url = im_url.split('/')[-1]
                             srch_str = '/' + last_part_of_url
                             actor_json['image']['url'] = \
diff --git a/desktop_client.py b/desktop_client.py
index 37805df4a..1d789badd 100644
--- a/desktop_client.py
+++ b/desktop_client.py
@@ -16,6 +16,7 @@ import webbrowser
 import urllib.parse
 from pathlib import Path
 from random import randint
+from utils import get_url_from_post
 from utils import get_actor_languages_list
 from utils import get_attributed_to
 from utils import remove_html
@@ -761,7 +762,8 @@ def _show_replies_on_post(post_json_object: {}, max_replies: int) -> None:
     print('')
     ctr = 0
     for item in object_replies['items']:
-        item_url = remove_html(item['url'])
+        url_str = get_url_from_post(item['url'])
+        item_url = remove_html(url_str)
         print('  ↰ ' + str(item_url))
         ctr += 1
         if ctr >= max_replies:
diff --git a/inbox.py b/inbox.py
index 2491a0d0e..24de733da 100644
--- a/inbox.py
+++ b/inbox.py
@@ -18,6 +18,7 @@ from languages import understood_post_language
 from like import update_likes_collection
 from reaction import update_reaction_collection
 from reaction import valid_emoji_content
+from utils import get_url_from_post
 from utils import date_from_string_format
 from utils import date_epoch
 from utils import date_utcnow
@@ -192,9 +193,10 @@ def cache_svg_images(session, base_dir: str, http_prefix: str,
             continue
         if not attach.get('url'):
             continue
-        if attach['url'].endswith('.svg') or \
+        url_str = get_url_from_post(attach['url'])
+        if url_str.endswith('.svg') or \
            'svg' in attach['mediaType']:
-            url = remove_html(attach['url'])
+            url = remove_html(url_str)
             if not url_permitted(url, federation_list):
                 continue
             # if this is a local image then it has already been
@@ -1209,7 +1211,8 @@ def _person_receive_update(base_dir: str,
                            debug: bool, http_prefix: str) -> bool:
     """Changes an actor. eg: avatar or display name change
     """
-    person_url = remove_html(person_json['url'])
+    url_str = get_url_from_post(person_json['url'])
+    person_url = remove_html(url_str)
     if debug:
         print('Receiving actor update for ' + person_url +
               ' ' + str(person_json))
@@ -1901,7 +1904,7 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str,
                     print('Person Update: ' + str(message_json))
                     if debug:
                         print('DEBUG: Profile update was received for ' +
-                              message_json['object']['url'])
+                              str(message_json['object']['url']))
                         return True
     return False
 
@@ -2714,14 +2717,15 @@ def _receive_bookmark(recent_posts_cache: {},
         if debug:
             print('DEBUG: inbox bookmark Add missing url')
         return False
-    if '/statuses/' not in message_json['object']['url']:
+    url_str = get_url_from_post(message_json['object']['url'])
+    if '/statuses/' not in url_str:
         if debug:
             print('DEBUG: inbox bookmark Add missing statuses un url')
         return False
     if debug:
         print('DEBUG: c2s inbox bookmark Add request arrived in outbox')
 
-    message_url2 = remove_html(message_json['object']['url'])
+    message_url2 = remove_html(url_str)
     message_url = remove_id_ending(message_url2)
     domain = remove_domain_port(domain)
     post_filename = locate_post(base_dir, nickname, domain, message_url)
@@ -2840,7 +2844,8 @@ def _receive_undo_bookmark(recent_posts_cache: {},
         if debug:
             print('DEBUG: inbox undo bookmark Remove missing url')
         return False
-    if '/statuses/' not in message_json['object']['url']:
+    url_str = get_url_from_post(message_json['object']['url'])
+    if '/statuses/' not in url_str:
         if debug:
             print('DEBUG: inbox undo bookmark Remove missing statuses un url')
         return False
@@ -2848,7 +2853,7 @@ def _receive_undo_bookmark(recent_posts_cache: {},
         print('DEBUG: c2s inbox Remove bookmark ' +
               'request arrived in outbox')
 
-    message_url2 = remove_html(message_json['object']['url'])
+    message_url2 = remove_html(url_str)
     message_url = remove_id_ending(message_url2)
     domain = remove_domain_port(domain)
     post_filename = locate_post(base_dir, nickname, domain, message_url)
diff --git a/maps.py b/maps.py
index 0ca78041a..edc02c339 100644
--- a/maps.py
+++ b/maps.py
@@ -9,6 +9,7 @@ __module_group__ = "Core"
 
 
 import os
+from utils import get_url_from_post
 from utils import is_float
 from utils import acct_dir
 from utils import load_json
@@ -400,7 +401,8 @@ def get_map_preferences_url(base_dir: str, nickname: str, domain: str) -> str:
     if os.path.isfile(maps_filename):
         maps_json = load_json(maps_filename)
         if maps_json.get('url'):
-            return remove_html(maps_json['url'])
+            url_str = get_url_from_post(maps_json['url'])
+            return remove_html(url_str)
     return None
 
 
diff --git a/mastoapiv1.py b/mastoapiv1.py
index ab8e8baf1..6da3d1682 100644
--- a/mastoapiv1.py
+++ b/mastoapiv1.py
@@ -8,6 +8,7 @@ __status__ = "Production"
 __module_group__ = "API"
 
 import os
+from utils import get_url_from_post
 from utils import load_json
 from utils import get_config_param
 from utils import acct_dir
@@ -79,8 +80,10 @@ def _meta_data_instance_v1(show_accounts: bool,
     if admin_actor.get('published'):
         created_at = admin_actor['published']
 
-    icon_url = remove_html(admin_actor['icon']['url'])
-    image_url = remove_html(admin_actor['image']['url'])
+    url_str = get_url_from_post(admin_actor['icon']['url'])
+    icon_url = remove_html(url_str)
+    url_str = get_url_from_post(admin_actor['image']['url'])
+    image_url = remove_html(url_str)
     instance = {
         'approval_required': False,
         'invites_enabled': False,
@@ -205,8 +208,10 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str,
     account_json = load_json(account_filename)
     if not account_json:
         return {}
-    avatar_url = remove_html(account_json['icon']['url'])
-    image_url = remove_html(account_json['image']['url'])
+    url_str = get_url_from_post(account_json['icon']['url'])
+    avatar_url = remove_html(url_str)
+    url_str = get_url_from_post(account_json['image']['url'])
+    image_url = remove_html(url_str)
     joined_date = "2016-10-05T10:30:00Z"
     if account_json.get('published'):
         joined_date = account_json['published']
diff --git a/mastoapiv2.py b/mastoapiv2.py
index 07ee2663c..06c8212ec 100644
--- a/mastoapiv2.py
+++ b/mastoapiv2.py
@@ -8,6 +8,7 @@ __status__ = "Production"
 __module_group__ = "API"
 
 import os
+from utils import get_url_from_post
 from utils import load_json
 from utils import get_config_param
 from utils import acct_dir
@@ -85,8 +86,10 @@ def _meta_data_instance_v2(show_accounts: bool,
     if admin_actor.get('published'):
         created_at = admin_actor['published']
 
-    icon_url = remove_html(admin_actor['icon']['url'])
-    image_url = remove_html(admin_actor['image']['url'])
+    url_str = get_url_from_post(admin_actor['icon']['url'])
+    icon_url = remove_html(url_str)
+    url_str = get_url_from_post(admin_actor['image']['url'])
+    image_url = remove_html(url_str)
     thumbnail_url = http_prefix + '://' + domain_full + '/login.png'
     admin_email = None
     noindex = True
diff --git a/newswire.py b/newswire.py
index fbffdcc53..31b8ff4e8 100644
--- a/newswire.py
+++ b/newswire.py
@@ -19,6 +19,7 @@ from datetime import timezone
 from collections import OrderedDict
 from utils import valid_post_date
 from categories import set_hashtag_category
+from utils import get_url_from_post
 from utils import remove_zero_length_strings
 from utils import date_from_string_format
 from utils import acct_handle_dir
@@ -1143,7 +1144,8 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
             continue
         if not json_feed_item.get('url'):
             continue
-        if not isinstance(json_feed_item['url'], str):
+        url_str = get_url_from_post(json_feed_item['url'])
+        if not url_str:
             continue
         if not json_feed_item.get('date_published'):
             if not json_feed_item.get('date_modified'):
@@ -1182,7 +1184,7 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
                         if tag_name not in description:
                             description += ' ' + tag_name
 
-        link = remove_html(json_feed_item['url'])
+        link = remove_html(url_str)
         if '://' not in link:
             continue
         if len(link) > max_bytes:
@@ -1602,7 +1604,9 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
                     description = remove_html(description)
                     tags_from_post = _get_hashtags_from_post(post_json_object)
                     summary = post_json_object['object']['summary']
-                    url2 = remove_html(post_json_object['object']['url'])
+                    url_str = \
+                        get_url_from_post(post_json_object['object']['url'])
+                    url2 = remove_html(url_str)
                     _add_newswire_dict_entry(base_dir, domain,
                                              newswire, published,
                                              summary, url2,
diff --git a/person.py b/person.py
index 29ca9c9da..95e66adb4 100644
--- a/person.py
+++ b/person.py
@@ -37,6 +37,7 @@ from roles import set_role
 from roles import actor_roles_from_list
 from roles import get_actor_roles_list
 from media import process_meta_data
+from utils import get_url_from_post
 from utils import date_utcnow
 from utils import get_memorials
 from utils import is_account_dir
@@ -200,7 +201,8 @@ def randomize_actor_images(person_json: {}) -> None:
     This causes other instances to update their cached avatar image
     """
     person_id = person_json['id']
-    last_part_of_filename = person_json['icon']['url'].split('/')[-1]
+    url_str = get_url_from_post(person_json['icon']['url'])
+    last_part_of_filename = url_str.split('/')[-1]
     existing_extension = last_part_of_filename.split('.')[1]
     # NOTE: these files don't need to have cryptographically
     # secure names
@@ -210,7 +212,8 @@ def randomize_actor_images(person_json: {}) -> None:
     person_json['icon']['url'] = \
         base_url + '/system/accounts/avatars/' + nickname + \
         '/avatar' + rand_str + '.' + existing_extension
-    last_part_of_filename = person_json['image']['url'].split('/')[-1]
+    url_str = get_url_from_post(person_json['image']['url'])
+    last_part_of_filename = url_str.split('/')[-1]
     existing_extension = last_part_of_filename.split('.')[1]
     rand_str = str(randint(10000000000000, 99999999999999))  # nosec
     person_json['image']['url'] = \
@@ -229,6 +232,9 @@ def get_actor_update_json(actor_json: {}) -> {}:
     indexable = False
     if actor_json.get('indexable'):
         indexable = True
+    actor_url = get_url_from_post(actor_json['url'])
+    icon_url = get_url_from_post(actor_json['icon']['url'])
+    image_url = get_url_from_post(actor_json['image']['url'])
     return {
         '@context': [
             "https://www.w3.org/ns/activitystreams",
@@ -311,11 +317,11 @@ def get_actor_update_json(actor_json: {}) -> {}:
             'type': actor_json['type'],
             'icon': {
                 'type': 'Image',
-                'url': actor_json['icon']['url']
+                'url': icon_url
             },
             'image': {
                 'type': 'Image',
-                'url': actor_json['image']['url']
+                'url': image_url
             },
             'attachment': actor_json['attachment'],
             'following': actor_json['id'] + '/following',
@@ -327,7 +333,7 @@ def get_actor_update_json(actor_json: {}) -> {}:
             'preferredUsername': actor_json['preferredUsername'],
             'name': actor_json['name'],
             'summary': actor_json['summary'],
-            'url': actor_json['url'],
+            'url': actor_url,
             'manuallyApprovesFollowers': manually_approves_followers,
             'discoverable': actor_json['discoverable'],
             'memorial': memorial,
@@ -1844,8 +1850,9 @@ def get_person_avatar_url(base_dir: str, person_url: str,
 
     if person_json.get('icon'):
         if person_json['icon'].get('url'):
-            if '.svg' not in person_json['icon']['url'].lower():
-                return remove_html(person_json['icon']['url'])
+            url_str = get_url_from_post(person_json['icon']['url'])
+            if '.svg' not in url_str.lower():
+                return remove_html(url_str)
     return None
 
 
diff --git a/pgp.py b/pgp.py
index b4a6a522f..5955b2cd6 100644
--- a/pgp.py
+++ b/pgp.py
@@ -12,6 +12,7 @@ import base64
 import subprocess
 from pathlib import Path
 from person import get_actor_json
+from utils import get_url_from_post
 from utils import safe_system_string
 from utils import contains_pgp_public_key
 from utils import is_pgp_encrypted
@@ -712,18 +713,20 @@ def pgp_public_key_upload(base_dir: str, session,
 def actor_to_vcard(actor: {}, domain: str) -> str:
     """Returns a vcard for a given actor
     """
+    actor_url_str = get_url_from_post(actor['url'])
     vcard_str = 'BEGIN:VCARD\n'
     vcard_str += 'VERSION:4.0\n'
     vcard_str += 'REV:' + actor['published'] + '\n'
     vcard_str += 'FN:' + remove_html(actor['name']) + '\n'
     vcard_str += 'NICKNAME:' + actor['preferredUsername'] + '\n'
-    vcard_str += 'URL;TYPE=profile:' + actor['url'] + '\n'
+    vcard_str += 'URL;TYPE=profile:' + actor_url_str + '\n'
     blog_address = get_blog_address(actor)
     if blog_address:
         vcard_str += 'URL;TYPE=blog:' + blog_address + '\n'
     vcard_str += 'NOTE:' + remove_html(actor['summary']) + '\n'
-    if actor['icon']['url']:
-        vcard_str += 'PHOTO:' + actor['icon']['url'] + '\n'
+    url_str = get_url_from_post(actor['icon']['url'])
+    if url_str:
+        vcard_str += 'PHOTO:' + url_str + '\n'
     pgp_key = get_pgp_pub_key(actor)
     if pgp_key:
         vcard_str += 'KEY:data:application/pgp-keys;base64,' + \
@@ -801,18 +804,20 @@ def actor_to_vcard_xml(actor: {}, domain: str) -> str:
         vcard_str += '    <impp>' + \
             '<parameters><type><text>cwtch</text></type></parameters>' + \
             '<text>' + cwtch_address + '</text></impp>\n'
+    url_str = get_url_from_post(actor['url'])
     vcard_str += '    <url>' + \
         '<parameters><type><text>profile</text></type></parameters>' + \
-        '<uri>' + actor['url'] + '</uri></url>\n'
+        '<uri>' + url_str + '</uri></url>\n'
     blog_address = get_blog_address(actor)
     if blog_address:
         vcard_str += '    <url>' + \
             '<parameters><type><text>blog</text></type></parameters>' + \
             '<uri>' + blog_address + '</uri></url>\n'
     vcard_str += '    <rev>' + actor['published'] + '</rev>\n'
-    if actor['icon']['url']:
+    url_str = get_url_from_post(actor['icon']['url'])
+    if url_str:
         vcard_str += \
-            '    <photo><uri>' + actor['icon']['url'] + '</uri></photo>\n'
+            '    <photo><uri>' + url_str + '</uri></photo>\n'
     pgp_key = get_pgp_pub_key(actor)
     if pgp_key:
         pgp_key_encoded = \
diff --git a/posts.py b/posts.py
index 9a992f86e..461fa8632 100644
--- a/posts.py
+++ b/posts.py
@@ -34,6 +34,7 @@ from webfinger import webfinger_handle
 from httpsig import create_signed_header
 from siteactive import site_is_active
 from languages import understood_post_language
+from utils import get_url_from_post
 from utils import date_from_string_format
 from utils import date_epoch
 from utils import date_utcnow
@@ -406,7 +407,8 @@ def get_person_box(signing_priv_key_pem: str, origin_domain: str,
     avatar_url = None
     if person_json.get('icon'):
         if person_json['icon'].get('url'):
-            avatar_url = remove_html(person_json['icon']['url'])
+            url_str = get_url_from_post(person_json['icon']['url'])
+            avatar_url = remove_html(url_str)
     display_name = None
     possible_display_name = None
     if person_json.get('name'):
@@ -652,16 +654,16 @@ def _get_posts(session, outbox_url: str, max_posts: int,
                         if tag_item.get('name') and tag_item.get('icon'):
                             if tag_item['icon'].get('url'):
                                 # No emoji from non-permitted domains
-                                if url_permitted(tag_item['icon']['url'],
+                                url_str = \
+                                    get_url_from_post(tag_item['icon']['url'])
+                                if url_permitted(url_str,
                                                  federation_list):
                                     emoji_name = tag_item['name']
-                                    emoji_icon = \
-                                        remove_html(tag_item['icon']['url'])
+                                    emoji_icon = remove_html(url_str)
                                     emoji[emoji_name] = emoji_icon
                                 else:
                                     if debug:
-                                        print('url not permitted ' +
-                                              tag_item['icon']['url'])
+                                        print('url not permitted ' + url_str)
                     if tag_type == 'mention':
                         if tag_item.get('name'):
                             if tag_item['name'] not in mentions:
@@ -703,15 +705,15 @@ def _get_posts(session, outbox_url: str, max_posts: int,
                     for attach in this_item['attachment']:
                         if attach.get('name') and attach.get('url'):
                             # no attachments from non-permitted domains
-                            attach_url = remove_html(attach['url'])
+                            url_str = get_url_from_post(attach['url'])
+                            attach_url = remove_html(url_str)
                             if url_permitted(attach_url,
                                              federation_list):
                                 attachment.append([attach['name'],
                                                    attach_url])
                             else:
                                 if debug:
-                                    print('url not permitted ' +
-                                          attach['url'])
+                                    print('url not permitted ' + url_str)
 
             sensitive = False
             if this_item.get('sensitive'):
@@ -906,9 +908,9 @@ def _get_posts_for_blocked_domains(base_dir: str,
                     continue
                 if is_blocked_domain(base_dir, post_domain):
                     if item['object'].get('url'):
-                        url = item['object']['url']
+                        url = get_url_from_post(item['object']['url'])
                     else:
-                        url = item['object']['id']
+                        url = get_url_from_post(item['object']['id'])
                     url = remove_html(url)
                     if not blocked_posts.get(post_domain):
                         blocked_posts[post_domain] = [url]
@@ -929,9 +931,9 @@ def _get_posts_for_blocked_domains(base_dir: str,
                         continue
                     if is_blocked_domain(base_dir, post_domain):
                         if item['object'].get('url'):
-                            url = item['object']['url']
+                            url = get_url_from_post(item['object']['url'])
                         else:
-                            url = item['object']['id']
+                            url = get_url_from_post(item['object']['id'])
                         url = remove_html(url)
                         if not blocked_posts.get(post_domain):
                             blocked_posts[post_domain] = [url]
@@ -2206,7 +2208,8 @@ def create_blog_post(base_dir: str,
                            low_bandwidth, content_license_url,
                            media_license_url, media_creator,
                            languages_understood, translate, buy_url, chat_url)
-    obj_url = remove_html(blog_json['object']['url'])
+    url_str = get_url_from_post(blog_json['object']['url'])
+    obj_url = remove_html(url_str)
     if '/@/' not in obj_url:
         blog_json['object']['url'] = obj_url.replace('/@', '/users/')
     _append_citations_to_blog_post(base_dir, nickname, domain, blog_json)
diff --git a/tests.py b/tests.py
index fb6161f11..90581b2f6 100644
--- a/tests.py
+++ b/tests.py
@@ -56,6 +56,7 @@ from follow import clear_followers
 from follow import send_follow_request_via_server
 from follow import send_unfollow_request_via_server
 from siteactive import site_is_active
+from utils import get_url_from_post
 from utils import date_from_string_format
 from utils import date_utcnow
 from utils import is_right_to_left_text
@@ -1470,10 +1471,11 @@ def test_post_message_between_servers(base_dir: str) -> None:
         assert attached.get('type')
         assert attached.get('url')
         assert attached['mediaType'] == 'image/png'
-        if '/system/media_attachments/files/' not in attached['url']:
-            print(attached['url'])
-        assert '/system/media_attachments/files/' in attached['url']
-        assert attached['url'].endswith('.png')
+        url_str = get_url_from_post(attached['url'])
+        if '/system/media_attachments/files/' not in url_str:
+            print(str(attached['url']))
+        assert '/system/media_attachments/files/' in url_str
+        assert url_str.endswith('.png')
         assert attached.get('width')
         assert attached.get('height')
         assert attached['width'] > 0
@@ -4255,7 +4257,7 @@ def _test_danger_svg(base_dir: str) -> None:
                             federation_list, debug,
                             svg_image_filename)
 
-    url = post_json_object['object']['attachment'][0]['url']
+    url = get_url_from_post(post_json_object['object']['attachment'][0]['url'])
     assert url == 'https://ratsratsrats.live/media/1234_wibble.svg'
 
     with open(svg_image_filename, 'rb') as fp_svg:
@@ -7317,8 +7319,8 @@ def _test_xml_podcast_dict(base_dir: str) -> None:
     assert podcast_properties.get('funding')
     assert int(podcast_properties['episode']) == 5
     assert podcast_properties['funding']['text'] == "Support the show"
-    assert podcast_properties['funding']['url'] == \
-        "https://whoframed.rodger/donate"
+    url_str = get_url_from_post(podcast_properties['funding']['url'])
+    assert url_str == "https://whoframed.rodger/donate"
     assert len(podcast_properties['transcripts']) == 3
     assert len(podcast_properties['valueRecipients']) == 2
     assert len(podcast_properties['persons']) == 5
diff --git a/utils.py b/utils.py
index 705168ab5..9785a1b00 100644
--- a/utils.py
+++ b/utils.py
@@ -110,6 +110,31 @@ def date_epoch():
     return date_from_numbers(1970, 1, 1, 0, 0)
 
 
+def get_url_from_post(url_field) -> str:
+    """Returns url_field if it is a string, or else the first text/html
+    href containing '://' from a list of dicts. Otherwise returns ''"""
+    if isinstance(url_field, str):
+        return url_field
+    if isinstance(url_field, list):
+        for url_dict in url_field:
+            if not isinstance(url_dict, dict):
+                continue
+            if 'href' not in url_dict:
+                continue
+            if 'mediaType' not in url_dict:
+                continue
+            if not isinstance(url_dict['href'], str):
+                continue
+            if not isinstance(url_dict['mediaType'], str):
+                continue
+            if url_dict['mediaType'] != 'text/html':  # other types ignored
+                continue
+            if '://' not in url_dict['href']:  # needs a scheme separator
+                continue
+            return url_dict['href']
+    return ''
+
+
 def get_attributed_to(field) -> str:
     """Returns the actor
     """
@@ -404,7 +429,7 @@ def get_media_descriptions_from_post(post_json_object: {}) -> str:
             continue
         descriptions += attach['name'] + ' '
         if attach.get('url'):
-            descriptions += attach['url'] + ' '
+            descriptions += get_url_from_post(attach['url']) + ' '
     return descriptions.strip()
 
 
@@ -2056,7 +2081,7 @@ def _remove_attachment(base_dir: str, http_prefix: str, domain: str,
         return
     if not post_json['attachment'][0].get('url'):
         return
-    attachment_url = post_json['attachment'][0]['url']
+    attachment_url = get_url_from_post(post_json['attachment'][0]['url'])
     if not attachment_url:
         return
     attachment_url = remove_html(attachment_url)
diff --git a/video.py b/video.py
index 38edfdb7a..e2814e0ff 100644
--- a/video.py
+++ b/video.py
@@ -7,6 +7,7 @@ __email__ = "bob@libreserver.org"
 __status__ = "Production"
 __module_group__ = "Timeline"
 
+from utils import get_url_from_post
 from utils import remove_html
 from utils import get_full_domain
 from utils import get_nickname_from_actor
@@ -204,9 +205,10 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
                     continue
                 if not lang.get('url'):
                     continue
-                if not isinstance(lang['url'], str):
+                url_str = get_url_from_post(lang['url'])
+                if not url_str:
                     continue
-                if not lang['url'].endswith('.vtt'):
+                if not url_str.endswith('.vtt'):
                     continue
                 for understood in languages_understood:
                     if understood in lang['identifier']:
@@ -214,7 +216,7 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
                             "type": "Document",
                             "name": understood,
                             "mediaType": "text/vtt",
-                            "url": lang['url']
+                            "url": url_str
                         })
                         break
 
diff --git a/webapp_moderation.py b/webapp_moderation.py
index 102285150..3fb7a3eed 100644
--- a/webapp_moderation.py
+++ b/webapp_moderation.py
@@ -8,6 +8,7 @@ __status__ = "Production"
 __module_group__ = "Moderation"
 
 import os
+from utils import get_url_from_post
 from utils import remove_html
 from utils import is_artist
 from utils import is_account_dir
@@ -388,7 +389,8 @@ def html_moderation_info(translate: {}, base_dir: str,
         ext = ''
         if actor_json.get('icon'):
             if actor_json['icon'].get('url'):
-                avatar_url = remove_html(actor_json['icon']['url'])
+                url_str = get_url_from_post(actor_json['icon']['url'])
+                avatar_url = remove_html(url_str)
                 if '.' in avatar_url:
                     ext = '.' + avatar_url.split('.')[-1]
         acct_url = \
diff --git a/webapp_podcast.py b/webapp_podcast.py
index dbe932a4f..13ebe5c2e 100644
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@@ -12,6 +12,7 @@ import html
 import datetime
 import urllib.parse
 from shutil import copyfile
+from utils import get_url_from_post
 from utils import get_config_param
 from utils import remove_html
 from media import path_is_audio
@@ -39,7 +40,8 @@ def _html_podcast_chapters(link_url: str,
     if not isinstance(podcast_properties[key], dict):
         return ''
     if podcast_properties[key].get('url'):
-        chapters_url = remove_html(podcast_properties[key]['url'])
+        url_str = get_url_from_post(podcast_properties[key]['url'])
+        chapters_url = remove_html(url_str)
     elif podcast_properties[key].get('uri'):
         chapters_url = podcast_properties[key]['uri']
     else:
@@ -80,7 +82,8 @@ def _html_podcast_chapters(link_url: str,
                 chapter_title = chapter['title']
                 chapter_url = ''
                 if chapter.get('url'):
-                    chapter_url = remove_html(chapter['url'])
+                    url_str = get_url_from_post(chapter['url'])
+                    chapter_url = remove_html(url_str)
                     chapter_title = \
                         '<a href="' + chapter_url + '">' + \
                         chapter['title'] + '<\a>'
@@ -122,7 +125,8 @@ def _html_podcast_transcripts(podcast_properties: {}, translate: {}) -> str:
     for _ in podcast_properties[key]:
         transcript_url = None
         if podcast_properties[key].get('url'):
-            transcript_url = remove_html(podcast_properties[key]['url'])
+            url_str = get_url_from_post(podcast_properties[key]['url'])
+            transcript_url = remove_html(url_str)
         elif podcast_properties[key].get('uri'):
             transcript_url = podcast_properties[key]['uri']
         if not transcript_url:
@@ -155,7 +159,8 @@ def _html_podcast_social_interactions(podcast_properties: {},
     if podcast_properties[key].get('uri'):
         episode_post_url = podcast_properties[key]['uri']
     elif podcast_properties[key].get('url'):
-        episode_post_url = remove_html(podcast_properties[key]['url'])
+        url_str = get_url_from_post(podcast_properties[key]['url'])
+        episode_post_url = remove_html(url_str)
     elif podcast_properties[key].get('text'):
         episode_post_url = podcast_properties[key]['text']
     else:
@@ -439,7 +444,8 @@ def html_podcast_episode(translate: {},
     # donate button
     if podcast_properties.get('funding'):
         if podcast_properties['funding'].get('url'):
-            donate_url = remove_html(podcast_properties['funding']['url'])
+            url_str = get_url_from_post(podcast_properties['funding']['url'])
+            donate_url = remove_html(url_str)
             podcast_str += \
                 '<p><span itemprop="funding"><a href="' + donate_url + \
                 '" rel="donation"><button class="donateButton">' + \
diff --git a/webapp_post.py b/webapp_post.py
index 1490ea529..5d5f70354 100644
--- a/webapp_post.py
+++ b/webapp_post.py
@@ -24,6 +24,7 @@ from posts import post_is_muted
 from posts import get_person_box
 from posts import download_announce
 from posts import populate_replies_json
+from utils import get_url_from_post
 from utils import date_from_string_format
 from utils import remove_markup_tag
 from utils import ap_proxy_type
@@ -155,7 +156,8 @@ def _html_post_metadata_open_graph(domain: str, post_json_object: {},
                     "    <meta content=\"@" + actor_handle + \
                     "\" property=\"og:title\" />\n"
     if obj_json.get('url'):
-        obj_url = remove_html(obj_json['url'])
+        url_str = get_url_from_post(obj_json['url'])
+        obj_url = remove_html(url_str)
         metadata += \
             "    <meta content=\"" + obj_url + \
             "\" property=\"og:url\" />\n"
@@ -211,7 +213,8 @@ def _html_post_metadata_open_graph(domain: str, post_json_object: {},
             metadata += \
                 "    <meta content=\"" + description + \
                 "\" name=\"og:description\">\n"
-            attach_url = remove_html(attach_json['url'])
+            url_str = get_url_from_post(attach_json['url'])
+            attach_url = remove_html(url_str)
             metadata += \
                 "    <meta content=\"" + attach_url + \
                 "\" property=\"og:image\" />\n"
@@ -1197,7 +1200,8 @@ def _get_blog_citations_html(box_name: str,
             continue
         if not tag_json.get('url'):
             continue
-        citation_url = remove_html(tag_json['url'])
+        url_str = get_url_from_post(tag_json['url'])
+        citation_url = remove_html(url_str)
         citation_name = remove_html(tag_json['name'])
         citations_str += \
             '<li><a href="' + citation_url + '" tabindex="10">' + \
diff --git a/webapp_profile.py b/webapp_profile.py
index fe492e169..d5454f3b7 100644
--- a/webapp_profile.py
+++ b/webapp_profile.py
@@ -10,6 +10,7 @@ __module_group__ = "Web Interface"
 import os
 from pprint import pprint
 from webfinger import webfinger_handle
+from utils import get_url_from_post
 from utils import get_memorials
 from utils import text_in_file
 from utils import dangerous_markup
@@ -267,7 +268,8 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
     avatar_url = ''
     if profile_json.get('icon'):
         if profile_json['icon'].get('url'):
-            avatar_url = remove_html(profile_json['icon']['url'])
+            url_str = get_url_from_post(profile_json['icon']['url'])
+            avatar_url = remove_html(url_str)
     if not avatar_url:
         avatar_url = get_person_avatar_url(base_dir, person_url, person_cache)
     display_name = search_nickname
@@ -324,8 +326,8 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
     # profileBackgroundImage = ''
     # if profile_json.get('image'):
     #     if profile_json['image'].get('url'):
-    #         profileBackgroundImage = \
-    #             remove_html(profile_json['image']['url'])
+    #         url_str = get_url_from_post(profile_json['image']['url'])
+    #         profileBackgroundImage = remove_html(url_str)
 
     # url to return to
     back_url = path
@@ -348,7 +350,8 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
     image_url = ''
     if profile_json.get('image'):
         if profile_json['image'].get('url'):
-            image_url = remove_html(profile_json['image']['url'])
+            url_str = get_url_from_post(profile_json['image']['url'])
+            image_url = remove_html(url_str)
 
     also_known_as = None
     if profile_json.get('alsoKnownAs'):
@@ -1194,7 +1197,8 @@ def html_profile(signing_priv_key_pem: str,
     if profile_json.get('hasOccupation'):
         occupation_name = get_occupation_name(profile_json)
 
-    avatar_url = remove_html(profile_json['icon']['url'])
+    url_str = get_url_from_post(profile_json['icon']['url'])
+    avatar_url = remove_html(url_str)
     # use alternate path for local avatars to avoid any caching issues
     if '://' + domain_full + '/system/accounts/avatars/' in avatar_url:
         avatar_url = \
diff --git a/webapp_search.py b/webapp_search.py
index f7a84daab..9f24c7172 100644
--- a/webapp_search.py
+++ b/webapp_search.py
@@ -10,6 +10,7 @@ __module_group__ = "Web Interface"
 import os
 from shutil import copyfile
 import urllib.parse
+from utils import get_url_from_post
 from utils import date_from_string_format
 from utils import get_attributed_to
 from utils import get_actor_from_post_id
@@ -571,7 +572,8 @@ def html_skills_search(actor: str, translate: {}, base_dir: str,
                         skill_level_str = '0' + skill_level_str
                     if skill_level < 10:
                         skill_level_str = '0' + skill_level_str
-                    icon_url = remove_html(actor_json['icon']['url'])
+                    url_str = get_url_from_post(actor_json['icon']['url'])
+                    icon_url = remove_html(url_str)
                     index_str = \
                         skill_level_str + ';' + actor + ';' + \
                         actor_json['name'] + \
@@ -611,7 +613,9 @@ def html_skills_search(actor: str, translate: {}, base_dir: str,
                                 skill_level_str = '0' + skill_level_str
                             if skill_level < 10:
                                 skill_level_str = '0' + skill_level_str
-                            icon_url = remove_html(actor_json['icon']['url'])
+                            url_str = \
+                                get_url_from_post(actor_json['icon']['url'])
+                            icon_url = remove_html(url_str)
                             index_str = \
                                 skill_level_str + ';' + actor + ';' + \
                                 actor_json['name'] + \
@@ -1375,7 +1379,8 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
                     for attach in post_json_object['object']['attachment']:
                         if not attach.get('url'):
                             continue
-                        attach_url = remove_html(attach['url'])
+                        url_str = get_url_from_post(attach['url'])
+                        attach_url = remove_html(url_str)
                         hashtag_feed += \
                             '         <link>' + attach_url + '</link>'
                 hashtag_feed += '     </item>'
diff --git a/webapp_utils.py b/webapp_utils.py
index 7871608b3..9a6497799 100644
--- a/webapp_utils.py
+++ b/webapp_utils.py
@@ -12,6 +12,7 @@ from shutil import copyfile
 from collections import OrderedDict
 from session import get_json
 from session import get_json_valid
+from utils import get_url_from_post
 from utils import get_media_url_from_video
 from utils import get_attributed_to
 from utils import local_network_host
@@ -857,7 +858,8 @@ def html_header_with_person_markup(css_filename: str, instance_title: str,
     domain_full = actor_json['id'].split('://')[1].split('/')[0]
     handle = actor_json['preferredUsername'] + '@' + domain_full
 
-    icon_url = remove_html(actor_json['icon']['url'])
+    url_str = get_url_from_post(actor_json['icon']['url'])
+    icon_url = remove_html(url_str)
     person_markup = \
         '      "about": {\n' + \
         '        "@type" : "Person",\n' + \
@@ -893,7 +895,8 @@ def html_header_with_person_markup(css_filename: str, instance_title: str,
         '    </script>\n'
 
     description = remove_html(description)
-    actor2_url = remove_html(actor_json['url'])
+    url_str = get_url_from_post(actor_json['url'])
+    actor2_url = remove_html(url_str)
     og_metadata = \
         "    <meta content=\"profile\" property=\"og:type\" />\n" + \
         "    <meta content=\"" + description + \
@@ -1344,7 +1347,7 @@ def get_post_attachments_as_html(base_dir: str,
             name = attach['hreflang']
         url = None
         if attach.get('url'):
-            url = attach['url']
+            url = get_url_from_post(attach['url'])
         elif attach.get('href'):
             url = attach['href']
         if name and url:
@@ -1397,7 +1400,8 @@ def get_post_attachments_as_html(base_dir: str,
             image_description = attach['name'].replace('"', "'")
             image_description = remove_html(image_description)
         if _is_image_mime_type(media_type):
-            image_url = remove_html(attach['url'])
+            url_str = get_url_from_post(attach['url'])
+            image_url = remove_html(url_str)
             if image_url in attached_urls:
                 continue
             attached_urls.append(image_url)
@@ -1452,7 +1456,8 @@ def get_post_attachments_as_html(base_dir: str,
                                 '   ' + license_str + \
                                 '</figcaption></figure>\n'
                     if post_json_object['object'].get('url'):
-                        image_post_url = post_json_object['object']['url']
+                        url_str = post_json_object['object']['url']
+                        image_post_url = get_url_from_post(url_str)
                     else:
                         image_post_url = post_json_object['object']['id']
                     image_post_url = remove_html(image_post_url)
@@ -1554,7 +1559,8 @@ def get_post_attachments_as_html(base_dir: str,
                 if box_name == 'tlmedia':
                     gallery_str += '<div class="gallery">\n'
                     if post_json_object['object'].get('url'):
-                        video_post_url = post_json_object['object']['url']
+                        url_str = post_json_object['object']['url']
+                        video_post_url = get_url_from_post(url_str)
                     else:
                         video_post_url = post_json_object['object']['id']
                     video_post_url = remove_html(video_post_url)
@@ -1629,7 +1635,8 @@ def get_post_attachments_as_html(base_dir: str,
                 attachment_ctr += 1
         elif _is_audio_mime_type(media_type):
             extension = '.mp3'
-            audio_url = remove_html(attach['url'])
+            url_str = get_url_from_post(attach['url'])
+            audio_url = remove_html(url_str)
             if audio_url in attached_urls:
                 continue
             attached_urls.append(audio_url)
@@ -1664,7 +1671,8 @@ def get_post_attachments_as_html(base_dir: str,
                         gallery_str += '    </audio>\n'
                         gallery_str += '  </a>\n'
                     if post_json_object['object'].get('url'):
-                        audio_post_url = post_json_object['object']['url']
+                        url_str = post_json_object['object']['url']
+                        audio_post_url = get_url_from_post(url_str)
                     else:
                         audio_post_url = post_json_object['object']['id']
                     audio_post_url = remove_html(audio_post_url)
diff --git a/webfinger.py b/webfinger.py
index 87b0b9fec..005521f91 100644
--- a/webfinger.py
+++ b/webfinger.py
@@ -13,6 +13,7 @@ from session import get_json
 from session import get_json_valid
 from cache import store_webfinger_in_cache
 from cache import get_webfinger_from_cache
+from utils import get_url_from_post
 from utils import remove_html
 from utils import acct_handle_dir
 from utils import get_attachment_property_value
@@ -433,7 +434,8 @@ def _webfinger_update_avatar(wf_json: {}, actor_json: {}) -> bool:
     """Updates the avatar image link
     """
     found = False
-    avatar_url = remove_html(actor_json['icon']['url'])
+    url_str = get_url_from_post(actor_json['icon']['url'])
+    avatar_url = remove_html(url_str)
     media_type = actor_json['icon']['mediaType']
     for link in wf_json['links']:
         if not link.get('rel'):
@@ -463,7 +465,8 @@ def _webfinger_update_vcard(wf_json: {}, actor_json: {}) -> bool:
         if link.get('type'):
             if link['type'] == 'text/vcard':
                 return False
-    actor_url = remove_html(actor_json['url'])
+    url_str = get_url_from_post(actor_json['url'])
+    actor_url = remove_html(url_str)
     wf_json['links'].append({
         "href": actor_url,
         "rel": "http://webfinger.net/rel/profile-page",