Handle situations where urls are lists

main
Bob Mottram 2023-12-09 14:18:24 +00:00
parent 357896c9cf
commit 080b2ca352
23 changed files with 218 additions and 95 deletions

View File

@ -16,6 +16,7 @@ from webapp_utils import html_footer
from webapp_utils import get_post_attachments_as_html
from webapp_utils import edit_text_area
from webapp_media import add_embedded_elements
from utils import get_url_from_post
from utils import date_from_string_format
from utils import get_attributed_to
from utils import remove_eol
@ -314,7 +315,8 @@ def _html_blog_post_content(debug: bool, session, authorized: bool,
continue
if not tag_json.get('url'):
continue
citation_url = remove_html(tag_json['url'])
url_str = get_url_from_post(tag_json['url'])
citation_url = remove_html(url_str)
citation_name = remove_html(tag_json['name'])
citations_str += \
'<li><a href="' + citation_url + '">' + \
@ -482,7 +484,8 @@ def html_blog_post(session, authorized: bool,
title = post_json_object['object']['summary']
url = ''
if post_json_object['object'].get('url'):
url = remove_html(post_json_object['object']['url'])
url_str = get_url_from_post(post_json_object['object']['url'])
url = remove_html(url_str)
snippet = _get_snippet_from_blog_content(post_json_object,
system_language)
blog_str = html_header_with_blog_markup(css_filename, instance_title,

View File

@ -11,6 +11,7 @@ import os
from pprint import pprint
from webfinger import webfinger_handle
from auth import create_basic_auth_header
from utils import get_url_from_post
from utils import remove_domain_port
from utils import has_users_path
from utils import get_full_domain
@ -603,7 +604,8 @@ def outbox_bookmark(recent_posts_cache: {},
if debug:
print('DEBUG: c2s bookmark Add request arrived in outbox')
message_url = remove_id_ending(message_json['object']['url'])
url_str = get_url_from_post(message_json['object']['url'])
message_url = remove_id_ending(url_str)
message_url = remove_html(message_url)
domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url)
@ -661,7 +663,8 @@ def outbox_undo_bookmark(recent_posts_cache: {},
if debug:
print('DEBUG: c2s unbookmark Remove request arrived in outbox')
message_url = remove_id_ending(message_json['object']['url'])
url_str = get_url_from_post(message_json['object']['url'])
message_url = remove_id_ending(url_str)
message_url = remove_html(message_url)
domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url)

View File

@ -15,6 +15,7 @@ import email.parser
import urllib.parse
from shutil import copyfile
from dateutil.parser import parse
from utils import get_url_from_post
from utils import is_right_to_left_text
from utils import language_right_to_left
from utils import binary_is_image
@ -446,11 +447,14 @@ def replace_emoji_from_tags(session, base_dir: str,
continue
if not tag_item['icon'].get('url'):
continue
if '/' not in tag_item['icon']['url']:
url_str = get_url_from_post(tag_item['icon']['url'])
if '/' not in url_str:
continue
if tag_item['name'] not in content:
continue
tag_url = remove_html(tag_item['icon']['url'])
tag_url = remove_html(url_str)
if not tag_url:
continue
icon_name = tag_url.split('/')[-1]
if icon_name:
if len(icon_name) > 1:
@ -532,7 +536,8 @@ def replace_emoji_from_tags(session, base_dir: str,
emoji_tag_name = tag_item['name'].replace(':', '')
else:
emoji_tag_name = ''
tag_url = remove_html(tag_item['icon']['url'])
url_str = get_url_from_post(tag_item['icon']['url'])
tag_url = remove_html(url_str)
emoji_html = "<img src=\"" + tag_url + "\" alt=\"" + \
emoji_tag_name + \
"\" align=\"middle\" class=\"" + html_class + "\"/>"

View File

@ -300,6 +300,7 @@ from languages import set_actor_languages
from languages import get_understood_languages
from like import update_likes_collection
from reaction import update_reaction_collection
from utils import get_url_from_post
from utils import date_from_string_format
from utils import corp_servers
from utils import get_attributed_to
@ -2209,9 +2210,19 @@ class PubServer(BaseHTTPRequestHandler):
if has_object_dict(message_json):
if debug:
print('INBOX: checking object fields')
# check that url is a string or list
if message_json['object'].get('url'):
if not isinstance(message_json['object']['url'], str) and \
not isinstance(message_json['object']['url'], list):
print('INBOX: url should be a string or list ' +
str(message_json['object']['url']))
self._400()
self.server.postreq_busy = False
return 3
# check that some fields are strings
string_fields = (
'id', 'actor', 'type', 'content', 'published',
'summary', 'url'
'summary'
)
for check_field in string_fields:
if not message_json['object'].get(check_field):
@ -6696,7 +6707,9 @@ class PubServer(BaseHTTPRequestHandler):
for m_type, last_part in uploads:
rep_str = '/' + last_part
if m_type == 'avatar':
actor_url = remove_html(actor_json['icon']['url'])
url_str = \
get_url_from_post(actor_json['icon']['url'])
actor_url = remove_html(url_str)
last_part_of_url = actor_url.split('/')[-1]
srch_str = '/' + last_part_of_url
actor_url = actor_url.replace(srch_str, rep_str)
@ -6709,8 +6722,10 @@ class PubServer(BaseHTTPRequestHandler):
actor_json['icon']['mediaType'] = \
'image/' + img_ext
elif m_type == 'image':
url_str = \
get_url_from_post(actor_json['image']['url'])
im_url = \
remove_html(actor_json['image']['url'])
remove_html(url_str)
last_part_of_url = im_url.split('/')[-1]
srch_str = '/' + last_part_of_url
actor_json['image']['url'] = \

View File

@ -16,6 +16,7 @@ import webbrowser
import urllib.parse
from pathlib import Path
from random import randint
from utils import get_url_from_post
from utils import get_actor_languages_list
from utils import get_attributed_to
from utils import remove_html
@ -761,7 +762,8 @@ def _show_replies_on_post(post_json_object: {}, max_replies: int) -> None:
print('')
ctr = 0
for item in object_replies['items']:
item_url = remove_html(item['url'])
url_str = get_url_from_post(item['url'])
item_url = remove_html(url_str)
print('' + str(item_url))
ctr += 1
if ctr >= max_replies:

View File

@ -18,6 +18,7 @@ from languages import understood_post_language
from like import update_likes_collection
from reaction import update_reaction_collection
from reaction import valid_emoji_content
from utils import get_url_from_post
from utils import date_from_string_format
from utils import date_epoch
from utils import date_utcnow
@ -192,9 +193,10 @@ def cache_svg_images(session, base_dir: str, http_prefix: str,
continue
if not attach.get('url'):
continue
if attach['url'].endswith('.svg') or \
url_str = get_url_from_post(attach['url'])
if url_str.endswith('.svg') or \
'svg' in attach['mediaType']:
url = remove_html(attach['url'])
url = remove_html(url_str)
if not url_permitted(url, federation_list):
continue
# if this is a local image then it has already been
@ -1209,7 +1211,8 @@ def _person_receive_update(base_dir: str,
debug: bool, http_prefix: str) -> bool:
"""Changes an actor. eg: avatar or display name change
"""
person_url = remove_html(person_json['url'])
url_str = get_url_from_post(person_json['url'])
person_url = remove_html(url_str)
if debug:
print('Receiving actor update for ' + person_url +
' ' + str(person_json))
@ -1901,7 +1904,7 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str,
print('Person Update: ' + str(message_json))
if debug:
print('DEBUG: Profile update was received for ' +
message_json['object']['url'])
str(message_json['object']['url']))
return True
return False
@ -2714,14 +2717,15 @@ def _receive_bookmark(recent_posts_cache: {},
if debug:
print('DEBUG: inbox bookmark Add missing url')
return False
if '/statuses/' not in message_json['object']['url']:
url_str = get_url_from_post(message_json['object']['url'])
if '/statuses/' not in url_str:
if debug:
print('DEBUG: inbox bookmark Add missing statuses un url')
return False
if debug:
print('DEBUG: c2s inbox bookmark Add request arrived in outbox')
message_url2 = remove_html(message_json['object']['url'])
message_url2 = remove_html(url_str)
message_url = remove_id_ending(message_url2)
domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url)
@ -2840,7 +2844,8 @@ def _receive_undo_bookmark(recent_posts_cache: {},
if debug:
print('DEBUG: inbox undo bookmark Remove missing url')
return False
if '/statuses/' not in message_json['object']['url']:
url_str = get_url_from_post(message_json['object']['url'])
if '/statuses/' not in url_str:
if debug:
print('DEBUG: inbox undo bookmark Remove missing statuses un url')
return False
@ -2848,7 +2853,7 @@ def _receive_undo_bookmark(recent_posts_cache: {},
print('DEBUG: c2s inbox Remove bookmark ' +
'request arrived in outbox')
message_url2 = remove_html(message_json['object']['url'])
message_url2 = remove_html(url_str)
message_url = remove_id_ending(message_url2)
domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url)

View File

@ -9,6 +9,7 @@ __module_group__ = "Core"
import os
from utils import get_url_from_post
from utils import is_float
from utils import acct_dir
from utils import load_json
@ -400,7 +401,8 @@ def get_map_preferences_url(base_dir: str, nickname: str, domain: str) -> str:
if os.path.isfile(maps_filename):
maps_json = load_json(maps_filename)
if maps_json.get('url'):
return remove_html(maps_json['url'])
url_str = get_url_from_post(maps_json['url'])
return remove_html(url_str)
return None

View File

@ -8,6 +8,7 @@ __status__ = "Production"
__module_group__ = "API"
import os
from utils import get_url_from_post
from utils import load_json
from utils import get_config_param
from utils import acct_dir
@ -79,8 +80,10 @@ def _meta_data_instance_v1(show_accounts: bool,
if admin_actor.get('published'):
created_at = admin_actor['published']
icon_url = remove_html(admin_actor['icon']['url'])
image_url = remove_html(admin_actor['image']['url'])
url_str = get_url_from_post(admin_actor['icon']['url'])
icon_url = remove_html(url_str)
url_str = get_url_from_post(admin_actor['image']['url'])
image_url = remove_html(url_str)
instance = {
'approval_required': False,
'invites_enabled': False,
@ -205,8 +208,10 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str,
account_json = load_json(account_filename)
if not account_json:
return {}
avatar_url = remove_html(account_json['icon']['url'])
image_url = remove_html(account_json['image']['url'])
url_str = get_url_from_post(account_json['icon']['url'])
avatar_url = remove_html(url_str)
url_str = get_url_from_post(account_json['image']['url'])
image_url = remove_html(url_str)
joined_date = "2016-10-05T10:30:00Z"
if account_json.get('published'):
joined_date = account_json['published']

View File

@ -8,6 +8,7 @@ __status__ = "Production"
__module_group__ = "API"
import os
from utils import get_url_from_post
from utils import load_json
from utils import get_config_param
from utils import acct_dir
@ -85,8 +86,10 @@ def _meta_data_instance_v2(show_accounts: bool,
if admin_actor.get('published'):
created_at = admin_actor['published']
icon_url = remove_html(admin_actor['icon']['url'])
image_url = remove_html(admin_actor['image']['url'])
url_str = get_url_from_post(admin_actor['icon']['url'])
icon_url = remove_html(url_str)
url_str = get_url_from_post(admin_actor['image']['url'])
image_url = remove_html(url_str)
thumbnail_url = http_prefix + '://' + domain_full + '/login.png'
admin_email = None
noindex = True

View File

@ -19,6 +19,7 @@ from datetime import timezone
from collections import OrderedDict
from utils import valid_post_date
from categories import set_hashtag_category
from utils import get_url_from_post
from utils import remove_zero_length_strings
from utils import date_from_string_format
from utils import acct_handle_dir
@ -1143,7 +1144,8 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
continue
if not json_feed_item.get('url'):
continue
if not isinstance(json_feed_item['url'], str):
url_str = get_url_from_post(json_feed_item['url'])
if not url_str:
continue
if not json_feed_item.get('date_published'):
if not json_feed_item.get('date_modified'):
@ -1182,7 +1184,7 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
if tag_name not in description:
description += ' ' + tag_name
link = remove_html(json_feed_item['url'])
link = remove_html(url_str)
if '://' not in link:
continue
if len(link) > max_bytes:
@ -1602,7 +1604,9 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
description = remove_html(description)
tags_from_post = _get_hashtags_from_post(post_json_object)
summary = post_json_object['object']['summary']
url2 = remove_html(post_json_object['object']['url'])
url_str = \
get_url_from_post(post_json_object['object']['url'])
url2 = remove_html(url_str)
_add_newswire_dict_entry(base_dir, domain,
newswire, published,
summary, url2,

View File

@ -37,6 +37,7 @@ from roles import set_role
from roles import actor_roles_from_list
from roles import get_actor_roles_list
from media import process_meta_data
from utils import get_url_from_post
from utils import date_utcnow
from utils import get_memorials
from utils import is_account_dir
@ -200,7 +201,8 @@ def randomize_actor_images(person_json: {}) -> None:
This causes other instances to update their cached avatar image
"""
person_id = person_json['id']
last_part_of_filename = person_json['icon']['url'].split('/')[-1]
url_str = get_url_from_post(person_json['icon']['url'])
last_part_of_filename = url_str.split('/')[-1]
existing_extension = last_part_of_filename.split('.')[1]
# NOTE: these files don't need to have cryptographically
# secure names
@ -210,7 +212,8 @@ def randomize_actor_images(person_json: {}) -> None:
person_json['icon']['url'] = \
base_url + '/system/accounts/avatars/' + nickname + \
'/avatar' + rand_str + '.' + existing_extension
last_part_of_filename = person_json['image']['url'].split('/')[-1]
url_str = get_url_from_post(person_json['image']['url'])
last_part_of_filename = url_str.split('/')[-1]
existing_extension = last_part_of_filename.split('.')[1]
rand_str = str(randint(10000000000000, 99999999999999)) # nosec
person_json['image']['url'] = \
@ -229,6 +232,9 @@ def get_actor_update_json(actor_json: {}) -> {}:
indexable = False
if actor_json.get('indexable'):
indexable = True
actor_url = get_url_from_post(actor_json['url'])
icon_url = get_url_from_post(actor_json['icon']['url'])
image_url = get_url_from_post(actor_json['image']['url'])
return {
'@context': [
"https://www.w3.org/ns/activitystreams",
@ -311,11 +317,11 @@ def get_actor_update_json(actor_json: {}) -> {}:
'type': actor_json['type'],
'icon': {
'type': 'Image',
'url': actor_json['icon']['url']
'url': icon_url
},
'image': {
'type': 'Image',
'url': actor_json['image']['url']
'url': image_url
},
'attachment': actor_json['attachment'],
'following': actor_json['id'] + '/following',
@ -327,7 +333,7 @@ def get_actor_update_json(actor_json: {}) -> {}:
'preferredUsername': actor_json['preferredUsername'],
'name': actor_json['name'],
'summary': actor_json['summary'],
'url': actor_json['url'],
'url': actor_url,
'manuallyApprovesFollowers': manually_approves_followers,
'discoverable': actor_json['discoverable'],
'memorial': memorial,
@ -1844,8 +1850,9 @@ def get_person_avatar_url(base_dir: str, person_url: str,
if person_json.get('icon'):
if person_json['icon'].get('url'):
if '.svg' not in person_json['icon']['url'].lower():
return remove_html(person_json['icon']['url'])
url_str = get_url_from_post(person_json['icon']['url'])
if '.svg' not in url_str.lower():
return remove_html(url_str)
return None

17
pgp.py
View File

@ -12,6 +12,7 @@ import base64
import subprocess
from pathlib import Path
from person import get_actor_json
from utils import get_url_from_post
from utils import safe_system_string
from utils import contains_pgp_public_key
from utils import is_pgp_encrypted
@ -712,18 +713,20 @@ def pgp_public_key_upload(base_dir: str, session,
def actor_to_vcard(actor: {}, domain: str) -> str:
"""Returns a vcard for a given actor
"""
actor_url_str = get_url_from_post(actor['url'])
vcard_str = 'BEGIN:VCARD\n'
vcard_str += 'VERSION:4.0\n'
vcard_str += 'REV:' + actor['published'] + '\n'
vcard_str += 'FN:' + remove_html(actor['name']) + '\n'
vcard_str += 'NICKNAME:' + actor['preferredUsername'] + '\n'
vcard_str += 'URL;TYPE=profile:' + actor['url'] + '\n'
vcard_str += 'URL;TYPE=profile:' + actor_url_str + '\n'
blog_address = get_blog_address(actor)
if blog_address:
vcard_str += 'URL;TYPE=blog:' + blog_address + '\n'
vcard_str += 'NOTE:' + remove_html(actor['summary']) + '\n'
if actor['icon']['url']:
vcard_str += 'PHOTO:' + actor['icon']['url'] + '\n'
url_str = get_url_from_post(actor['icon']['url'])
if url_str:
vcard_str += 'PHOTO:' + url_str + '\n'
pgp_key = get_pgp_pub_key(actor)
if pgp_key:
vcard_str += 'KEY:data:application/pgp-keys;base64,' + \
@ -801,18 +804,20 @@ def actor_to_vcard_xml(actor: {}, domain: str) -> str:
vcard_str += ' <impp>' + \
'<parameters><type><text>cwtch</text></type></parameters>' + \
'<text>' + cwtch_address + '</text></impp>\n'
url_str = get_url_from_post(actor['url'])
vcard_str += ' <url>' + \
'<parameters><type><text>profile</text></type></parameters>' + \
'<uri>' + actor['url'] + '</uri></url>\n'
'<uri>' + url_str + '</uri></url>\n'
blog_address = get_blog_address(actor)
if blog_address:
vcard_str += ' <url>' + \
'<parameters><type><text>blog</text></type></parameters>' + \
'<uri>' + blog_address + '</uri></url>\n'
vcard_str += ' <rev>' + actor['published'] + '</rev>\n'
if actor['icon']['url']:
url_str = get_url_from_post(actor['icon']['url'])
if url_str:
vcard_str += \
' <photo><uri>' + actor['icon']['url'] + '</uri></photo>\n'
' <photo><uri>' + url_str + '</uri></photo>\n'
pgp_key = get_pgp_pub_key(actor)
if pgp_key:
pgp_key_encoded = \

View File

@ -34,6 +34,7 @@ from webfinger import webfinger_handle
from httpsig import create_signed_header
from siteactive import site_is_active
from languages import understood_post_language
from utils import get_url_from_post
from utils import date_from_string_format
from utils import date_epoch
from utils import date_utcnow
@ -406,7 +407,8 @@ def get_person_box(signing_priv_key_pem: str, origin_domain: str,
avatar_url = None
if person_json.get('icon'):
if person_json['icon'].get('url'):
avatar_url = remove_html(person_json['icon']['url'])
url_str = get_url_from_post(person_json['icon']['url'])
avatar_url = remove_html(url_str)
display_name = None
possible_display_name = None
if person_json.get('name'):
@ -652,16 +654,16 @@ def _get_posts(session, outbox_url: str, max_posts: int,
if tag_item.get('name') and tag_item.get('icon'):
if tag_item['icon'].get('url'):
# No emoji from non-permitted domains
if url_permitted(tag_item['icon']['url'],
url_str = \
get_url_from_post(tag_item['icon']['url'])
if url_permitted(url_str,
federation_list):
emoji_name = tag_item['name']
emoji_icon = \
remove_html(tag_item['icon']['url'])
emoji_icon = remove_html(url_str)
emoji[emoji_name] = emoji_icon
else:
if debug:
print('url not permitted ' +
tag_item['icon']['url'])
print('url not permitted ' + url_str)
if tag_type == 'mention':
if tag_item.get('name'):
if tag_item['name'] not in mentions:
@ -703,15 +705,15 @@ def _get_posts(session, outbox_url: str, max_posts: int,
for attach in this_item['attachment']:
if attach.get('name') and attach.get('url'):
# no attachments from non-permitted domains
attach_url = remove_html(attach['url'])
url_str = get_url_from_post(attach['url'])
attach_url = remove_html(url_str)
if url_permitted(attach_url,
federation_list):
attachment.append([attach['name'],
attach_url])
else:
if debug:
print('url not permitted ' +
attach['url'])
print('url not permitted ' + url_str)
sensitive = False
if this_item.get('sensitive'):
@ -906,9 +908,9 @@ def _get_posts_for_blocked_domains(base_dir: str,
continue
if is_blocked_domain(base_dir, post_domain):
if item['object'].get('url'):
url = item['object']['url']
url = get_url_from_post(item['object']['url'])
else:
url = item['object']['id']
url = get_url_from_post(item['object']['id'])
url = remove_html(url)
if not blocked_posts.get(post_domain):
blocked_posts[post_domain] = [url]
@ -929,9 +931,9 @@ def _get_posts_for_blocked_domains(base_dir: str,
continue
if is_blocked_domain(base_dir, post_domain):
if item['object'].get('url'):
url = item['object']['url']
url = get_url_from_post(item['object']['url'])
else:
url = item['object']['id']
url = get_url_from_post(item['object']['id'])
url = remove_html(url)
if not blocked_posts.get(post_domain):
blocked_posts[post_domain] = [url]
@ -2206,7 +2208,8 @@ def create_blog_post(base_dir: str,
low_bandwidth, content_license_url,
media_license_url, media_creator,
languages_understood, translate, buy_url, chat_url)
obj_url = remove_html(blog_json['object']['url'])
url_str = get_url_from_post(blog_json['object']['url'])
obj_url = remove_html(url_str)
if '/@/' not in obj_url:
blog_json['object']['url'] = obj_url.replace('/@', '/users/')
_append_citations_to_blog_post(base_dir, nickname, domain, blog_json)

View File

@ -56,6 +56,7 @@ from follow import clear_followers
from follow import send_follow_request_via_server
from follow import send_unfollow_request_via_server
from siteactive import site_is_active
from utils import get_url_from_post
from utils import date_from_string_format
from utils import date_utcnow
from utils import is_right_to_left_text
@ -1470,10 +1471,11 @@ def test_post_message_between_servers(base_dir: str) -> None:
assert attached.get('type')
assert attached.get('url')
assert attached['mediaType'] == 'image/png'
if '/system/media_attachments/files/' not in attached['url']:
print(attached['url'])
assert '/system/media_attachments/files/' in attached['url']
assert attached['url'].endswith('.png')
url_str = get_url_from_post(attached['url'])
if '/system/media_attachments/files/' not in url_str:
print(str(attached['url']))
assert '/system/media_attachments/files/' in url_str
assert url_str.endswith('.png')
assert attached.get('width')
assert attached.get('height')
assert attached['width'] > 0
@ -4255,7 +4257,7 @@ def _test_danger_svg(base_dir: str) -> None:
federation_list, debug,
svg_image_filename)
url = post_json_object['object']['attachment'][0]['url']
url = get_url_from_post(post_json_object['object']['attachment'][0]['url'])
assert url == 'https://ratsratsrats.live/media/1234_wibble.svg'
with open(svg_image_filename, 'rb') as fp_svg:
@ -7317,8 +7319,8 @@ def _test_xml_podcast_dict(base_dir: str) -> None:
assert podcast_properties.get('funding')
assert int(podcast_properties['episode']) == 5
assert podcast_properties['funding']['text'] == "Support the show"
assert podcast_properties['funding']['url'] == \
"https://whoframed.rodger/donate"
url_str = get_url_from_post(podcast_properties['funding']['url'])
assert url_str == "https://whoframed.rodger/donate"
assert len(podcast_properties['transcripts']) == 3
assert len(podcast_properties['valueRecipients']) == 2
assert len(podcast_properties['persons']) == 5

View File

@ -110,6 +110,31 @@ def date_epoch():
return date_from_numbers(1970, 1, 1, 0, 0)
def get_url_from_post(url_field) -> str:
    """Returns a url from a post object

    url_field is the value of a 'url' property, which can either be a
    plain string or a list of link dictionaries.

    A string is returned unchanged. For a list, the 'href' of the first
    dictionary entry whose 'mediaType' is exactly 'text/html' and whose
    'href' is a string containing '://' is returned. Entries which are
    not dictionaries, or which lack a string 'href' or 'mediaType',
    are ignored. Any other kind of value, or a list without a suitable
    entry, results in an empty string.
    """
    if isinstance(url_field, str):
        return url_field
    if not isinstance(url_field, list):
        return ''
    for link in url_field:
        if not isinstance(link, dict):
            continue
        # a missing key yields None, which fails the string check
        href = link.get('href')
        if not isinstance(href, str):
            continue
        # only the human readable (html) representation is wanted;
        # a missing or non-string mediaType can never match
        if link.get('mediaType') != 'text/html':
            continue
        # skip relative or malformed links without a protocol prefix
        if '://' in href:
            return href
    return ''
def get_attributed_to(field) -> str:
"""Returns the actor
"""
@ -404,7 +429,7 @@ def get_media_descriptions_from_post(post_json_object: {}) -> str:
continue
descriptions += attach['name'] + ' '
if attach.get('url'):
descriptions += attach['url'] + ' '
descriptions += get_url_from_post(attach['url']) + ' '
return descriptions.strip()
@ -2056,7 +2081,7 @@ def _remove_attachment(base_dir: str, http_prefix: str, domain: str,
return
if not post_json['attachment'][0].get('url'):
return
attachment_url = post_json['attachment'][0]['url']
attachment_url = get_url_from_post(post_json['attachment'][0]['url'])
if not attachment_url:
return
attachment_url = remove_html(attachment_url)

View File

@ -7,6 +7,7 @@ __email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Timeline"
from utils import get_url_from_post
from utils import remove_html
from utils import get_full_domain
from utils import get_nickname_from_actor
@ -204,9 +205,10 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
continue
if not lang.get('url'):
continue
if not isinstance(lang['url'], str):
url_str = get_url_from_post(lang['url'])
if not url_str:
continue
if not lang['url'].endswith('.vtt'):
if not url_str.endswith('.vtt'):
continue
for understood in languages_understood:
if understood in lang['identifier']:
@ -214,7 +216,7 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
"type": "Document",
"name": understood,
"mediaType": "text/vtt",
"url": lang['url']
"url": url_str
})
break

View File

@ -8,6 +8,7 @@ __status__ = "Production"
__module_group__ = "Moderation"
import os
from utils import get_url_from_post
from utils import remove_html
from utils import is_artist
from utils import is_account_dir
@ -388,7 +389,8 @@ def html_moderation_info(translate: {}, base_dir: str,
ext = ''
if actor_json.get('icon'):
if actor_json['icon'].get('url'):
avatar_url = remove_html(actor_json['icon']['url'])
url_str = get_url_from_post(actor_json['icon']['url'])
avatar_url = remove_html(url_str)
if '.' in avatar_url:
ext = '.' + avatar_url.split('.')[-1]
acct_url = \

View File

@ -12,6 +12,7 @@ import html
import datetime
import urllib.parse
from shutil import copyfile
from utils import get_url_from_post
from utils import get_config_param
from utils import remove_html
from media import path_is_audio
@ -39,7 +40,8 @@ def _html_podcast_chapters(link_url: str,
if not isinstance(podcast_properties[key], dict):
return ''
if podcast_properties[key].get('url'):
chapters_url = remove_html(podcast_properties[key]['url'])
url_str = get_url_from_post(podcast_properties[key]['url'])
chapters_url = remove_html(url_str)
elif podcast_properties[key].get('uri'):
chapters_url = podcast_properties[key]['uri']
else:
@ -80,7 +82,8 @@ def _html_podcast_chapters(link_url: str,
chapter_title = chapter['title']
chapter_url = ''
if chapter.get('url'):
chapter_url = remove_html(chapter['url'])
url_str = get_url_from_post(chapter['url'])
chapter_url = remove_html(url_str)
chapter_title = \
'<a href="' + chapter_url + '">' + \
chapter['title'] + '<\a>'
@ -122,7 +125,8 @@ def _html_podcast_transcripts(podcast_properties: {}, translate: {}) -> str:
for _ in podcast_properties[key]:
transcript_url = None
if podcast_properties[key].get('url'):
transcript_url = remove_html(podcast_properties[key]['url'])
url_str = get_url_from_post(podcast_properties[key]['url'])
transcript_url = remove_html(url_str)
elif podcast_properties[key].get('uri'):
transcript_url = podcast_properties[key]['uri']
if not transcript_url:
@ -155,7 +159,8 @@ def _html_podcast_social_interactions(podcast_properties: {},
if podcast_properties[key].get('uri'):
episode_post_url = podcast_properties[key]['uri']
elif podcast_properties[key].get('url'):
episode_post_url = remove_html(podcast_properties[key]['url'])
url_str = get_url_from_post(podcast_properties[key]['url'])
episode_post_url = remove_html(url_str)
elif podcast_properties[key].get('text'):
episode_post_url = podcast_properties[key]['text']
else:
@ -439,7 +444,8 @@ def html_podcast_episode(translate: {},
# donate button
if podcast_properties.get('funding'):
if podcast_properties['funding'].get('url'):
donate_url = remove_html(podcast_properties['funding']['url'])
url_str = get_url_from_post(podcast_properties['funding']['url'])
donate_url = remove_html(url_str)
podcast_str += \
'<p><span itemprop="funding"><a href="' + donate_url + \
'" rel="donation"><button class="donateButton">' + \

View File

@ -24,6 +24,7 @@ from posts import post_is_muted
from posts import get_person_box
from posts import download_announce
from posts import populate_replies_json
from utils import get_url_from_post
from utils import date_from_string_format
from utils import remove_markup_tag
from utils import ap_proxy_type
@ -155,7 +156,8 @@ def _html_post_metadata_open_graph(domain: str, post_json_object: {},
" <meta content=\"@" + actor_handle + \
"\" property=\"og:title\" />\n"
if obj_json.get('url'):
obj_url = remove_html(obj_json['url'])
url_str = get_url_from_post(obj_json['url'])
obj_url = remove_html(url_str)
metadata += \
" <meta content=\"" + obj_url + \
"\" property=\"og:url\" />\n"
@ -211,7 +213,8 @@ def _html_post_metadata_open_graph(domain: str, post_json_object: {},
metadata += \
" <meta content=\"" + description + \
"\" name=\"og:description\">\n"
attach_url = remove_html(attach_json['url'])
url_str = get_url_from_post(attach_json['url'])
attach_url = remove_html(url_str)
metadata += \
" <meta content=\"" + attach_url + \
"\" property=\"og:image\" />\n"
@ -1197,7 +1200,8 @@ def _get_blog_citations_html(box_name: str,
continue
if not tag_json.get('url'):
continue
citation_url = remove_html(tag_json['url'])
url_str = get_url_from_post(tag_json['url'])
citation_url = remove_html(url_str)
citation_name = remove_html(tag_json['name'])
citations_str += \
'<li><a href="' + citation_url + '" tabindex="10">' + \

View File

@ -10,6 +10,7 @@ __module_group__ = "Web Interface"
import os
from pprint import pprint
from webfinger import webfinger_handle
from utils import get_url_from_post
from utils import get_memorials
from utils import text_in_file
from utils import dangerous_markup
@ -267,7 +268,8 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
avatar_url = ''
if profile_json.get('icon'):
if profile_json['icon'].get('url'):
avatar_url = remove_html(profile_json['icon']['url'])
url_str = get_url_from_post(profile_json['icon']['url'])
avatar_url = remove_html(url_str)
if not avatar_url:
avatar_url = get_person_avatar_url(base_dir, person_url, person_cache)
display_name = search_nickname
@ -324,8 +326,8 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
# profileBackgroundImage = ''
# if profile_json.get('image'):
# if profile_json['image'].get('url'):
# profileBackgroundImage = \
# remove_html(profile_json['image']['url'])
# url_str = get_url_from_post(profile_json['image']['url'])
# profileBackgroundImage = remove_html(url_str)
# url to return to
back_url = path
@ -348,7 +350,8 @@ def html_profile_after_search(recent_posts_cache: {}, max_recent_posts: int,
image_url = ''
if profile_json.get('image'):
if profile_json['image'].get('url'):
image_url = remove_html(profile_json['image']['url'])
url_str = get_url_from_post(profile_json['image']['url'])
image_url = remove_html(url_str)
also_known_as = None
if profile_json.get('alsoKnownAs'):
@ -1194,7 +1197,8 @@ def html_profile(signing_priv_key_pem: str,
if profile_json.get('hasOccupation'):
occupation_name = get_occupation_name(profile_json)
avatar_url = remove_html(profile_json['icon']['url'])
url_str = get_url_from_post(profile_json['icon']['url'])
avatar_url = remove_html(url_str)
# use alternate path for local avatars to avoid any caching issues
if '://' + domain_full + '/system/accounts/avatars/' in avatar_url:
avatar_url = \

View File

@ -10,6 +10,7 @@ __module_group__ = "Web Interface"
import os
from shutil import copyfile
import urllib.parse
from utils import get_url_from_post
from utils import date_from_string_format
from utils import get_attributed_to
from utils import get_actor_from_post_id
@ -571,7 +572,8 @@ def html_skills_search(actor: str, translate: {}, base_dir: str,
skill_level_str = '0' + skill_level_str
if skill_level < 10:
skill_level_str = '0' + skill_level_str
icon_url = remove_html(actor_json['icon']['url'])
url_str = get_url_from_post(actor_json['icon']['url'])
icon_url = remove_html(url_str)
index_str = \
skill_level_str + ';' + actor + ';' + \
actor_json['name'] + \
@ -611,7 +613,9 @@ def html_skills_search(actor: str, translate: {}, base_dir: str,
skill_level_str = '0' + skill_level_str
if skill_level < 10:
skill_level_str = '0' + skill_level_str
icon_url = remove_html(actor_json['icon']['url'])
url_str = \
get_url_from_post(actor_json['icon']['url'])
icon_url = remove_html(url_str)
index_str = \
skill_level_str + ';' + actor + ';' + \
actor_json['name'] + \
@ -1375,7 +1379,8 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
for attach in post_json_object['object']['attachment']:
if not attach.get('url'):
continue
attach_url = remove_html(attach['url'])
url_str = get_url_from_post(attach['url'])
attach_url = remove_html(url_str)
hashtag_feed += \
' <link>' + attach_url + '</link>'
hashtag_feed += ' </item>'

View File

@ -12,6 +12,7 @@ from shutil import copyfile
from collections import OrderedDict
from session import get_json
from session import get_json_valid
from utils import get_url_from_post
from utils import get_media_url_from_video
from utils import get_attributed_to
from utils import local_network_host
@ -857,7 +858,8 @@ def html_header_with_person_markup(css_filename: str, instance_title: str,
domain_full = actor_json['id'].split('://')[1].split('/')[0]
handle = actor_json['preferredUsername'] + '@' + domain_full
icon_url = remove_html(actor_json['icon']['url'])
url_str = get_url_from_post(actor_json['icon']['url'])
icon_url = remove_html(url_str)
person_markup = \
' "about": {\n' + \
' "@type" : "Person",\n' + \
@ -893,7 +895,8 @@ def html_header_with_person_markup(css_filename: str, instance_title: str,
' </script>\n'
description = remove_html(description)
actor2_url = remove_html(actor_json['url'])
url_str = get_url_from_post(actor_json['url'])
actor2_url = remove_html(url_str)
og_metadata = \
" <meta content=\"profile\" property=\"og:type\" />\n" + \
" <meta content=\"" + description + \
@ -1344,7 +1347,7 @@ def get_post_attachments_as_html(base_dir: str,
name = attach['hreflang']
url = None
if attach.get('url'):
url = attach['url']
url = get_url_from_post(attach['url'])
elif attach.get('href'):
url = attach['href']
if name and url:
@ -1397,7 +1400,8 @@ def get_post_attachments_as_html(base_dir: str,
image_description = attach['name'].replace('"', "'")
image_description = remove_html(image_description)
if _is_image_mime_type(media_type):
image_url = remove_html(attach['url'])
url_str = get_url_from_post(attach['url'])
image_url = remove_html(url_str)
if image_url in attached_urls:
continue
attached_urls.append(image_url)
@ -1452,7 +1456,8 @@ def get_post_attachments_as_html(base_dir: str,
' ' + license_str + \
'</figcaption></figure>\n'
if post_json_object['object'].get('url'):
image_post_url = post_json_object['object']['url']
url_str = post_json_object['object']['url']
image_post_url = get_url_from_post(url_str)
else:
image_post_url = post_json_object['object']['id']
image_post_url = remove_html(image_post_url)
@ -1554,7 +1559,8 @@ def get_post_attachments_as_html(base_dir: str,
if box_name == 'tlmedia':
gallery_str += '<div class="gallery">\n'
if post_json_object['object'].get('url'):
video_post_url = post_json_object['object']['url']
url_str = post_json_object['object']['url']
video_post_url = get_url_from_post(url_str)
else:
video_post_url = post_json_object['object']['id']
video_post_url = remove_html(video_post_url)
@ -1629,7 +1635,8 @@ def get_post_attachments_as_html(base_dir: str,
attachment_ctr += 1
elif _is_audio_mime_type(media_type):
extension = '.mp3'
audio_url = remove_html(attach['url'])
url_str = get_url_from_post(attach['url'])
audio_url = remove_html(url_str)
if audio_url in attached_urls:
continue
attached_urls.append(audio_url)
@ -1664,7 +1671,8 @@ def get_post_attachments_as_html(base_dir: str,
gallery_str += ' </audio>\n'
gallery_str += ' </a>\n'
if post_json_object['object'].get('url'):
audio_post_url = post_json_object['object']['url']
url_str = post_json_object['object']['url']
audio_post_url = get_url_from_post(url_str)
else:
audio_post_url = post_json_object['object']['id']
audio_post_url = remove_html(audio_post_url)

View File

@ -13,6 +13,7 @@ from session import get_json
from session import get_json_valid
from cache import store_webfinger_in_cache
from cache import get_webfinger_from_cache
from utils import get_url_from_post
from utils import remove_html
from utils import acct_handle_dir
from utils import get_attachment_property_value
@ -433,7 +434,8 @@ def _webfinger_update_avatar(wf_json: {}, actor_json: {}) -> bool:
"""Updates the avatar image link
"""
found = False
avatar_url = remove_html(actor_json['icon']['url'])
url_str = get_url_from_post(actor_json['icon']['url'])
avatar_url = remove_html(url_str)
media_type = actor_json['icon']['mediaType']
for link in wf_json['links']:
if not link.get('rel'):
@ -463,7 +465,8 @@ def _webfinger_update_vcard(wf_json: {}, actor_json: {}) -> bool:
if link.get('type'):
if link['type'] == 'text/vcard':
return False
actor_url = remove_html(actor_json['url'])
url_str = get_url_from_post(actor_json['url'])
actor_url = remove_html(url_str)
wf_json['links'].append({
"href": actor_url,
"rel": "http://webfinger.net/rel/profile-page",