From eca0fa17366caaf1471a61872ba0d43d5123b500 Mon Sep 17 00:00:00 2001
From: Bob Mottram ' + translate['Citations'] + \
':"
content = content.replace(tag_item['name'], emoji_html)
diff --git a/daemon.py b/daemon.py
index d966c1b3a..9ea3248e1 100644
--- a/daemon.py
+++ b/daemon.py
@@ -6419,7 +6419,7 @@ class PubServer(BaseHTTPRequestHandler):
for m_type, last_part in uploads:
rep_str = '/' + last_part
if m_type == 'avatar':
- actor_url = actor_json['icon']['url']
+ actor_url = remove_html(actor_json['icon']['url'])
last_part_of_url = actor_url.split('/')[-1]
srch_str = '/' + last_part_of_url
actor_url = actor_url.replace(srch_str, rep_str)
@@ -6432,15 +6432,14 @@ class PubServer(BaseHTTPRequestHandler):
actor_json['icon']['mediaType'] = \
'image/' + img_ext
elif m_type == 'image':
- last_part_of_url = \
- actor_json['image']['url'].split('/')[-1]
+ im_url = \
+ remove_html(actor_json['image']['url'])
+ last_part_of_url = im_url.split('/')[-1]
srch_str = '/' + last_part_of_url
actor_json['image']['url'] = \
- actor_json['image']['url'].replace(srch_str,
- rep_str)
- if '.' in actor_json['image']['url']:
- img_ext = \
- actor_json['image']['url'].split('.')[-1]
+ im_url.replace(srch_str, rep_str)
+ if '.' in im_url:
+ img_ext = im_url.split('.')[-1]
if img_ext == 'jpg':
img_ext = 'jpeg'
actor_json['image']['mediaType'] = \
diff --git a/desktop_client.py b/desktop_client.py
index 67175b4bb..79f27224e 100644
--- a/desktop_client.py
+++ b/desktop_client.py
@@ -742,7 +742,8 @@ def _show_replies_on_post(post_json_object: {}, max_replies: int) -> None:
print('')
ctr = 0
for item in object_replies['items']:
- print(' ↰ ' + str(item['url']))
+ item_url = remove_html(item['url'])
+ print(' ↰ ' + str(item_url))
ctr += 1
if ctr >= max_replies:
break
diff --git a/inbox.py b/inbox.py
index 7957a1ad0..f52ef1d66 100644
--- a/inbox.py
+++ b/inbox.py
@@ -187,7 +187,7 @@ def cache_svg_images(session, base_dir: str, http_prefix: str,
continue
if attach['url'].endswith('.svg') or \
'svg' in attach['mediaType']:
- url = attach['url']
+ url = remove_html(attach['url'])
if not url_permitted(url, federation_list):
continue
# if this is a local image then it has already been
@@ -1175,8 +1175,9 @@ def _person_receive_update(base_dir: str,
debug: bool, http_prefix: str) -> bool:
"""Changes an actor. eg: avatar or display name change
"""
+ person_url = remove_html(person_json['url'])
if debug:
- print('Receiving actor update for ' + person_json['url'] +
+ print('Receiving actor update for ' + person_url +
' ' + str(person_json))
domain_full = get_full_domain(domain, port)
update_domain_full = get_full_domain(update_domain, update_port)
@@ -2581,7 +2582,8 @@ def _receive_bookmark(recent_posts_cache: {},
if debug:
print('DEBUG: c2s inbox bookmark Add request arrived in outbox')
- message_url = remove_id_ending(message_json['object']['url'])
+ message_url2 = remove_html(message_json['object']['url'])
+ message_url = remove_id_ending(message_url2)
domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url)
if not post_filename:
@@ -2591,7 +2593,7 @@ def _receive_bookmark(recent_posts_cache: {},
return True
update_bookmarks_collection(recent_posts_cache, base_dir, post_filename,
- message_json['object']['url'],
+ message_url2,
message_json['actor'], domain, debug)
# regenerate the html
bookmarked_post_json = load_json(post_filename, 0, 1)
@@ -2707,7 +2709,8 @@ def _receive_undo_bookmark(recent_posts_cache: {},
print('DEBUG: c2s inbox Remove bookmark ' +
'request arrived in outbox')
- message_url = remove_id_ending(message_json['object']['url'])
+ message_url2 = remove_html(message_json['object']['url'])
+ message_url = remove_id_ending(message_url2)
domain = remove_domain_port(domain)
post_filename = locate_post(base_dir, nickname, domain, message_url)
if not post_filename:
diff --git a/maps.py b/maps.py
index c9fe0a20b..48489db6f 100644
--- a/maps.py
+++ b/maps.py
@@ -15,6 +15,7 @@ from utils import acct_dir
from utils import load_json
from utils import save_json
from utils import locate_post
+from utils import remove_html
def get_location_from_tags(tags: []) -> str:
@@ -340,7 +341,7 @@ def get_map_preferences_url(base_dir: str, nickname: str, domain: str) -> str:
if os.path.isfile(maps_filename):
maps_json = load_json(maps_filename)
if maps_json.get('url'):
- return maps_json['url']
+ return remove_html(maps_json['url'])
return None
diff --git a/mastoapiv1.py b/mastoapiv1.py
index 3b6c42129..d165fe87a 100644
--- a/mastoapiv1.py
+++ b/mastoapiv1.py
@@ -11,6 +11,7 @@ import os
from utils import load_json
from utils import get_config_param
from utils import acct_dir
+from utils import remove_html
from metadata import meta_data_instance
@@ -62,6 +63,8 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str) -> {}:
account_json = load_json(account_filename)
if not account_json:
return {}
+ avatar_url = remove_html(account_json['icon']['url'])
+ image_url = remove_html(account_json['image']['url'])
masto_account_json = {
"id": get_masto_api_v1id_from_nickname(nickname),
"username": nickname,
@@ -74,10 +77,10 @@ def _get_masto_api_v1account(base_dir: str, nickname: str, domain: str) -> {}:
"statuses_count": 0,
"note": account_json['summary'],
"url": account_json['id'],
- "avatar": account_json['icon']['url'],
- "avatar_static": account_json['icon']['url'],
- "header": account_json['image']['url'],
- "header_static": account_json['image']['url']
+ "avatar": avatar_url,
+ "avatar_static": avatar_url,
+ "header": image_url,
+ "header_static": image_url
}
return masto_account_json
diff --git a/metadata.py b/metadata.py
index f42a00806..2c2b5f401 100644
--- a/metadata.py
+++ b/metadata.py
@@ -12,6 +12,7 @@ from utils import is_account_dir
from utils import load_json
from utils import no_of_accounts
from utils import no_of_active_accounts_monthly
+from utils import remove_html
def _get_status_count(base_dir: str) -> int:
@@ -152,6 +153,8 @@ def meta_data_instance(show_accounts: bool,
if admin_actor.get('published'):
created_at = admin_actor['published']
+ icon_url = remove_html(admin_actor['icon']['url'])
+ image_url = remove_html(admin_actor['image']['url'])
instance = {
'approval_required': False,
'invites_enabled': False,
@@ -159,10 +162,10 @@ def meta_data_instance(show_accounts: bool,
'contact_account': {
'acct': admin_actor['preferredUsername'],
'created_at': created_at,
- 'avatar': admin_actor['icon']['url'],
- 'avatar_static': admin_actor['icon']['url'],
- 'header': admin_actor['image']['url'],
- 'header_static': admin_actor['image']['url'],
+ 'avatar': icon_url,
+ 'avatar_static': icon_url,
+ 'header': image_url,
+ 'header_static': image_url,
'bot': is_bot,
'discoverable': True,
'group': is_group,
diff --git a/newswire.py b/newswire.py
index d416e6237..8026c3fcb 100644
--- a/newswire.py
+++ b/newswire.py
@@ -492,9 +492,9 @@ def _valid_podcast_entry(base_dir: str, key: str, entry: {}) -> bool:
if entry['protocol'].tolower() != 'activitypub':
return False
if entry.get('uri'):
- post_url = entry['uri']
+ post_url = remove_html(entry['uri'])
elif entry.get('url'):
- post_url = entry['uri']
+ post_url = remove_html(entry['url'])
else:
post_url = entry['text']
if '://' not in post_url:
@@ -1133,7 +1133,7 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
if tag_name not in description:
description += ' ' + tag_name
- link = json_feed_item['url']
+ link = remove_html(json_feed_item['url'])
if '://' not in link:
continue
if len(link) > max_bytes:
@@ -1551,10 +1551,10 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
description = remove_html(description)
tags_from_post = _get_hashtags_from_post(post_json_object)
summary = post_json_object['object']['summary']
+ url2 = remove_html(post_json_object['object']['url'])
_add_newswire_dict_entry(base_dir, domain,
newswire, published,
- summary,
- post_json_object['object']['url'],
+ summary, url2,
votes, full_post_filename,
description, moderated, False,
tags_from_post,
diff --git a/person.py b/person.py
index 3cba8d394..7d816b2c2 100644
--- a/person.py
+++ b/person.py
@@ -1789,7 +1789,7 @@ def get_person_avatar_url(base_dir: str, person_url: str,
if person_json.get('icon'):
if person_json['icon'].get('url'):
if '.svg' not in person_json['icon']['url'].lower():
- return person_json['icon']['url']
+ return remove_html(person_json['icon']['url'])
return None
@@ -1971,7 +1971,7 @@ def get_featured_hashtags(actor_json: {}) -> str:
tag_name = tag_name[1:]
if not tag_name:
continue
- tag_url = tag_dict['href']
+ tag_url = remove_html(tag_dict['href'])
if '://' not in tag_url:
continue
if not valid_hash_tag(tag_name):
@@ -2019,13 +2019,13 @@ def get_featured_hashtags_as_html(actor_json: {},
continue
if ' #' + tag_name in profile_description:
continue
- tag_url = tag_dict['href']
+ tag_url = remove_html(tag_dict['href'])
if '://' not in tag_url:
continue
if not valid_hash_tag(tag_name):
continue
result += \
- '#' + tag_name + ' '
ctr += 1
diff --git a/posts.py b/posts.py
index fd8553d54..01e960b1b 100644
--- a/posts.py
+++ b/posts.py
@@ -216,10 +216,10 @@ def get_user_url(wf_request: {}, source_id: int, debug: bool) -> str:
else:
url = link['href']
if not contains_invalid_actor_url_chars(url):
- return url
+ return remove_html(url)
url = link['href']
if not contains_invalid_actor_url_chars(url):
- return url
+ return remove_html(url)
return None
@@ -404,7 +404,7 @@ def get_person_box(signing_priv_key_pem: str, origin_domain: str,
avatar_url = None
if person_json.get('icon'):
if person_json['icon'].get('url'):
- avatar_url = person_json['icon']['url']
+ avatar_url = remove_html(person_json['icon']['url'])
display_name = None
if person_json.get('name'):
display_name = person_json['name']
@@ -628,7 +628,8 @@ def _get_posts(session, outbox_url: str, max_posts: int,
if url_permitted(tag_item['icon']['url'],
federation_list):
emoji_name = tag_item['name']
- emoji_icon = tag_item['icon']['url']
+ emoji_icon = \
+ remove_html(tag_item['icon']['url'])
emoji[emoji_name] = emoji_icon
else:
if debug:
@@ -675,10 +676,11 @@ def _get_posts(session, outbox_url: str, max_posts: int,
for attach in this_item['attachment']:
if attach.get('name') and attach.get('url'):
# no attachments from non-permitted domains
- if url_permitted(attach['url'],
+ attach_url = remove_html(attach['url'])
+ if url_permitted(attach_url,
federation_list):
attachment.append([attach['name'],
- attach['url']])
+ attach_url])
else:
if debug:
print('url not permitted ' +
@@ -820,8 +822,9 @@ def get_post_domains(session, outbox_url: str, max_posts: int, debug: bool,
tag_type = tag_item['type'].lower()
if tag_type == 'mention':
if tag_item.get('href'):
+ tag_url = remove_html(tag_item['href'])
post_domain, _ = \
- get_domain_from_actor(tag_item['href'])
+ get_domain_from_actor(tag_url)
if post_domain:
if post_domain not in post_domains:
post_domains.append(post_domain)
@@ -879,6 +882,7 @@ def _get_posts_for_blocked_domains(base_dir: str,
url = item['object']['url']
else:
url = item['object']['id']
+ url = remove_html(url)
if not blocked_posts.get(post_domain):
blocked_posts[post_domain] = [url]
else:
@@ -891,8 +895,9 @@ def _get_posts_for_blocked_domains(base_dir: str,
continue
tag_type = tag_item['type'].lower()
if tag_type == 'mention' and tag_item.get('href'):
+ tag_url = remove_html(tag_item['href'])
post_domain, _ = \
- get_domain_from_actor(tag_item['href'])
+ get_domain_from_actor(tag_url)
if not post_domain:
continue
if is_blocked_domain(base_dir, post_domain):
@@ -900,6 +905,7 @@ def _get_posts_for_blocked_domains(base_dir: str,
url = item['object']['url']
else:
url = item['object']['id']
+ url = remove_html(url)
if not blocked_posts.get(post_domain):
blocked_posts[post_domain] = [url]
else:
@@ -1496,7 +1502,8 @@ def _create_post_mentions(cc_url: str, new_post: {},
if tag['type'] != 'Mention':
continue
if tag['href'] not in to_cc:
- new_post['object']['cc'].append(tag['href'])
+ tag_url = remove_html(tag['href'])
+ new_post['object']['cc'].append(tag_url)
_consolidate_actors_list(new_post['object']['cc'])
new_post['cc'] = new_post['object']['cc']
@@ -2099,9 +2106,9 @@ def create_blog_post(base_dir: str,
low_bandwidth, content_license_url,
media_license_url, media_creator,
languages_understood, translate, buy_url, chat_url)
- if '/@/' not in blog_json['object']['url']:
- blog_json['object']['url'] = \
- blog_json['object']['url'].replace('/@', '/users/')
+ obj_url = remove_html(blog_json['object']['url'])
+ if '/@/' not in obj_url:
+ blog_json['object']['url'] = obj_url.replace('/@', '/users/')
_append_citations_to_blog_post(base_dir, nickname, domain, blog_json)
return blog_json
diff --git a/utils.py b/utils.py
index 9a4ab922d..ad8e1f1bf 100644
--- a/utils.py
+++ b/utils.py
@@ -1862,6 +1862,7 @@ def _remove_attachment(base_dir: str, http_prefix: str, domain: str,
attachment_url = post_json['attachment'][0]['url']
if not attachment_url:
return
+ attachment_url = remove_html(attachment_url)
media_filename = base_dir + '/' + \
attachment_url.replace(http_prefix + '://' + domain + '/', '')
if os.path.isfile(media_filename):
diff --git a/video.py b/video.py
index 77974f482..9e06ddb68 100644
--- a/video.py
+++ b/video.py
@@ -7,6 +7,7 @@ __email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Timeline"
+from utils import remove_html
from utils import get_full_domain
from utils import get_nickname_from_actor
from utils import get_domain_from_actor
@@ -110,15 +111,15 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
if not media_link.get('href'):
continue
if media_link['mediaType'] == 'application/x-bittorrent':
- media_torrent = media_link['href']
+ media_torrent = remove_html(media_link['href'])
if media_link['href'].startswith('magnet:'):
- media_magnet = media_link['href']
+ media_magnet = remove_html(media_link['href'])
if media_link['mediaType'] != 'video/mp4' and \
media_link['mediaType'] != 'video/ogv':
continue
if not media_url:
media_type = media_link['mediaType']
- media_url = media_link['href']
+ media_url = remove_html(media_link['href'])
if not media_url:
return None
@@ -138,7 +139,8 @@ def convert_video_to_note(base_dir: str, nickname: str, domain: str,
content += '🧲'
content += '