2020-04-02 09:02:33 +00:00
|
|
|
__filename__ = "cache.py"
|
|
|
|
__author__ = "Bob Mottram"
|
|
|
|
__license__ = "AGPL3+"
|
2024-12-22 23:37:30 +00:00
|
|
|
__version__ = "1.6.0"
|
2020-04-02 09:02:33 +00:00
|
|
|
__maintainer__ = "Bob Mottram"
|
2021-09-10 16:14:50 +00:00
|
|
|
__email__ = "bob@libreserver.org"
|
2020-04-02 09:02:33 +00:00
|
|
|
__status__ = "Production"
|
2021-06-26 11:16:41 +00:00
|
|
|
__module_group__ = "Core"
|
2019-06-30 15:03:26 +00:00
|
|
|
|
2019-08-20 09:16:03 +00:00
|
|
|
import os
|
2024-08-31 09:31:08 +00:00
|
|
|
from session import download_image
|
2021-12-29 21:55:09 +00:00
|
|
|
from session import url_exists
|
|
|
|
from session import get_json
|
2023-08-13 09:58:02 +00:00
|
|
|
from session import get_json_valid
|
2024-09-13 13:58:14 +00:00
|
|
|
from flags import url_permitted
|
2024-08-31 09:31:08 +00:00
|
|
|
from utils import remove_html
|
|
|
|
from utils import get_url_from_post
|
|
|
|
from utils import data_dir
|
|
|
|
from utils import get_attributed_to
|
|
|
|
from utils import remove_id_ending
|
|
|
|
from utils import get_post_attachments
|
|
|
|
from utils import has_object_dict
|
2024-05-26 11:57:06 +00:00
|
|
|
from utils import contains_statuses
|
2021-12-26 15:13:34 +00:00
|
|
|
from utils import load_json
|
2021-12-26 14:47:21 +00:00
|
|
|
from utils import save_json
|
2021-12-29 21:55:09 +00:00
|
|
|
from utils import get_file_case_insensitive
|
2021-12-26 12:24:40 +00:00
|
|
|
from utils import get_user_paths
|
2023-11-20 22:27:58 +00:00
|
|
|
from utils import date_utcnow
|
|
|
|
from utils import date_from_string_format
|
2024-08-31 09:31:08 +00:00
|
|
|
from content import remove_script
|
2020-04-02 09:02:33 +00:00
|
|
|
|
2020-05-04 19:16:11 +00:00
|
|
|
|
2023-08-23 11:17:25 +00:00
|
|
|
def remove_person_from_cache(base_dir: str, person_url: str,
                             person_cache: {}) -> bool:
    """Removes an actor from the cache

    The actor is removed both from the cached file within
    base_dir/cache/actors (if it exists) and from the in-memory
    person_cache dict.
    Returns True if a cached file or an in-memory entry was removed,
    otherwise False
    """
    removed = False
    # cached actor filenames are the actor url with '/' stored as '#'
    cache_filename = base_dir + '/cache/actors/' + \
        person_url.replace('/', '#') + '.json'
    if os.path.isfile(cache_filename):
        try:
            os.remove(cache_filename)
            removed = True
        except OSError:
            print('EX: unable to delete cached actor ' + str(cache_filename))
    # test for the key rather than for a truthy value, so that an entry
    # holding an empty value is also cleared
    if person_url in person_cache:
        del person_cache[person_url]
        removed = True
    return removed
|
2021-02-14 13:01:52 +00:00
|
|
|
|
|
|
|
|
2023-07-11 09:25:43 +00:00
|
|
|
def clear_actor_cache(base_dir: str, person_cache: {},
                      clear_domain: str) -> None:
    """Removes every cached actor whose cache filename contains
    the given domain

    Handy when an instance is known to have rotated its signing keys,
    for example following a security incident
    """
    # nothing to do without a domain containing at least one dot
    if not clear_domain or '.' not in clear_domain:
        return

    json_ext = '.json'
    for _, _, cached_files in os.walk(base_dir + '/cache/actors'):
        for cached_name in cached_files:
            is_domain_actor = \
                cached_name.endswith(json_ext) and clear_domain in cached_name
            if not is_domain_actor:
                continue
            # cached filenames are actor urls with '/' stored as '#'
            actor_url = cached_name.replace('#', '/').replace(json_ext, '')
            remove_person_from_cache(base_dir, actor_url, person_cache)
        # only the top level of the cache directory is examined
        break
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def check_for_changed_actor(session, base_dir: str,
                            http_prefix: str, domain_full: str,
                            person_url: str, avatar_url: str, person_cache: {},
                            timeout_sec: int):
    """Detects an actor which has probably changed without an
    actor/Person Update having been received, by testing whether
    their avatar url still exists.
    If it doesn't then the actor is removed from the cache, so that
    it gets refreshed when their next post arrives
    """
    if not (session and avatar_url):
        return
    # no check is made when the avatar url contains this domain
    if domain_full in avatar_url:
        return
    avatar_found = \
        url_exists(session, avatar_url, timeout_sec, http_prefix, domain_full)
    if not avatar_found:
        remove_person_from_cache(base_dir, person_url, person_cache)
|
2021-02-14 13:43:05 +00:00
|
|
|
|
|
|
|
|
2021-12-30 18:38:36 +00:00
|
|
|
def store_person_in_cache(base_dir: str, person_url: str,
                          person_json: {}, person_cache: {},
                          allow_write_to_file: bool) -> None:
    """Adds an actor to the in-memory cache with the current time as
    its timestamp and, when permitted, also saves it as a file within
    the cache/actors directory
    """
    if contains_statuses(person_url) or person_url.endswith('/actor'):
        # not an actor or person account
        return

    timestamp_str = date_utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    person_cache[person_url] = {
        "actor": person_json,
        "timestamp": timestamp_str
    }

    # the file copy needs both a base directory and permission to write
    if not (base_dir and allow_write_to_file):
        return

    actors_dir = base_dir + '/cache/actors'
    if not os.path.isdir(actors_dir):
        return
    cache_filename = actors_dir + '/' + person_url.replace('/', '#') + '.json'
    # an existing cached file is left as it is
    if not os.path.isfile(cache_filename):
        save_json(person_json, cache_filename)
|
2020-04-02 09:02:33 +00:00
|
|
|
|
2019-06-30 15:03:26 +00:00
|
|
|
|
2022-06-09 16:54:44 +00:00
|
|
|
def get_person_from_cache(base_dir: str, person_url: str,
                          person_cache: {}) -> {}:
    """Returns the cached actor for the given url, or None if there isn't one
    """
    from_file = False
    if not person_cache.get(person_url):
        # not held in memory, so look for a cached file instead
        actor_filename = get_file_case_insensitive(
            base_dir + '/cache/actors/' +
            person_url.replace('/', '#') + '.json')
        person_json = load_json(actor_filename) if actor_filename else None
        if person_json:
            store_person_in_cache(base_dir, person_url, person_json,
                                  person_cache, False)
            from_file = True

    cache_entry = person_cache.get(person_url)
    if not cache_entry:
        return None
    if not from_file:
        # record the last time that the actor was retrieved
        cache_entry['timestamp'] = \
            date_utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    return cache_entry['actor']
|
|
|
|
|
2020-04-02 09:02:33 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def expire_person_cache(person_cache: {}):
    """Drops entries from the in-memory cache whose age,
    in whole days, is greater than two
    """
    now = date_utcnow()
    expired: list[str] = []
    for actor_url, entry in person_cache.items():
        cached_at = date_from_string_format(entry['timestamp'],
                                            ["%Y-%m-%dT%H:%M:%S%z"])
        if (now - cached_at).days > 2:
            expired.append(actor_url)
    if not expired:
        return
    # deletion happens after the scan, so that the dict isn't
    # altered while it is being iterated
    for actor_url in expired:
        del person_cache[actor_url]
    print(str(len(expired)) + ' actors were expired from the cache')
|
2019-08-20 09:37:09 +00:00
|
|
|
|
2020-04-02 09:02:33 +00:00
|
|
|
|
2022-01-01 15:11:42 +00:00
|
|
|
def store_webfinger_in_cache(handle: str, webfing,
                             cached_webfingers: {}) -> None:
    """Records the webfinger endpoint for a handle within the cache,
    replacing any endpoint previously held for that handle
    """
    cached_webfingers.update({handle: webfing})
|
2020-04-02 09:02:33 +00:00
|
|
|
|
2019-06-30 15:03:26 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def get_webfinger_from_cache(handle: str, cached_webfingers: {}) -> {}:
    """Returns the cached webfinger endpoint for a handle, or None if
    nothing (or only an empty value) is cached for it
    """
    webfing = cached_webfingers.get(handle)
    if not webfing:
        return None
    return webfing
|
2021-07-31 11:56:28 +00:00
|
|
|
|
|
|
|
|
2023-07-10 22:30:05 +00:00
|
|
|
def get_actor_public_key_from_id(person_json: {}, key_id: str) -> (str, str):
    """Returns the public key referenced by the given id
    https://codeberg.org/fediverse/fep/src/branch/main/fep/521a/fep-521a.md

    The key is looked up in this order:
    1. publicKey.publicKeyPem
    2. if there is no publicKey field, the first assertionMethod entry
       having both an id and a publicKeyMultibase, and whose id equals
       key_id (any such entry when key_id is None)
    3. a top level publicKeyPem field, if no key was found above

    Returns a tuple of (public key, public key id), either of which
    may be None
    """
    pub_key = None
    pub_key_id = None
    public_key = person_json.get('publicKey')
    if public_key:
        # the actor comes from a remote server, so don't assume that
        # the field has the expected type
        if isinstance(public_key, dict):
            if public_key.get('publicKeyPem'):
                pub_key = public_key['publicKeyPem']
                if public_key.get('id'):
                    pub_key_id = public_key['id']
    elif person_json.get('assertionMethod'):
        if isinstance(person_json['assertionMethod'], list):
            for key_dict in person_json['assertionMethod']:
                # skip malformed entries rather than raising
                if not isinstance(key_dict, dict):
                    continue
                if not key_dict.get('id') or \
                   not key_dict.get('publicKeyMultibase'):
                    continue
                if key_id is None or key_dict['id'] == key_id:
                    pub_key = key_dict['publicKeyMultibase']
                    pub_key_id = key_dict['id']
                    break
    if not pub_key and person_json.get('publicKeyPem'):
        # fall back to a key stored at the top level of the actor
        pub_key = person_json['publicKeyPem']
        if person_json.get('id'):
            pub_key_id = person_json['id']
    return pub_key, pub_key_id
|
|
|
|
|
|
|
|
|
2021-12-30 18:38:36 +00:00
|
|
|
def get_person_pub_key(base_dir: str, session, person_url: str,
                       person_cache: {}, debug: bool,
                       project_version: str, http_prefix: str,
                       domain: str, onion_domain: str,
                       i2p_domain: str,
                       signing_priv_key_pem: str,
                       mitm_servers: []) -> str:
    """Get the public key for an actor

    person_url can be an actor url or a key id ending with
    #/publicKey, /main-key or #main-key. The actor is taken from the
    cache when available, otherwise it is fetched with get_json, and
    it is then stored in the cache.

    Returns the public key, or None if no url was given, the actor
    could not be obtained or no key was found. If the fetch returns
    a dict which is not valid then that dict (the error code) is
    returned instead
    """
    original_person_url = person_url
    if not person_url:
        return None
    # reduce a key id to the url which it belongs to
    if '#/publicKey' in person_url:
        person_url = person_url.replace('#/publicKey', '')
    elif '/main-key' in person_url:
        person_url = person_url.replace('/main-key', '')
    else:
        person_url = person_url.replace('#main-key', '')
    users_paths = get_user_paths()
    for possible_users_path in users_paths:
        if person_url.endswith(possible_users_path + 'inbox'):
            if debug:
                print('DEBUG: Obtaining public key for shared inbox')
            person_url = \
                person_url.replace(possible_users_path + 'inbox', '/inbox')
            break
    person_json = \
        get_person_from_cache(base_dir, person_url, person_cache)
    if not person_json:
        if debug:
            print('DEBUG: Obtaining public key for ' + person_url)
        # choose which of this instance's domains to fetch with.
        # Each domain is tested together with its url check, so that
        # having an onion domain doesn't prevent the i2p domain from
        # being used for an i2p actor
        person_domain = domain
        if onion_domain and '.onion/' in person_url:
            person_domain = onion_domain
        elif i2p_domain and '.i2p/' in person_url:
            person_domain = i2p_domain
        profile_str = 'https://www.w3.org/ns/activitystreams'
        accept_str = \
            'application/activity+json; profile="' + profile_str + '"'
        as_header = {
            'Accept': accept_str
        }
        person_json = \
            get_json(signing_priv_key_pem,
                     session, person_url, as_header, None, debug,
                     mitm_servers, project_version, http_prefix, person_domain)
        if not get_json_valid(person_json):
            if isinstance(person_json, dict):
                # return the error code
                return person_json
            return None
    pub_key, _ = get_actor_public_key_from_id(person_json, original_person_url)
    if not pub_key:
        if debug:
            print('DEBUG: Public key not found for ' + person_url)

    # the actor is cached even when no key was found within it
    store_person_in_cache(base_dir, person_url, person_json,
                          person_cache, True)
    return pub_key
|
2024-08-31 09:31:08 +00:00
|
|
|
|
|
|
|
|
|
|
|
def cache_svg_images(session, base_dir: str, http_prefix: str,
                     domain: str, domain_full: str,
                     onion_domain: str, i2p_domain: str,
                     post_json_object: {},
                     federation_list: [], debug: bool,
                     test_image_filename: str) -> bool:
    """Creates a local copy of a remote svg file

    Each svg attachment of the post which is permitted and isn't
    hosted on one of this instance's domains is downloaded into
    base_dir/media and passed through remove_script. If that changed
    the svg then the cleaned up version is written back to the file
    and the attachment url is altered to be the local copy.
    If test_image_filename is given then that file is used instead of
    downloading.

    Returns True if at least one svg attachment was cached
    """
    if has_object_dict(post_json_object):
        obj = post_json_object['object']
    else:
        obj = post_json_object
    if not obj.get('id'):
        return False
    post_attachments = get_post_attachments(obj)
    if not post_attachments:
        return False
    cached = False
    post_id = remove_id_ending(obj['id']).replace('/', '--')
    actor = 'unknown'
    if obj.get('attributedTo'):
        actor = get_attributed_to(obj['attributedTo'])
    log_filename = data_dir(base_dir) + '/svg_scripts_log.txt'
    for index, attach in enumerate(post_attachments):
        if not attach.get('mediaType'):
            continue
        if not attach.get('url'):
            continue
        url_str = get_url_from_post(attach['url'])
        if not url_str.endswith('.svg') and \
           'svg' not in attach['mediaType']:
            continue

        url = remove_html(url_str)
        if not url_permitted(url, federation_list):
            continue
        # if this is a local image then it has already been
        # validated on upload
        if '://' + domain in url:
            continue
        if onion_domain:
            if '://' + onion_domain in url:
                continue
        if i2p_domain:
            if '://' + i2p_domain in url:
                continue

        # the last section of the url is used as the filename
        filename = url.split('/')[-1]

        if not test_image_filename:
            image_filename = \
                base_dir + '/media/' + post_id + '_' + filename
            if not download_image(session, url,
                                  image_filename, debug):
                continue
        else:
            image_filename = test_image_filename

        image_data = None
        try:
            with open(image_filename, 'rb') as fp_svg:
                image_data = fp_svg.read()
        except OSError:
            print('EX: unable to read svg file data')
        if not image_data:
            continue

        # the file came from a remote server, so it isn't
        # guaranteed to be valid utf-8
        try:
            image_data = image_data.decode()
        except UnicodeDecodeError:
            print('EX: unable to decode svg file data ' + url)
            continue

        cleaned_up = \
            remove_script(image_data, log_filename, actor, url)
        if cleaned_up == image_data:
            # nothing needed to be removed from the svg
            cached = True
            continue

        # write the cleaned up svg image
        svg_written = False
        cleaned_up = cleaned_up.encode('utf-8')
        try:
            with open(image_filename, 'wb') as fp_im:
                fp_im.write(cleaned_up)
                svg_written = True
        except OSError:
            print('EX: unable to write cleaned up svg ' + url)
        if svg_written:
            # convert to list if needed
            if isinstance(obj['attachment'], dict):
                obj['attachment'] = [obj['attachment']]
            # change the url to be the local version
            obj['attachment'][index]['url'] = \
                http_prefix + '://' + domain_full + '/media/' + \
                post_id + '_' + filename
            cached = True
    return cached
|