epicyon/daemon_utils.py

884 lines
36 KiB
Python
Raw Normal View History

2024-03-01 17:10:04 +00:00
__filename__ = "daemon_utils.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-12-22 23:37:30 +00:00
__version__ = "1.6.0"
2024-03-01 17:10:04 +00:00
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"
import os
2024-03-01 17:10:04 +00:00
import time
from auth import authorize
from threads import thread_with_trace
from threads import begin_thread
from outbox import post_message_to_outbox
from city import get_spoofed_city
from httpcodes import http_404
from httpcodes import http_503
from httpcodes import http_400
from httpcodes import write2
from context import has_valid_context
from inbox import save_post_to_inbox_queue
from inbox import clear_queue_items
from blocking import update_blocked_cache
from blocking import is_blocked_nickname
from blocking import is_blocked_domain
from content import valid_url_lengths
from posts import add_to_field
2024-12-17 13:50:48 +00:00
from utils import detect_mitm
2024-11-16 17:13:56 +00:00
from utils import data_dir
from utils import load_json
from utils import save_json
2024-03-01 17:10:04 +00:00
from utils import get_instance_url
from utils import remove_html
from utils import get_locked_account
from utils import post_summary_contains_links
from utils import local_only_is_local
from utils import get_local_network_addresses
from utils import has_object_dict
from utils import get_nickname_from_actor
from utils import get_domain_from_actor
from utils import get_actor_from_post
from utils import has_actor
from utils import resembles_url
from flags import is_system_account
2024-03-01 17:10:04 +00:00
from cache import check_for_changed_actor
from cache import get_person_from_cache
from website import get_website
from website import get_gemini_link
2024-03-01 17:10:04 +00:00
from donate import get_donation_url
from pronouns import get_pronouns
from discord import get_discord
2024-08-15 19:58:26 +00:00
from art import get_art_site_url
from music import get_music_site_url
from youtube import get_youtube
from pixelfed import get_pixelfed
from peertube import get_peertube
2024-03-01 17:10:04 +00:00
from xmpp import get_xmpp_address
from matrix import get_matrix_address
from ssb import get_ssb_address
from blog import get_blog_address
from tox import get_tox_address
from briar import get_briar_address
from cwtch import get_cwtch_address
from pgp import get_pgp_fingerprint
from pgp import get_email_address
from pgp import get_pgp_pub_key
from enigma import get_enigma_pub_key
from git import get_repo_url
from webapp_person_options import html_person_options
from httpheaders import redirect_headers
from httpheaders import set_headers
from fitnessFunctions import fitness_performance
def post_to_outbox(self, message_json: {}, version: str,
post_to_nickname: str,
curr_session, proxy_type: str) -> bool:
"""post is received by the outbox
Client to server message post
https://www.w3.org/TR/activitypub/#client-to-server-outbox-delivery
"""
if not curr_session:
return False
city = self.server.city
if post_to_nickname:
print('Posting to nickname ' + post_to_nickname)
self.post_to_nickname = post_to_nickname
city = get_spoofed_city(self.server.city,
self.server.base_dir,
post_to_nickname, self.server.domain)
shared_items_federated_domains = \
self.server.shared_items_federated_domains
shared_item_federation_tokens = \
self.server.shared_item_federation_tokens
return post_message_to_outbox(curr_session,
self.server.translate,
2024-04-15 14:05:38 +00:00
message_json,
self.post_to_nickname,
self.server,
self.server.base_dir,
2024-03-01 17:10:04 +00:00
self.server.http_prefix,
self.server.domain,
self.server.domain_full,
self.server.onion_domain,
self.server.i2p_domain,
self.server.port,
self.server.recent_posts_cache,
self.server.followers_threads,
self.server.federation_list,
self.server.send_threads,
2024-12-23 15:43:26 +00:00
self.server.post_log,
2024-03-01 17:10:04 +00:00
self.server.cached_webfingers,
self.server.person_cache,
self.server.allow_deletion,
proxy_type, version,
self.server.debug,
self.server.yt_replace_domain,
self.server.twitter_replacement_domain,
self.server.show_published_date_only,
self.server.allow_local_network_access,
city, self.server.system_language,
shared_items_federated_domains,
shared_item_federation_tokens,
self.server.low_bandwidth,
self.server.signing_priv_key_pem,
self.server.peertube_instances,
self.server.theme_name,
self.server.max_like_count,
self.server.max_recent_posts,
self.server.cw_lists,
self.server.lists_enabled,
self.server.content_license_url,
self.server.dogwhistles,
self.server.min_images_for_accounts,
self.server.buy_sites,
self.server.sites_unavailable,
self.server.max_recent_books,
self.server.books_cache,
self.server.max_cached_readers,
self.server.auto_cw_cache,
2024-12-17 13:50:48 +00:00
self.server.block_federated,
self.server.mitm_servers)
2024-03-01 17:10:04 +00:00
def _get_outbox_thread_index(self, nickname: str,
max_outbox_threads_per_account: int) -> int:
"""Returns the outbox thread index for the given account
This is a ring buffer used to store the thread objects which
are sending out posts
"""
account_outbox_thread_name = nickname
if not account_outbox_thread_name:
account_outbox_thread_name = '*'
# create the buffer for the given account
if not self.server.outboxThread.get(account_outbox_thread_name):
self.server.outboxThread[account_outbox_thread_name] = \
[None] * max_outbox_threads_per_account
self.server.outbox_thread_index[account_outbox_thread_name] = 0
return 0
# increment the ring buffer index
index = self.server.outbox_thread_index[account_outbox_thread_name] + 1
if index >= max_outbox_threads_per_account:
index = 0
self.server.outbox_thread_index[account_outbox_thread_name] = index
# remove any existing thread from the current index in the buffer
acct = account_outbox_thread_name
if self.server.outboxThread.get(acct):
if len(self.server.outboxThread[acct]) > index:
try:
if self.server.outboxThread[acct][index].is_alive():
self.server.outboxThread[acct][index].kill()
except BaseException:
pass
return index
def post_to_outbox_thread(self, message_json: {},
curr_session, proxy_type: str) -> bool:
"""Creates a thread to send a post
"""
account_outbox_thread_name = self.post_to_nickname
if not account_outbox_thread_name:
account_outbox_thread_name = '*'
index = _get_outbox_thread_index(self, account_outbox_thread_name, 8)
print('Creating outbox thread ' +
account_outbox_thread_name + '/' +
str(self.server.outbox_thread_index[account_outbox_thread_name]))
2024-03-05 13:20:46 +00:00
print('THREAD: post_to_outbox')
2024-03-01 17:10:04 +00:00
self.server.outboxThread[account_outbox_thread_name][index] = \
thread_with_trace(target=post_to_outbox,
2024-03-05 13:20:46 +00:00
args=(self, message_json.copy(),
2024-03-01 17:10:04 +00:00
self.server.project_version, None,
curr_session, proxy_type),
daemon=True)
print('Starting outbox thread')
outbox_thread = \
self.server.outboxThread[account_outbox_thread_name][index]
begin_thread(outbox_thread, '_post_to_outbox_thread')
return True
def update_inbox_queue(self, nickname: str, message_json: {},
message_bytes: str, debug: bool) -> int:
"""Update the inbox queue
"""
if debug:
print('INBOX: checking inbox queue restart')
if self.server.restart_inbox_queue_in_progress:
http_503(self)
print('INBOX: ' +
'message arrived but currently restarting inbox queue')
self.server.postreq_busy = False
return 2
# check that the incoming message has a fully recognized
# linked data context
if debug:
print('INBOX: checking valid context')
if not has_valid_context(message_json):
print('INBOX: ' +
'message arriving at inbox queue has no valid context ' +
str(message_json))
http_400(self)
self.server.postreq_busy = False
return 3
# check for blocked domains so that they can be rejected early
if debug:
print('INBOX: checking for actor')
message_domain = None
2024-04-08 14:00:47 +00:00
if not has_actor(message_json, debug):
2024-03-01 17:10:04 +00:00
print('INBOX: message arriving at inbox queue has no actor')
http_400(self)
self.server.postreq_busy = False
return 3
# actor should be a string
if debug:
print('INBOX: checking that actor is string')
actor_url = get_actor_from_post(message_json)
if not isinstance(actor_url, str):
print('INBOX: ' +
'actor should be a string ' + str(actor_url))
http_400(self)
self.server.postreq_busy = False
return 3
# check that some additional fields are strings
if debug:
print('INBOX: checking fields 1')
string_fields = ('id', 'type', 'published')
for check_field in string_fields:
if not message_json.get(check_field):
continue
if not isinstance(message_json[check_field], str):
print('INBOX: ' +
'id, type and published fields should be strings ' +
check_field + ' ' + str(message_json[check_field]))
http_400(self)
self.server.postreq_busy = False
return 3
# check that to/cc fields are lists
if debug:
print('INBOX: checking to and cc fields')
list_fields = ('to', 'cc')
for check_field in list_fields:
if not message_json.get(check_field):
continue
if not isinstance(message_json[check_field], list):
print('INBOX: WARN: To and Cc fields should be lists, ' +
check_field + '=' + str(message_json[check_field]))
# NOTE: this does not prevent further processing
if has_object_dict(message_json):
if debug:
print('INBOX: checking object fields')
# check that some fields are a string or list
string_or_list_fields = ('url', 'attributedTo')
for check_field in string_or_list_fields:
if not message_json['object'].get(check_field):
continue
field_value = message_json['object'][check_field]
if not isinstance(field_value, str) and \
not isinstance(field_value, list):
print('INBOX: ' +
check_field + ' should be a string or list ' +
str(message_json['object'][check_field]))
http_400(self)
self.server.postreq_busy = False
return 3
# check that some fields are strings
string_fields = (
'id', 'actor', 'type', 'content', 'published',
'summary'
)
for check_field in string_fields:
if not message_json['object'].get(check_field):
continue
if not isinstance(message_json['object'][check_field], str):
print('INBOX: ' +
check_field + ' should be a string ' +
str(message_json['object'][check_field]))
http_400(self)
self.server.postreq_busy = False
return 3
2024-03-29 14:55:38 +00:00
# check attachment is a list or dict
2024-03-01 17:10:04 +00:00
if debug:
2024-03-29 14:55:38 +00:00
print('INBOX: checking attachment is a list or dict')
if message_json['object'].get('attachment'):
if not isinstance(message_json['object']['attachment'], list) and \
not isinstance(message_json['object']['attachment'], dict):
print('INBOX: attachment should be a list or dict ' +
str(message_json['object']['attachment']))
http_400(self)
self.server.postreq_busy = False
return 3
2024-03-29 14:55:38 +00:00
# check that some fields are lists
if debug:
print('INBOX: checking object to and cc fields')
# check to and cc fields
list_fields = ('to', 'cc')
2024-03-01 17:10:04 +00:00
for check_field in list_fields:
if not message_json['object'].get(check_field):
continue
if not isinstance(message_json['object'][check_field], list):
print('INBOX: ' +
check_field + ' should be a list ' +
str(message_json['object'][check_field]))
http_400(self)
self.server.postreq_busy = False
return 3
# check that the content does not contain impossibly long urls
if message_json['object'].get('content'):
content_str = message_json['object']['content']
if not valid_url_lengths(content_str, 2048):
actor_url = get_actor_from_post(message_json)
print('INBOX: content contains urls which are too long ' +
actor_url)
http_400(self)
self.server.postreq_busy = False
return 3
# check that the summary does not contain links
if post_summary_contains_links(message_json):
http_400(self)
self.server.postreq_busy = False
return 3
# if this is a local only post, is it really local?
if 'localOnly' in message_json['object'] and \
message_json['object'].get('to') and \
message_json['object'].get('attributedTo'):
if not local_only_is_local(message_json,
self.server.domain_full):
http_400(self)
self.server.postreq_busy = False
return 3
# actor should look like a url
if debug:
print('INBOX: checking that actor looks like a url')
actor_url = get_actor_from_post(message_json)
if not resembles_url(actor_url):
print('INBOX: POST actor does not look like a url ' +
actor_url)
http_400(self)
self.server.postreq_busy = False
return 3
# sent by an actor on a local network address?
if debug:
print('INBOX: checking for local network access')
if not self.server.allow_local_network_access:
local_network_pattern_list = get_local_network_addresses()
actor_url = get_actor_from_post(message_json)
for local_network_pattern in local_network_pattern_list:
if local_network_pattern in actor_url:
print('INBOX: POST actor contains local network address ' +
actor_url)
http_400(self)
self.server.postreq_busy = False
return 3
actor_url = get_actor_from_post(message_json)
message_domain, _ = get_domain_from_actor(actor_url)
if not message_domain:
print('INBOX: POST from unknown domain ' + actor_url)
http_400(self)
self.server.postreq_busy = False
return 3
self.server.blocked_cache_last_updated = \
update_blocked_cache(self.server.base_dir,
self.server.blocked_cache,
self.server.blocked_cache_last_updated,
self.server.blocked_cache_update_secs)
if debug:
print('INBOX: checking for blocked domain ' + message_domain)
if is_blocked_domain(self.server.base_dir, message_domain,
self.server.blocked_cache,
self.server.block_federated):
print('INBOX: POST from blocked domain ' + message_domain)
http_400(self)
self.server.postreq_busy = False
return 3
message_nickname = get_nickname_from_actor(actor_url)
if not message_nickname:
print('INBOX: POST from unknown nickname ' + actor_url)
http_400(self)
self.server.postreq_busy = False
return 3
if debug:
print('INBOX: checking for blocked nickname ' + message_nickname)
if is_blocked_nickname(self.server.base_dir, message_nickname,
self.server.blocked_cache):
print('INBOX: POST from blocked nickname ' + message_nickname)
http_400(self)
self.server.postreq_busy = False
return 3
# if the inbox queue is full then return a busy code
if debug:
print('INBOX: checking for full queue')
if len(self.server.inbox_queue) >= self.server.max_queue_length:
if message_domain:
print('INBOX: Queue: ' +
'Inbox queue is full. Incoming post from ' +
actor_url)
else:
print('INBOX: Queue: Inbox queue is full')
http_503(self)
clear_queue_items(self.server.base_dir, self.server.inbox_queue)
if not self.server.restart_inbox_queue_in_progress:
self.server.restart_inbox_queue = True
self.server.postreq_busy = False
return 2
# follower synchronization endpoint information
if self.headers.get('Collection-Synchronization'):
if debug:
print('Collection-Synchronization: ' +
str(self.headers['Collection-Synchronization']))
# Convert the headers needed for signature verification to dict
headers_dict = {}
headers_dict['host'] = self.headers['host']
headers_dict['signature'] = self.headers['signature']
if self.headers.get('Date'):
headers_dict['Date'] = self.headers['Date']
elif self.headers.get('date'):
headers_dict['Date'] = self.headers['date']
if self.headers.get('digest'):
headers_dict['digest'] = self.headers['digest']
if self.headers.get('Collection-Synchronization'):
headers_dict['Collection-Synchronization'] = \
self.headers['Collection-Synchronization']
if self.headers.get('Content-type'):
headers_dict['Content-type'] = self.headers['Content-type']
if self.headers.get('Content-Length'):
headers_dict['Content-Length'] = self.headers['Content-Length']
elif self.headers.get('content-length'):
headers_dict['content-length'] = self.headers['content-length']
original_message_json = message_json.copy()
# whether to add a 'to' field to the message
add_to_field_types = (
'Follow', 'Like', 'EmojiReact', 'Add', 'Remove', 'Ignore', 'Move'
)
for add_to_type in add_to_field_types:
message_json, _ = \
2024-04-08 14:00:47 +00:00
add_to_field(add_to_type, message_json, debug)
2024-03-01 17:10:04 +00:00
begin_save_time = time.time()
# save the json for later queue processing
message_bytes_decoded = message_bytes.decode('utf-8')
self.server.blocked_cache_last_updated = \
update_blocked_cache(self.server.base_dir,
self.server.blocked_cache,
self.server.blocked_cache_last_updated,
self.server.blocked_cache_update_secs)
2024-12-16 13:19:51 +00:00
mitm = detect_mitm(self)
2024-03-01 17:10:04 +00:00
if debug:
print('INBOX: saving post to queue')
queue_filename = \
save_post_to_inbox_queue(self.server.base_dir,
self.server.http_prefix,
nickname,
self.server.domain_full,
message_json, original_message_json,
message_bytes_decoded,
headers_dict,
self.path,
2024-04-08 14:00:47 +00:00
debug,
2024-03-01 17:10:04 +00:00
self.server.blocked_cache,
self.server.block_federated,
self.server.system_language,
2024-11-02 12:51:51 +00:00
mitm,
self.server.maxMessageLength)
2024-03-01 17:10:04 +00:00
if queue_filename:
# add json to the queue
if queue_filename not in self.server.inbox_queue:
self.server.inbox_queue.append(queue_filename)
2024-04-08 14:00:47 +00:00
if debug:
2024-03-01 17:10:04 +00:00
time_diff = int((time.time() - begin_save_time) * 1000)
if time_diff > 200:
print('SLOW: slow save of inbox queue item ' +
queue_filename + ' took ' + str(time_diff) + ' mS')
self.send_response(201)
self.end_headers()
self.server.postreq_busy = False
return 0
http_503(self)
self.server.postreq_busy = False
return 1
def is_authorized(self) -> bool:
self.authorized_nickname = None
not_auth_paths = (
'/icons/', '/avatars/', '/favicons/',
'/system/accounts/avatars/',
'/system/accounts/headers/',
'/system/media_attachments/files/',
'/accounts/avatars/', '/accounts/headers/',
'/favicon.ico', '/newswire.xml',
'/newswire_favicon.ico', '/categories.xml'
)
for not_auth_str in not_auth_paths:
if self.path.startswith(not_auth_str):
return False
# token based authenticated used by the web interface
if self.headers.get('Cookie'):
if self.headers['Cookie'].startswith('epicyon='):
token_str = self.headers['Cookie'].split('=', 1)[1].strip()
if ';' in token_str:
token_str = token_str.split(';')[0].strip()
if self.server.tokens_lookup.get(token_str):
nickname = self.server.tokens_lookup[token_str]
if not is_system_account(nickname):
self.authorized_nickname = nickname
# default to the inbox of the person
if self.path == '/':
self.path = '/users/' + nickname + '/inbox'
# check that the path contains the same nickname
# as the cookie otherwise it would be possible
# to be authorized to use an account you don't own
if '/' + nickname + '/' in self.path:
return True
if '/' + nickname + '?' in self.path:
return True
if self.path.endswith('/' + nickname):
return True
if self.server.debug:
print('AUTH: nickname ' + nickname +
' was not found in path ' + self.path)
return False
print('AUTH: epicyon cookie ' +
'authorization failed, header=' +
self.headers['Cookie'].replace('epicyon=', '') +
' token_str=' + token_str)
return False
print('AUTH: Header cookie was not authorized')
return False
# basic auth for c2s
if self.headers.get('Authorization'):
if authorize(self.server.base_dir, self.path,
self.headers['Authorization'],
self.server.debug):
return True
print('AUTH: C2S Basic auth did not authorize ' +
self.headers['Authorization'])
return False
def show_person_options(self, calling_domain: str, path: str,
base_dir: str,
domain: str, domain_full: str,
getreq_start_time,
cookie: str, debug: bool,
authorized: bool,
curr_session) -> None:
"""Show person options screen
"""
back_to_path = ''
options_str = path.split('?options=')[1]
origin_path_str = path.split('?options=')[0]
if ';' in options_str and '/users/news/' not in path:
page_number = 1
options_list = options_str.split(';')
options_actor = options_list[0]
options_page_number = 1
if len(options_list) > 1:
options_page_number = options_list[1]
options_profile_url = ''
if len(options_list) > 2:
options_profile_url = options_list[2]
if '.' in options_profile_url and \
options_profile_url.startswith('/members/'):
ext = options_profile_url.split('.')[-1]
options_profile_url = options_profile_url.split('/members/')[1]
options_profile_url = \
options_profile_url.replace('.' + ext, '')
options_profile_url = \
'/users/' + options_profile_url + '/avatar.' + ext
back_to_path = 'moderation'
if len(options_page_number) > 5:
options_page_number = "1"
if options_page_number.isdigit():
page_number = int(options_page_number)
options_link = None
if len(options_list) > 3:
options_link = options_list[3]
is_group = False
donate_url = None
website_url = None
gemini_link = None
enigma_pub_key = None
pgp_pub_key = None
pgp_fingerprint = None
pronouns = None
pixelfed = None
discord = None
2024-08-15 19:58:26 +00:00
art_site_url = None
music_site_url = None
youtube = None
peertube = None
2024-03-01 17:10:04 +00:00
xmpp_address = None
matrix_address = None
blog_address = None
tox_address = None
briar_address = None
cwtch_address = None
ssb_address = None
email_address = None
locked_account = False
also_known_as = None
moved_to = ''
repo_url = None
actor_json = \
get_person_from_cache(base_dir,
options_actor,
self.server.person_cache)
if actor_json:
if actor_json.get('movedTo'):
moved_to = actor_json['movedTo']
if '"' in moved_to:
moved_to = moved_to.split('"')[1]
if actor_json.get('type'):
if actor_json['type'] == 'Group':
is_group = True
locked_account = get_locked_account(actor_json)
donate_url = get_donation_url(actor_json)
website_url = get_website(actor_json, self.server.translate)
gemini_link = get_gemini_link(actor_json)
pronouns = get_pronouns(actor_json)
pixelfed = get_pixelfed(actor_json)
discord = get_discord(actor_json)
2024-08-15 19:58:26 +00:00
art_site_url = get_art_site_url(actor_json)
music_site_url = get_music_site_url(actor_json)
youtube = get_youtube(actor_json)
peertube = get_peertube(actor_json)
2024-03-01 17:10:04 +00:00
xmpp_address = get_xmpp_address(actor_json)
matrix_address = get_matrix_address(actor_json)
ssb_address = get_ssb_address(actor_json)
blog_address = get_blog_address(actor_json)
tox_address = get_tox_address(actor_json)
briar_address = get_briar_address(actor_json)
cwtch_address = get_cwtch_address(actor_json)
email_address = get_email_address(actor_json)
enigma_pub_key = get_enigma_pub_key(actor_json)
pgp_pub_key = get_pgp_pub_key(actor_json)
pgp_fingerprint = get_pgp_fingerprint(actor_json)
if actor_json.get('alsoKnownAs'):
also_known_as = remove_html(actor_json['alsoKnownAs'])
repo_url = get_repo_url(actor_json)
access_keys = self.server.access_keys
nickname = 'instance'
if '/users/' in path:
nickname = path.split('/users/')[1]
if '/' in nickname:
nickname = nickname.split('/')[0]
if self.server.key_shortcuts.get(nickname):
access_keys = self.server.key_shortcuts[nickname]
if curr_session:
# because this is slow, do it in a separate thread
if self.server.thrCheckActor.get(nickname):
# kill existing thread
self.server.thrCheckActor[nickname].kill()
self.server.thrCheckActor[nickname] = \
thread_with_trace(target=check_for_changed_actor,
args=(curr_session,
2024-04-08 14:00:47 +00:00
base_dir,
2024-03-01 17:10:04 +00:00
self.server.http_prefix,
2024-04-08 14:00:47 +00:00
domain_full,
2024-03-01 17:10:04 +00:00
options_actor, options_profile_url,
self.server.person_cache,
self.server.check_actor_timeout),
daemon=True)
begin_thread(self.server.thrCheckActor[nickname],
'_show_person_options')
msg = \
html_person_options(self.server.default_timeline,
self.server.translate,
base_dir, domain,
domain_full,
origin_path_str,
options_actor,
options_profile_url,
options_link,
page_number, donate_url, website_url,
gemini_link, pronouns,
2024-03-01 17:10:04 +00:00
xmpp_address, matrix_address,
ssb_address, blog_address,
tox_address, briar_address,
cwtch_address,
enigma_pub_key,
pgp_pub_key, pgp_fingerprint,
email_address,
self.server.dormant_months,
back_to_path,
locked_account,
moved_to, also_known_as,
self.server.text_mode_banner,
self.server.news_instance,
authorized,
access_keys, is_group,
self.server.theme_name,
self.server.blocked_cache,
repo_url,
self.server.sites_unavailable,
youtube, peertube, pixelfed,
2024-08-15 19:58:26 +00:00
discord, music_site_url,
art_site_url,
self.server.mitm_servers)
2024-03-01 17:10:04 +00:00
if msg:
msg = msg.encode('utf-8')
msglen = len(msg)
set_headers(self, 'text/html', msglen,
cookie, calling_domain, False)
write2(self, msg)
fitness_performance(getreq_start_time, self.server.fitness,
'_GET', '_show_person_options', debug)
else:
http_404(self, 31)
return
if '/users/news/' in path:
redirect_headers(self, origin_path_str + '/tlfeatures',
2024-04-16 13:47:21 +00:00
cookie, calling_domain, 303)
2024-03-01 17:10:04 +00:00
return
origin_path_str_absolute = \
get_instance_url(calling_domain,
self.server.http_prefix,
2024-04-08 14:00:47 +00:00
domain_full,
2024-03-01 17:10:04 +00:00
self.server.onion_domain,
self.server.i2p_domain) + \
origin_path_str
redirect_headers(self, origin_path_str_absolute, cookie,
2024-04-16 13:47:21 +00:00
calling_domain, 303)
2024-03-01 17:10:04 +00:00
def get_user_agent(self) -> str:
"""Returns the user agent string from the headers
"""
ua_str = None
if self.headers.get('User-Agent'):
ua_str = self.headers['User-Agent']
elif self.headers.get('user-agent'):
ua_str = self.headers['user-agent']
elif self.headers.get('User-agent'):
ua_str = self.headers['User-agent']
return ua_str
def has_accept(self, calling_domain: str) -> bool:
"""Do the http headers have an Accept field?
"""
if not self.headers.get('Accept'):
if self.headers.get('accept'):
print('Upper case Accept')
self.headers['Accept'] = self.headers['accept']
if self.headers.get('Accept') or calling_domain.endswith('.b32.i2p'):
if not self.headers.get('Accept'):
self.headers['Accept'] = \
'text/html,application/xhtml+xml,' \
'application/xml;q=0.9,image/webp,*/*;q=0.8'
return True
return False
def etag_exists(self, media_filename: str) -> bool:
"""Does an etag header exist for the given file?
"""
etag_header = 'If-None-Match'
if not self.headers.get(etag_header):
etag_header = 'if-none-match'
if not self.headers.get(etag_header):
etag_header = 'If-none-match'
if self.headers.get(etag_header):
old_etag = self.headers[etag_header].replace('"', '')
if os.path.isfile(media_filename + '.etag'):
# load the etag from file
curr_etag = ''
try:
with open(media_filename + '.etag', 'r',
2024-07-16 11:21:28 +00:00
encoding='utf-8') as fp_media:
curr_etag = fp_media.read()
except OSError:
print('EX: _etag_exists unable to read ' +
str(media_filename))
if curr_etag and old_etag == curr_etag:
# The file has not changed
return True
return False
2024-11-16 17:13:56 +00:00
2024-11-18 14:06:50 +00:00
def _get_epicyon_domain_from_user_agent(ua_str: str) -> str:
"""Extracts the epicyon domain from the user agent
"""
if 'Epicyon/' not in ua_str:
return ''
ua_text = ua_str.split('Epicyon/')[1]
if '://' not in ua_text:
return ''
domain = ua_text.split('://')[1]
if '/' in domain:
domain = domain.split('/')[0]
return domain
def log_epicyon_instances(base_dir: str, ua_str: str,
2024-11-16 17:13:56 +00:00
known_epicyon_instances: []) -> None:
"""Saves a log of known epicyon instances
"""
2024-11-18 14:06:50 +00:00
calling_domain = _get_epicyon_domain_from_user_agent(ua_str)
if not calling_domain:
return
2024-11-16 17:13:56 +00:00
if calling_domain in known_epicyon_instances:
return
known_epicyon_instances.append(calling_domain)
known_epicyon_instances.sort()
epicyon_instances_filename = \
2024-11-18 13:00:51 +00:00
data_dir(base_dir) + '/known_epicyon_instances.json'
2024-11-18 12:53:39 +00:00
print('DEBUG: log_epicyon_instances: ' +
epicyon_instances_filename + ' ' + str(known_epicyon_instances))
2024-11-16 17:13:56 +00:00
save_json(known_epicyon_instances, epicyon_instances_filename)
def load_known_epicyon_instances(base_dir: str) -> []:
"""Loads a list of known epicyon instances
"""
epicyon_instances_filename = \
2024-11-18 13:00:51 +00:00
data_dir(base_dir) + '/known_epicyon_instances.json'
2024-11-16 17:13:56 +00:00
if not os.path.isfile(epicyon_instances_filename):
return []
known_epicyon_instances = load_json(epicyon_instances_filename)
if not known_epicyon_instances:
return []
return known_epicyon_instances