Log epicyon instances seen

merge-requests/30/head
Bob Mottram 2024-11-16 17:13:56 +00:00
parent 711171de9c
commit d416ba02f7
5 changed files with 54 additions and 0 deletions

View File

@ -94,6 +94,7 @@ from httpcodes import http_304
from httpcodes import http_400
from httpcodes import write2
from httpheaders import set_headers
from daemon_utils import load_known_epicyon_instances
from daemon_utils import has_accept
from daemon_utils import is_authorized
from poison import load_dictionary
@ -482,6 +483,8 @@ class EpicyonServer(ThreadingHTTPServer):
headers_catalog = {}
dictionary = []
twograms = {}
searchable_by_default = {}
known_epicyon_instances = []
def handle_error(self, request, client_address):
# suppress connection reset errors
@ -706,6 +709,9 @@ def run_daemon(accounts_data_dir: str,
# default "searchable by" for new posts for each account
httpd.searchable_by_default = load_searchable_by_default(base_dir)
# load the list of known Epicyon instances
httpd.known_epicyon_instances = load_known_epicyon_instances(base_dir)
# if a custom robots.txt exists then read it
robots_txt_filename = data_dir(base_dir) + '/robots.txt'
httpd.robots_txt = None

View File

@ -67,6 +67,7 @@ from daemon_utils import has_accept
from daemon_utils import show_person_options
from daemon_utils import is_authorized
from daemon_utils import get_user_agent
from daemon_utils import log_epicyon_instances
from httpheaders import update_headers_catalog
from httpheaders import set_headers_etag
from httpheaders import login_headers
@ -338,6 +339,10 @@ def daemon_http_get(self) -> None:
ua_str = get_user_agent(self)
if 'Epicyon/' in ua_str:
log_epicyon_instances(self.server.base_dir, calling_domain,
self.server.known_epicyon_instances)
if not _permitted_crawler_path(self.path):
block, self.server.blocked_cache_last_updated, llm = \
blocked_user_agent(calling_domain, ua_str,

View File

@ -21,6 +21,8 @@ from httpcodes import http_404
from httpheaders import set_headers_head
from media import path_is_video
from media import path_is_audio
from daemon_utils import get_user_agent
from daemon_utils import log_epicyon_instances
def daemon_http_head(self) -> None:
@ -33,6 +35,13 @@ def daemon_http_head(self) -> None:
return
calling_domain = self.server.domain_full
ua_str = get_user_agent(self)
if 'Epicyon/' in ua_str:
log_epicyon_instances(self.server.base_dir, calling_domain,
self.server.known_epicyon_instances)
if self.headers.get('Host'):
calling_domain = decoded_host(self.headers['Host'])
if self.server.onion_domain:

View File

@ -39,6 +39,7 @@ from httpcodes import http_503
from httpheaders import contains_suspicious_headers
from httpheaders import update_headers_catalog
from httpheaders import redirect_headers
from daemon_utils import log_epicyon_instances
from daemon_utils import get_user_agent
from daemon_utils import post_to_outbox
from daemon_utils import update_inbox_queue
@ -163,6 +164,10 @@ def daemon_http_post(self) -> None:
ua_str = get_user_agent(self)
if 'Epicyon/' in ua_str:
log_epicyon_instances(self.server.base_dir, calling_domain,
self.server.known_epicyon_instances)
block, self.server.blocked_cache_last_updated, _ = \
blocked_user_agent(calling_domain, ua_str,
self.server.news_instance,

View File

@ -26,6 +26,9 @@ from blocking import is_blocked_nickname
from blocking import is_blocked_domain
from content import valid_url_lengths
from posts import add_to_field
from utils import data_dir
from utils import load_json
from utils import save_json
from utils import get_instance_url
from utils import remove_html
from utils import get_locked_account
@ -863,3 +866,29 @@ def etag_exists(self, media_filename: str) -> bool:
# The file has not changed
return True
return False
def log_epicyon_instances(base_dir: str, calling_domain: str,
                          known_epicyon_instances: []) -> None:
    """Saves a log of known epicyon instances

    If calling_domain is not already within known_epicyon_instances
    then it is appended, the list is sorted in place and the whole
    list is written with save_json to known_epicyon_instances.txt
    within the data directory for base_dir.
    The list passed in is mutated, so the caller's copy stays in step
    with what was saved.
    An empty calling_domain is ignored.
    """
    if not calling_domain:
        # nothing to record, and a None entry mixed with strings
        # would make the sort below raise a TypeError
        return
    if calling_domain in known_epicyon_instances:
        # already known, so avoid rewriting the file
        return
    known_epicyon_instances.append(calling_domain)
    known_epicyon_instances.sort()
    epicyon_instances_filename = \
        data_dir(base_dir) + '/known_epicyon_instances.txt'
    save_json(known_epicyon_instances, epicyon_instances_filename)
def load_known_epicyon_instances(base_dir: str) -> []:
    """Loads a list of known epicyon instances

    Reads known_epicyon_instances.txt from the data directory for
    base_dir using load_json.
    Returns an empty list if the file does not exist, or if what was
    loaded is not a list.
    """
    epicyon_instances_filename = \
        data_dir(base_dir) + '/known_epicyon_instances.txt'
    if not os.path.isfile(epicyon_instances_filename):
        return []
    known_epicyon_instances = load_json(epicyon_instances_filename)
    # the result gets appended to and sorted when instances are logged,
    # so anything other than a list (eg. a hand edited or corrupted
    # file) is discarded rather than returned
    if not isinstance(known_epicyon_instances, list):
        return []
    return known_epicyon_instances