mirror of https://gitlab.com/bashrc2/epicyon
Log epicyon instances seen
parent
711171de9c
commit
d416ba02f7
|
@ -94,6 +94,7 @@ from httpcodes import http_304
|
|||
from httpcodes import http_400
|
||||
from httpcodes import write2
|
||||
from httpheaders import set_headers
|
||||
from daemon_utils import load_known_epicyon_instances
|
||||
from daemon_utils import has_accept
|
||||
from daemon_utils import is_authorized
|
||||
from poison import load_dictionary
|
||||
|
@ -482,6 +483,8 @@ class EpicyonServer(ThreadingHTTPServer):
|
|||
headers_catalog = {}
|
||||
dictionary = []
|
||||
twograms = {}
|
||||
searchable_by_default = {}
|
||||
known_epicyon_instances = []
|
||||
|
||||
def handle_error(self, request, client_address):
|
||||
# suppress connection reset errors
|
||||
|
@ -706,6 +709,9 @@ def run_daemon(accounts_data_dir: str,
|
|||
# default "searchable by" for new posts for each account
|
||||
httpd.searchable_by_default = load_searchable_by_default(base_dir)
|
||||
|
||||
# load the list of known Epicyon instances
|
||||
httpd.known_epicyon_instances = load_known_epicyon_instances(base_dir)
|
||||
|
||||
# if a custom robots.txt exists then read it
|
||||
robots_txt_filename = data_dir(base_dir) + '/robots.txt'
|
||||
httpd.robots_txt = None
|
||||
|
|
|
@ -67,6 +67,7 @@ from daemon_utils import has_accept
|
|||
from daemon_utils import show_person_options
|
||||
from daemon_utils import is_authorized
|
||||
from daemon_utils import get_user_agent
|
||||
from daemon_utils import log_epicyon_instances
|
||||
from httpheaders import update_headers_catalog
|
||||
from httpheaders import set_headers_etag
|
||||
from httpheaders import login_headers
|
||||
|
@ -338,6 +339,10 @@ def daemon_http_get(self) -> None:
|
|||
|
||||
ua_str = get_user_agent(self)
|
||||
|
||||
if 'Epicyon/' in ua_str:
|
||||
log_epicyon_instances(self.server.base_dir, calling_domain,
|
||||
self.server.known_epicyon_instances)
|
||||
|
||||
if not _permitted_crawler_path(self.path):
|
||||
block, self.server.blocked_cache_last_updated, llm = \
|
||||
blocked_user_agent(calling_domain, ua_str,
|
||||
|
|
|
@ -21,6 +21,8 @@ from httpcodes import http_404
|
|||
from httpheaders import set_headers_head
|
||||
from media import path_is_video
|
||||
from media import path_is_audio
|
||||
from daemon_utils import get_user_agent
|
||||
from daemon_utils import log_epicyon_instances
|
||||
|
||||
|
||||
def daemon_http_head(self) -> None:
|
||||
|
@ -33,6 +35,13 @@ def daemon_http_head(self) -> None:
|
|||
return
|
||||
|
||||
calling_domain = self.server.domain_full
|
||||
|
||||
ua_str = get_user_agent(self)
|
||||
|
||||
if 'Epicyon/' in ua_str:
|
||||
log_epicyon_instances(self.server.base_dir, calling_domain,
|
||||
self.server.known_epicyon_instances)
|
||||
|
||||
if self.headers.get('Host'):
|
||||
calling_domain = decoded_host(self.headers['Host'])
|
||||
if self.server.onion_domain:
|
||||
|
|
|
@ -39,6 +39,7 @@ from httpcodes import http_503
|
|||
from httpheaders import contains_suspicious_headers
|
||||
from httpheaders import update_headers_catalog
|
||||
from httpheaders import redirect_headers
|
||||
from daemon_utils import log_epicyon_instances
|
||||
from daemon_utils import get_user_agent
|
||||
from daemon_utils import post_to_outbox
|
||||
from daemon_utils import update_inbox_queue
|
||||
|
@ -163,6 +164,10 @@ def daemon_http_post(self) -> None:
|
|||
|
||||
ua_str = get_user_agent(self)
|
||||
|
||||
if 'Epicyon/' in ua_str:
|
||||
log_epicyon_instances(self.server.base_dir, calling_domain,
|
||||
self.server.known_epicyon_instances)
|
||||
|
||||
block, self.server.blocked_cache_last_updated, _ = \
|
||||
blocked_user_agent(calling_domain, ua_str,
|
||||
self.server.news_instance,
|
||||
|
|
|
@ -26,6 +26,9 @@ from blocking import is_blocked_nickname
|
|||
from blocking import is_blocked_domain
|
||||
from content import valid_url_lengths
|
||||
from posts import add_to_field
|
||||
from utils import data_dir
|
||||
from utils import load_json
|
||||
from utils import save_json
|
||||
from utils import get_instance_url
|
||||
from utils import remove_html
|
||||
from utils import get_locked_account
|
||||
|
@ -863,3 +866,29 @@ def etag_exists(self, media_filename: str) -> bool:
|
|||
# The file has not changed
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def log_epicyon_instances(base_dir: str, calling_domain: str,
                          known_epicyon_instances: []) -> None:
    """Saves a log of known epicyon instances

    If calling_domain is not already within known_epicyon_instances
    then it is appended, the list is sorted and the whole list is
    saved as known_epicyon_instances.txt under the directory returned
    by data_dir(base_dir).

    The list is updated in place, so the caller's list also gains
    the new entry. Nothing is returned.
    """
    # an empty or missing domain is not worth recording, and would
    # otherwise be written to the file as a blank entry
    if not calling_domain:
        return
    # already known, so avoid rewriting the file
    if calling_domain in known_epicyon_instances:
        return
    known_epicyon_instances.append(calling_domain)
    known_epicyon_instances.sort()
    epicyon_instances_filename = \
        data_dir(base_dir) + '/known_epicyon_instances.txt'
    save_json(known_epicyon_instances, epicyon_instances_filename)
|
||||
|
||||
|
||||
def load_known_epicyon_instances(base_dir: str) -> []:
    """Loads a list of known epicyon instances

    Reads known_epicyon_instances.txt from the directory returned by
    data_dir(base_dir) using load_json.

    Returns the loaded list, or an empty list if the file does not
    exist, could not be loaded, or does not contain a list.
    """
    epicyon_instances_filename = \
        data_dir(base_dir) + '/known_epicyon_instances.txt'
    if not os.path.isfile(epicyon_instances_filename):
        return []
    known_epicyon_instances = load_json(epicyon_instances_filename)
    # log_epicyon_instances appends to and sorts this value, so
    # anything which is not a list (eg. a damaged or hand edited
    # file containing a dict) is discarded rather than returned
    if not isinstance(known_epicyon_instances, list):
        return []
    return known_epicyon_instances
|
||||
|
|
Loading…
Reference in New Issue