mirror of https://gitlab.com/bashrc2/epicyon
Log epicyon instances seen
parent
711171de9c
commit
d416ba02f7
|
@ -94,6 +94,7 @@ from httpcodes import http_304
|
||||||
from httpcodes import http_400
|
from httpcodes import http_400
|
||||||
from httpcodes import write2
|
from httpcodes import write2
|
||||||
from httpheaders import set_headers
|
from httpheaders import set_headers
|
||||||
|
from daemon_utils import load_known_epicyon_instances
|
||||||
from daemon_utils import has_accept
|
from daemon_utils import has_accept
|
||||||
from daemon_utils import is_authorized
|
from daemon_utils import is_authorized
|
||||||
from poison import load_dictionary
|
from poison import load_dictionary
|
||||||
|
@ -482,6 +483,8 @@ class EpicyonServer(ThreadingHTTPServer):
|
||||||
headers_catalog = {}
|
headers_catalog = {}
|
||||||
dictionary = []
|
dictionary = []
|
||||||
twograms = {}
|
twograms = {}
|
||||||
|
searchable_by_default = {}
|
||||||
|
known_epicyon_instances = []
|
||||||
|
|
||||||
def handle_error(self, request, client_address):
|
def handle_error(self, request, client_address):
|
||||||
# surpress connection reset errors
|
# surpress connection reset errors
|
||||||
|
@ -706,6 +709,9 @@ def run_daemon(accounts_data_dir: str,
|
||||||
# default "searchable by" for new posts for each account
|
# default "searchable by" for new posts for each account
|
||||||
httpd.searchable_by_default = load_searchable_by_default(base_dir)
|
httpd.searchable_by_default = load_searchable_by_default(base_dir)
|
||||||
|
|
||||||
|
# load the list of known Epicyon instances
|
||||||
|
httpd.known_epicyon_instances = load_known_epicyon_instances(base_dir)
|
||||||
|
|
||||||
# if a custom robots.txt exists then read it
|
# if a custom robots.txt exists then read it
|
||||||
robots_txt_filename = data_dir(base_dir) + '/robots.txt'
|
robots_txt_filename = data_dir(base_dir) + '/robots.txt'
|
||||||
httpd.robots_txt = None
|
httpd.robots_txt = None
|
||||||
|
|
|
@ -67,6 +67,7 @@ from daemon_utils import has_accept
|
||||||
from daemon_utils import show_person_options
|
from daemon_utils import show_person_options
|
||||||
from daemon_utils import is_authorized
|
from daemon_utils import is_authorized
|
||||||
from daemon_utils import get_user_agent
|
from daemon_utils import get_user_agent
|
||||||
|
from daemon_utils import log_epicyon_instances
|
||||||
from httpheaders import update_headers_catalog
|
from httpheaders import update_headers_catalog
|
||||||
from httpheaders import set_headers_etag
|
from httpheaders import set_headers_etag
|
||||||
from httpheaders import login_headers
|
from httpheaders import login_headers
|
||||||
|
@ -338,6 +339,10 @@ def daemon_http_get(self) -> None:
|
||||||
|
|
||||||
ua_str = get_user_agent(self)
|
ua_str = get_user_agent(self)
|
||||||
|
|
||||||
|
if 'Epicyon/' in ua_str:
|
||||||
|
log_epicyon_instances(self.server.base_dir, calling_domain,
|
||||||
|
self.server.known_epicyon_instances)
|
||||||
|
|
||||||
if not _permitted_crawler_path(self.path):
|
if not _permitted_crawler_path(self.path):
|
||||||
block, self.server.blocked_cache_last_updated, llm = \
|
block, self.server.blocked_cache_last_updated, llm = \
|
||||||
blocked_user_agent(calling_domain, ua_str,
|
blocked_user_agent(calling_domain, ua_str,
|
||||||
|
|
|
@ -21,6 +21,8 @@ from httpcodes import http_404
|
||||||
from httpheaders import set_headers_head
|
from httpheaders import set_headers_head
|
||||||
from media import path_is_video
|
from media import path_is_video
|
||||||
from media import path_is_audio
|
from media import path_is_audio
|
||||||
|
from daemon_utils import get_user_agent
|
||||||
|
from daemon_utils import log_epicyon_instances
|
||||||
|
|
||||||
|
|
||||||
def daemon_http_head(self) -> None:
|
def daemon_http_head(self) -> None:
|
||||||
|
@ -33,6 +35,13 @@ def daemon_http_head(self) -> None:
|
||||||
return
|
return
|
||||||
|
|
||||||
calling_domain = self.server.domain_full
|
calling_domain = self.server.domain_full
|
||||||
|
|
||||||
|
ua_str = get_user_agent(self)
|
||||||
|
|
||||||
|
if 'Epicyon/' in ua_str:
|
||||||
|
log_epicyon_instances(self.server.base_dir, calling_domain,
|
||||||
|
self.server.known_epicyon_instances)
|
||||||
|
|
||||||
if self.headers.get('Host'):
|
if self.headers.get('Host'):
|
||||||
calling_domain = decoded_host(self.headers['Host'])
|
calling_domain = decoded_host(self.headers['Host'])
|
||||||
if self.server.onion_domain:
|
if self.server.onion_domain:
|
||||||
|
|
|
@ -39,6 +39,7 @@ from httpcodes import http_503
|
||||||
from httpheaders import contains_suspicious_headers
|
from httpheaders import contains_suspicious_headers
|
||||||
from httpheaders import update_headers_catalog
|
from httpheaders import update_headers_catalog
|
||||||
from httpheaders import redirect_headers
|
from httpheaders import redirect_headers
|
||||||
|
from daemon_utils import log_epicyon_instances
|
||||||
from daemon_utils import get_user_agent
|
from daemon_utils import get_user_agent
|
||||||
from daemon_utils import post_to_outbox
|
from daemon_utils import post_to_outbox
|
||||||
from daemon_utils import update_inbox_queue
|
from daemon_utils import update_inbox_queue
|
||||||
|
@ -163,6 +164,10 @@ def daemon_http_post(self) -> None:
|
||||||
|
|
||||||
ua_str = get_user_agent(self)
|
ua_str = get_user_agent(self)
|
||||||
|
|
||||||
|
if 'Epicyon/' in ua_str:
|
||||||
|
log_epicyon_instances(self.server.base_dir, calling_domain,
|
||||||
|
self.server.known_epicyon_instances)
|
||||||
|
|
||||||
block, self.server.blocked_cache_last_updated, _ = \
|
block, self.server.blocked_cache_last_updated, _ = \
|
||||||
blocked_user_agent(calling_domain, ua_str,
|
blocked_user_agent(calling_domain, ua_str,
|
||||||
self.server.news_instance,
|
self.server.news_instance,
|
||||||
|
|
|
@ -26,6 +26,9 @@ from blocking import is_blocked_nickname
|
||||||
from blocking import is_blocked_domain
|
from blocking import is_blocked_domain
|
||||||
from content import valid_url_lengths
|
from content import valid_url_lengths
|
||||||
from posts import add_to_field
|
from posts import add_to_field
|
||||||
|
from utils import data_dir
|
||||||
|
from utils import load_json
|
||||||
|
from utils import save_json
|
||||||
from utils import get_instance_url
|
from utils import get_instance_url
|
||||||
from utils import remove_html
|
from utils import remove_html
|
||||||
from utils import get_locked_account
|
from utils import get_locked_account
|
||||||
|
@ -863,3 +866,29 @@ def etag_exists(self, media_filename: str) -> bool:
|
||||||
# The file has not changed
|
# The file has not changed
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def log_epicyon_instances(base_dir: str, calling_domain: str,
                          known_epicyon_instances: list) -> None:
    """Records a domain in the list of known epicyon instances

    base_dir: passed to data_dir() to locate the directory in which
        known_epicyon_instances.txt is stored
    calling_domain: the domain to be recorded
    known_epicyon_instances: list of domains seen so far. It is
        modified in place (the new domain is appended and the list
        is then sorted), so the caller's list is kept up to date

    Nothing is changed or saved if the domain is empty or is
    already within the list.
    """
    # an empty or missing domain is not a meaningful instance name,
    # so don't record it
    if not calling_domain:
        return
    # already known, so there is no need to rewrite the file
    if calling_domain in known_epicyon_instances:
        return
    known_epicyon_instances.append(calling_domain)
    known_epicyon_instances.sort()
    # NOTE: the filename has a .txt extension but the list is
    # written using save_json
    epicyon_instances_filename = \
        data_dir(base_dir) + '/known_epicyon_instances.txt'
    save_json(known_epicyon_instances, epicyon_instances_filename)
|
||||||
|
|
||||||
|
|
||||||
|
def load_known_epicyon_instances(base_dir: str) -> list:
    """Loads a list of known epicyon instances

    base_dir: passed to data_dir() to locate the directory which
        contains known_epicyon_instances.txt

    Returns the list of domains read from the file via load_json,
    or an empty list if the file does not exist, could not be
    loaded, or does not contain a list.
    """
    epicyon_instances_filename = \
        data_dir(base_dir) + '/known_epicyon_instances.txt'
    if not os.path.isfile(epicyon_instances_filename):
        return []
    known_epicyon_instances = load_json(epicyon_instances_filename)
    # log_epicyon_instances calls append and sort on this value,
    # so anything other than a list (eg. a dict or string from a
    # damaged file) is treated the same as no file
    if not isinstance(known_epicyon_instances, list):
        return []
    # keep only non-empty strings, since a list of mixed types
    # can't be sorted
    return [domain for domain in known_epicyon_instances
            if isinstance(domain, str) and domain]
|
||||||
|
|
Loading…
Reference in New Issue