Extract epicyon domain from user agent

main
Bob Mottram 2024-11-18 14:06:50 +00:00
parent 4f5bda9a70
commit 7ae8e795e5
4 changed files with 21 additions and 4 deletions

View File

@ -342,7 +342,7 @@ def daemon_http_get(self) -> None:
if ua_str: if ua_str:
if 'Epicyon/' in ua_str: if 'Epicyon/' in ua_str:
log_epicyon_instances(self.server.base_dir, calling_domain, log_epicyon_instances(self.server.base_dir, ua_str,
self.server.known_epicyon_instances) self.server.known_epicyon_instances)
if not _permitted_crawler_path(self.path): if not _permitted_crawler_path(self.path):

View File

@ -40,7 +40,7 @@ def daemon_http_head(self) -> None:
if ua_str: if ua_str:
if 'Epicyon/' in ua_str: if 'Epicyon/' in ua_str:
log_epicyon_instances(self.server.base_dir, calling_domain, log_epicyon_instances(self.server.base_dir, ua_str,
self.server.known_epicyon_instances) self.server.known_epicyon_instances)
if self.headers.get('Host'): if self.headers.get('Host'):

View File

@ -166,7 +166,7 @@ def daemon_http_post(self) -> None:
if ua_str: if ua_str:
if 'Epicyon/' in ua_str: if 'Epicyon/' in ua_str:
log_epicyon_instances(self.server.base_dir, calling_domain, log_epicyon_instances(self.server.base_dir, ua_str,
self.server.known_epicyon_instances) self.server.known_epicyon_instances)
block, self.server.blocked_cache_last_updated, _ = \ block, self.server.blocked_cache_last_updated, _ = \

View File

@ -868,10 +868,27 @@ def etag_exists(self, media_filename: str) -> bool:
return False return False
def _get_epicyon_domain_from_user_agent(ua_str: str) -> str:
    """Extracts the epicyon domain from the user agent

    Finds the first 'Epicyon/' token within ua_str, then the first
    '://' which follows it, and returns the host part which comes
    after that '://'.

    Returns an empty string if ua_str is empty, if it contains no
    'Epicyon/' token, or if no '://' follows that token.
    """
    if not ua_str:
        return ''
    _, epicyon_marker, ua_text = ua_str.partition('Epicyon/')
    if not epicyon_marker:
        return ''
    _, scheme_marker, domain = ua_text.partition('://')
    if not scheme_marker:
        return ''
    # The user agent is supplied by the remote client, so the url is not
    # guaranteed to be followed by '/'. Stop at the first character which
    # cannot be part of a host (path, query, fragment, whitespace or the
    # punctuation which commonly closes a user agent comment), so that
    # trailing text does not end up within the returned domain
    for idx, char in enumerate(domain):
        if char.isspace() or char in '/?#;,)':
            return domain[:idx]
    return domain
def log_epicyon_instances(base_dir: str, ua_str: str,
known_epicyon_instances: []) -> None: known_epicyon_instances: []) -> None:
"""Saves a log of known epicyon instances """Saves a log of known epicyon instances
""" """
calling_domain = _get_epicyon_domain_from_user_agent(ua_str)
if not calling_domain:
return
if calling_domain in known_epicyon_instances: if calling_domain in known_epicyon_instances:
return return
known_epicyon_instances.append(calling_domain) known_epicyon_instances.append(calling_domain)