diff --git a/daemon_get.py b/daemon_get.py index b5088fbab..d3b088ef1 100644 --- a/daemon_get.py +++ b/daemon_get.py @@ -342,7 +342,7 @@ def daemon_http_get(self) -> None: if ua_str: if 'Epicyon/' in ua_str: - log_epicyon_instances(self.server.base_dir, calling_domain, + log_epicyon_instances(self.server.base_dir, ua_str, self.server.known_epicyon_instances) if not _permitted_crawler_path(self.path): diff --git a/daemon_head.py b/daemon_head.py index dc05a2462..8717439ca 100644 --- a/daemon_head.py +++ b/daemon_head.py @@ -40,7 +40,7 @@ def daemon_http_head(self) -> None: if ua_str: if 'Epicyon/' in ua_str: - log_epicyon_instances(self.server.base_dir, calling_domain, + log_epicyon_instances(self.server.base_dir, ua_str, self.server.known_epicyon_instances) if self.headers.get('Host'): diff --git a/daemon_post.py b/daemon_post.py index 5eccc4f62..2876e80a0 100644 --- a/daemon_post.py +++ b/daemon_post.py @@ -166,7 +166,7 @@ def daemon_http_post(self) -> None: if ua_str: if 'Epicyon/' in ua_str: - log_epicyon_instances(self.server.base_dir, calling_domain, + log_epicyon_instances(self.server.base_dir, ua_str, self.server.known_epicyon_instances) block, self.server.blocked_cache_last_updated, _ = \ diff --git a/daemon_utils.py b/daemon_utils.py index 831bf663c..94fb4ba4e 100644 --- a/daemon_utils.py +++ b/daemon_utils.py @@ -868,10 +868,27 @@ def etag_exists(self, media_filename: str) -> bool: return False -def log_epicyon_instances(base_dir: str, calling_domain: str, +def _get_epicyon_domain_from_user_agent(ua_str: str) -> str: + """Extracts the epicyon domain from the user agent + """ + if 'Epicyon/' not in ua_str: + return '' + ua_text = ua_str.split('Epicyon/')[1] + if '://' not in ua_text: + return '' + domain = ua_text.split('://')[1] + if '/' in domain: + domain = domain.split('/')[0] + return domain + + +def log_epicyon_instances(base_dir: str, ua_str: str, known_epicyon_instances: []) -> None: """Saves a log of known epicyon instances """ + calling_domain = _get_epicyon_domain_from_user_agent(ua_str) + if not calling_domain: + return if calling_domain in known_epicyon_instances: return known_epicyon_instances.append(calling_domain)