diff --git a/daemon.py b/daemon.py index 5d5a1b72c..87a0730d4 100644 --- a/daemon.py +++ b/daemon.py @@ -13983,7 +13983,8 @@ class PubServer(BaseHTTPRequestHandler): self.server.blocked_cache_last_updated, self.server.base_dir, self.server.blocked_cache, - self.server.blocked_cache_update_secs) + self.server.blocked_cache_update_secs, + self.server.crawlers_allowed) if block: self._400() return @@ -18524,7 +18525,8 @@ class PubServer(BaseHTTPRequestHandler): self.server.blocked_cache_last_updated, self.server.base_dir, self.server.blocked_cache, - self.server.blocked_cache_update_secs) + self.server.blocked_cache_update_secs, + self.server.crawlers_allowed) if block: self._400() self.server.postreq_busy = False @@ -19457,7 +19459,8 @@ def load_tokens(base_dir: str, tokens_dict: {}, tokens_lookup: {}) -> None: break -def run_daemon(dyslexic_font: bool, +def run_daemon(crawlers_allowed: [], + dyslexic_font: bool, content_license_url: str, lists_enabled: str, default_reply_interval_hrs: int, @@ -19636,6 +19639,9 @@ def run_daemon(dyslexic_font: bool, # list of blocked user agent types within the User-Agent header httpd.user_agents_blocked = user_agents_blocked + # list of crawler bots permitted within the User-Agent header + httpd.crawlers_allowed = crawlers_allowed + httpd.unit_test = unit_test httpd.allow_local_network_access = allow_local_network_access if unit_test: diff --git a/epicyon.py b/epicyon.py index b38ca64fc..87bcbbaf6 100644 --- a/epicyon.py +++ b/epicyon.py @@ -141,6 +141,10 @@ parser.add_argument('--lists_enabled', type=str, parser.add_argument('--userAgentBlocks', type=str, default=None, help='List of blocked user agents, separated by commas') +parser.add_argument('--crawlersAllowed', type=str, + default=None, + help='List of permitted web crawler user agents, ' + + 'separated by commas') parser.add_argument('--libretranslate', dest='libretranslateUrl', type=str, default=None, help='URL for LibreTranslate service') @@ -3301,8 +3305,20 @@ else: get_config_param(base_dir, 'userAgentsBlocked') if user_agents_blocked_str: agent_blocks_list = user_agents_blocked_str.split(',') - for agentBlockStr in agent_blocks_list: - user_agents_blocked.append(agentBlockStr.strip()) + for user_agents_blocked_str2 in agent_blocks_list: + user_agents_blocked.append(user_agents_blocked_str2.strip()) + +crawlers_allowed = [] +if args.crawlersAllowed: + crawlers_allowed_str = args.crawlersAllowed + set_config_param(base_dir, 'crawlersAllowed', crawlers_allowed_str) +else: + crawlers_allowed_str = \ + get_config_param(base_dir, 'crawlersAllowed') +if crawlers_allowed_str: + crawlers_allowed_list = crawlers_allowed_str.split(',') + for crawlers_allowed_str2 in crawlers_allowed_list: + crawlers_allowed.append(crawlers_allowed_str2.strip()) lists_enabled = '' if args.lists_enabled: @@ -3365,7 +3381,8 @@ if args.defaultCurrency: print('Default currency set to ' + args.defaultCurrency) if __name__ == "__main__": - run_daemon(args.dyslexic_font, + run_daemon(crawlers_allowed, + args.dyslexic_font, content_license_url, lists_enabled, args.default_reply_interval_hrs, diff --git a/tests.py b/tests.py index 14734a87b..f4121d76a 100644 --- a/tests.py +++ b/tests.py @@ -822,8 +822,10 @@ def create_server_alice(path: str, domain: str, port: int, lists_enabled = '' content_license_url = 'https://creativecommons.org/licenses/by/4.0' dyslexic_font = False + crawlers_allowed = [] print('Server running: Alice') - run_daemon(dyslexic_font, + run_daemon(crawlers_allowed, + dyslexic_font, content_license_url, lists_enabled, default_reply_interval_hrs, low_bandwidth, max_like_count, @@ -975,8 +977,10 @@ def create_server_bob(path: str, domain: str, port: int, lists_enabled = '' content_license_url = 'https://creativecommons.org/licenses/by/4.0' dyslexic_font = False + crawlers_allowed = [] print('Server running: Bob') - run_daemon(dyslexic_font, + run_daemon(crawlers_allowed, + dyslexic_font, content_license_url, lists_enabled, default_reply_interval_hrs, low_bandwidth, max_like_count, @@ -1051,8 +1055,10 @@ def create_server_eve(path: str, domain: str, port: int, federation_list: [], lists_enabled = '' content_license_url = 'https://creativecommons.org/licenses/by/4.0' dyslexic_font = False + crawlers_allowed = [] print('Server running: Eve') - run_daemon(dyslexic_font, + run_daemon(crawlers_allowed, + dyslexic_font, content_license_url, lists_enabled, default_reply_interval_hrs, low_bandwidth, max_like_count, @@ -1129,8 +1135,10 @@ def create_server_group(path: str, domain: str, port: int, lists_enabled = '' content_license_url = 'https://creativecommons.org/licenses/by/4.0' dyslexic_font = False + crawlers_allowed = [] print('Server running: Group') - run_daemon(dyslexic_font, + run_daemon(crawlers_allowed, + dyslexic_font, content_license_url, lists_enabled, default_reply_interval_hrs, low_bandwidth, max_like_count,