From 8070d2a27614565fb27bbde72b5b727eca4752a2 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Tue, 8 Mar 2022 12:40:15 +0000
Subject: [PATCH] More robot strings

---
 crawlers.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/crawlers.py b/crawlers.py
index 952f3ffdd..8bdffa1c2 100644
--- a/crawlers.py
+++ b/crawlers.py
@@ -122,7 +122,13 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
     if agent_str:
         # is this a web crawler? If so then block it by default
         # unless this is a news instance or if it is in the allowed list
-        if 'bot/' in agent_str_lower or 'bot-' in agent_str_lower:
+        bot_strings = ('bot/', 'bot-', '/bot', '/robot')
+        contains_bot_string = False
+        for bot_str in bot_strings:
+            if bot_str in agent_str_lower:
+                contains_bot_string = True
+                break
+        if contains_bot_string:
             if agent_str_lower not in known_bots:
                 known_bots.append(agent_str_lower)
                 known_bots.sort()
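
For reference, a minimal standalone sketch of the substring check this commit introduces, applied to a few sample user-agent strings. The is_bot_agent helper name and the sample agents are illustrative only and are not part of crawlers.py.

# Standalone sketch of the crawler detection added by this patch.
# Assumes the caller has already lowercased the user-agent string,
# as crawlers.py does before reaching this check.
def is_bot_agent(agent_str_lower: str) -> bool:
    """Return True if the lowercased user agent looks like a web crawler."""
    bot_strings = ('bot/', 'bot-', '/bot', '/robot')
    for bot_str in bot_strings:
        if bot_str in agent_str_lower:
            return True
    return False


if __name__ == '__main__':
    # Hypothetical sample agents, not taken from the project
    samples = [
        'mozilla/5.0 (compatible; googlebot/2.1; +http://www.google.com/bot.html)',
        'mozilla/5.0 (windows nt 10.0; win64; x64) gecko/20100101 firefox/98.0',
        'example/robot 1.0',
    ]
    for agent in samples:
        print(agent, '->', is_bot_agent(agent))
    # Expected: the first and third are flagged, the browser string is not.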