diff --git a/crawlers.py b/crawlers.py index 5aec5e51d..9176a22b2 100644 --- a/crawlers.py +++ b/crawlers.py @@ -125,7 +125,9 @@ def blocked_user_agent(calling_domain: str, agent_str: str, if agent_str: # is this a web crawler? If so then block it by default # unless this is a news instance or if it is in the allowed list - bot_strings = ('bot/', 'bot-', '/bot', '/robot', 'gptbot') + bot_strings = ('bot/', 'bot-', '/bot', '/robot', 'gptbot', + '-ai/', ' ai/', '-ai ', ' ai ', 'spider/', + 'externalhit/') contains_bot_string = False for bot_str in bot_strings: if bot_str in agent_str_lower: