From 62b893dd8d403241d30d178a4735487d7eb1c539 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Fri, 12 Jan 2024 12:03:44 +0000
Subject: [PATCH] Block some ai crawlers

---
 crawlers.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crawlers.py b/crawlers.py
index 5aec5e51d..9176a22b2 100644
--- a/crawlers.py
+++ b/crawlers.py
@@ -125,7 +125,9 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
     if agent_str:
         # is this a web crawler? If so then block it by default
         # unless this is a news instance or if it is in the allowed list
-        bot_strings = ('bot/', 'bot-', '/bot', '/robot', 'gptbot')
+        bot_strings = ('bot/', 'bot-', '/bot', '/robot', 'gptbot',
+                       '-ai/', ' ai/', '-ai ', ' ai ', 'spider/',
+                       'externalhit/')
         contains_bot_string = False
         for bot_str in bot_strings:
             if bot_str in agent_str_lower: