From a202a80eee1e97bca8386d33f7fed562541cc59b Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 10 Jul 2025 20:12:05 +0100
Subject: [PATCH] More LLM crawlers

---
 crawlers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crawlers.py b/crawlers.py
index fc4cc9191..b06a23718 100644
--- a/crawlers.py
+++ b/crawlers.py
@@ -149,7 +149,10 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
         'cohere-train', 'crawlspace', 'facebookexternal', 'img2dataset',
         'imgproxy', 'isscyberriskcrawler', 'sidetrade', 'kangaroo.ai',
         'kangaroo bot', 'iaskspider', 'duckassistbot',
-        'pangubot', 'semrush'
+        'pangubot', 'semrush', 'poseidon research', 'awario',
+        'datenbank', 'echobot', 'mistralai', 'wardbot',
+        'gemini-deep', 'netestate', 'summalybot', 'thinkbot',
+        'tiktokspider'
     )
     for bot_str in llm_bot_strings:
         if bot_str in agent_str_lower: