More LLM crawlers

main
Bob Mottram 2025-07-10 20:12:05 +01:00
parent 9a223b69ae
commit a202a80eee
1 changed file with 4 additions and 1 deletion

View File

@@ -149,7 +149,10 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
'cohere-train', 'crawlspace', 'facebookexternal', 'cohere-train', 'crawlspace', 'facebookexternal',
'img2dataset', 'imgproxy', 'isscyberriskcrawler', 'sidetrade', 'img2dataset', 'imgproxy', 'isscyberriskcrawler', 'sidetrade',
'kangaroo.ai', 'kangaroo bot', 'iaskspider', 'duckassistbot', 'kangaroo.ai', 'kangaroo bot', 'iaskspider', 'duckassistbot',
'pangubot', 'semrush' 'pangubot', 'semrush', 'poseidon research', 'awario',
'datenbank', 'echobot', 'mistralai', 'wardbot',
'gemini-deep', 'netestate', 'summalybot', 'thinkbot',
'tiktokspider'
) )
for bot_str in llm_bot_strings: for bot_str in llm_bot_strings:
if bot_str in agent_str_lower: if bot_str in agent_str_lower: