Extra LLM crawlers

main
Bob Mottram 2024-09-01 21:05:34 +01:00
parent 0ec43c6323
commit 60c15d2358
1 changed file with 5 additions and 2 deletions

View File

@@ -133,9 +133,12 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
'gptbot', '-ai/', ' ai/', '-ai ', ' ai ', 'chatgpt',
'anthropic', 'mlbot', 'claude-web', 'claudebot', 'ccbot',
'facebookbot', 'google-extended', 'piplbot', 'oai-search',
'applebot-extended', 'meta-external', 'diffbot', 'perplexitybot',
'applebot', 'meta-external', 'diffbot', 'perplexitybot',
'omgili', 'imagesiftbot', 'bytespider', 'amazonbot', 'youbot',
'petalbot', 'ai2bot', 'allenai'
'petalbot', 'ai2bot', 'allenai', 'firecrawl', 'friendlycrawler',
'googleother', 'icc-crawler', 'scrapy', 'timpibot',
'velenpublic', 'webzio-extended', 'cohere-ai', 'facebookexternal',
'img2dataset'
)
for bot_str in llm_bot_strings:
if bot_str in agent_str_lower: