mirror of https://gitlab.com/bashrc2/epicyon
More LLM crawlers
parent
60ea8bfbec
commit
d4084f6e9e
|
@ -139,7 +139,8 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
|
||||||
'petalbot', 'ai2bot', 'allenai', 'firecrawl', 'friendlycrawler',
|
'petalbot', 'ai2bot', 'allenai', 'firecrawl', 'friendlycrawler',
|
||||||
'googleother', 'icc-crawler', 'scrapy', 'timpibot',
|
'googleother', 'icc-crawler', 'scrapy', 'timpibot',
|
||||||
'velenpublic', 'webzio-extended', 'cohere-ai', 'facebookexternal',
|
'velenpublic', 'webzio-extended', 'cohere-ai', 'facebookexternal',
|
||||||
'img2dataset'
|
'img2dataset', 'isscyberriskcrawler', 'sidetrade', 'kangaroo',
|
||||||
|
'iaskspider'
|
||||||
)
|
)
|
||||||
for bot_str in llm_bot_strings:
|
for bot_str in llm_bot_strings:
|
||||||
if bot_str in agent_str_lower:
|
if bot_str in agent_str_lower:
|
||||||
|
|
Loading…
Reference in New Issue