mirror of https://gitlab.com/bashrc2/epicyon
Less indentation
parent
6a9aea9a3c
commit
a215fe2e6a
31
crawlers.py
31
crawlers.py
|
@ -126,21 +126,22 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
|
|||
if agent_str:
|
||||
# is this a web crawler? If so then block it by default
|
||||
# unless this is a news instance or if it is in the allowed list
|
||||
bot_strings = ('bot/', 'bot-', '/bot', '_bot', 'bot_', 'bot;', ' bot ',
|
||||
'/robot', 'gptbot', '-ai/', ' ai/', '-ai ',
|
||||
' ai ', 'spider/', 'spider.ht', '/spider.', '-spider',
|
||||
'externalhit/', 'chatgpt', 'google', 'anthropic',
|
||||
'facebook', 'slurp', 'crawler', 'crawling', ' crawl ',
|
||||
'gigablast', 'archive.org', 'httrack',
|
||||
'spider-', ' spider ', 'findlink', 'ips-agent',
|
||||
'woriobot', 'mlbot', 'webbot', 'webcrawl',
|
||||
'voilabot', 'rank/', 'ezooms', 'heritrix', 'indeedbot',
|
||||
'woobot', 'infobot', 'viewbot', 'swimgbot', 'eright',
|
||||
'apercite', 'bot (', 'summify', 'ccbot', 'linkfind',
|
||||
'linkanalyze', 'analyzer', 'wotbox', 'ichiro',
|
||||
'drupact', 'searchengine', 'coccoc',
|
||||
'explorer/', 'explorer;', 'crystalsemantics',
|
||||
'scraper/', ' scraper ', ' scrape ', 'scraping')
|
||||
bot_strings = (
|
||||
'bot/', 'bot-', '/bot', '_bot', 'bot_', 'bot;', ' bot ',
|
||||
'/robot', 'gptbot', '-ai/', ' ai/', '-ai ',
|
||||
' ai ', 'spider/', 'spider.ht', '/spider.', '-spider',
|
||||
'externalhit/', 'chatgpt', 'google', 'anthropic',
|
||||
'facebook', 'slurp', 'crawler', 'crawling', ' crawl ',
|
||||
'gigablast', 'archive.org', 'httrack',
|
||||
'spider-', ' spider ', 'findlink', 'ips-agent',
|
||||
'woriobot', 'mlbot', 'webbot', 'webcrawl',
|
||||
'voilabot', 'rank/', 'ezooms', 'heritrix', 'indeedbot',
|
||||
'woobot', 'infobot', 'viewbot', 'swimgbot', 'eright',
|
||||
'apercite', 'bot (', 'summify', 'ccbot', 'linkfind',
|
||||
'linkanalyze', 'analyzer', 'wotbox', 'ichiro',
|
||||
'drupact', 'searchengine', 'coccoc',
|
||||
'explorer/', 'explorer;', 'crystalsemantics',
|
||||
'scraper/', ' scraper ', ' scrape ', 'scraping')
|
||||
contains_bot_string = False
|
||||
for bot_str in bot_strings:
|
||||
if bot_str in agent_str_lower:
|
||||
|
|
Loading…
Reference in New Issue