mirror of https://gitlab.com/bashrc2/epicyon
Tidying of user agent blocks
parent
7f0d897299
commit
3aab054c04
82
daemon.py
82
daemon.py
|
@ -379,6 +379,7 @@ from fitnessFunctions import html_watch_points_graph
|
|||
from siteactive import referer_is_active
|
||||
from webapp_likers import html_likers_of_post
|
||||
from crawlers import update_known_crawlers
|
||||
from crawlers import blocked_user_agent
|
||||
import os
|
||||
|
||||
|
||||
|
@ -560,65 +561,6 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
else:
|
||||
print('ERROR: unable to create vote')
|
||||
|
||||
def _blocked_user_agent(self, calling_domain: str, agent_str: str) -> bool:
    """Should a GET or POST be blocked based upon its user agent?

    calling_domain: a User-Agent domain which starts with this value
        skips the blocked domain lookup
    agent_str: value of the User-Agent header, which may be empty or None

    Returns True if the request should be blocked. A missing or empty
    User-Agent is not blocked.
    """
    if not agent_str:
        # no User-Agent header is present, so there is nothing to match
        return False

    agent_str_lower = agent_str.lower()

    # agents which are always blocked, matched case insensitively
    default_agent_blocks = [
        'fedilist'
    ]
    for ua_block in default_agent_blocks:
        if ua_block in agent_str_lower:
            print('Blocked User agent: ' + ua_block)
            return True

    # is this a web crawler? If so then block it,
    # unless this is a news instance
    if 'bot/' in agent_str_lower or 'bot-' in agent_str_lower:
        if self.server.news_instance:
            return False
        print('Blocked Crawler: ' + agent_str)
        return True

    # get domain name from User-Agent
    agent_domain = user_agent_domain(agent_str, self.server.debug)

    # is the User-Agent type blocked? eg. "Mastodon"
    # NOTE: unlike the checks above, this match is case sensitive
    if self.server.user_agents_blocked:
        if any(agent_name in agent_str
               for agent_name in self.server.user_agents_blocked):
            return True

    if not agent_domain:
        return False

    # is the User-Agent domain blocked
    blocked_ua = False
    if not agent_domain.startswith(calling_domain):
        # refresh the blocked domains cache before consulting it
        self.server.blocked_cache_last_updated = \
            update_blocked_cache(self.server.base_dir,
                                 self.server.blocked_cache,
                                 self.server.blocked_cache_last_updated,
                                 self.server.blocked_cache_update_secs)

        blocked_ua = is_blocked_domain(self.server.base_dir, agent_domain,
                                       self.server.blocked_cache)
        if blocked_ua:
            print('Blocked User agent: ' + agent_domain)
    return blocked_ua
|
||||
|
||||
def _request_csv(self) -> bool:
|
||||
"""Should a csv response be given?
|
||||
"""
|
||||
|
@ -14033,7 +13975,16 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
ua_str = self._get_user_agent()
|
||||
|
||||
if not self._permitted_crawler_path(self.path):
|
||||
if self._blocked_user_agent(calling_domain, ua_str):
|
||||
block, self.server.blocked_cache_last_updated = \
|
||||
blocked_user_agent(calling_domain, ua_str,
|
||||
self.server.news_instance,
|
||||
self.server.debug,
|
||||
self.server.user_agents_blocked,
|
||||
self.server.blocked_cache_last_updated,
|
||||
self.server.base_dir,
|
||||
self.server.blocked_cache,
|
||||
self.server.blocked_cache_update_secs)
|
||||
if block:
|
||||
self._400()
|
||||
return
|
||||
|
||||
|
@ -18565,7 +18516,16 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
|
||||
ua_str = self._get_user_agent()
|
||||
|
||||
if self._blocked_user_agent(calling_domain, ua_str):
|
||||
block, self.server.blocked_cache_last_updated = \
|
||||
blocked_user_agent(calling_domain, ua_str,
|
||||
self.server.news_instance,
|
||||
self.server.debug,
|
||||
self.server.user_agents_blocked,
|
||||
self.server.blocked_cache_last_updated,
|
||||
self.server.base_dir,
|
||||
self.server.blocked_cache,
|
||||
self.server.blocked_cache_update_secs)
|
||||
if block:
|
||||
self._400()
|
||||
self.server.postreq_busy = False
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue