mirror of https://gitlab.com/bashrc2/epicyon
Tidying of user agent blocks
parent
7f0d897299
commit
3aab054c04
82
daemon.py
82
daemon.py
|
@ -379,6 +379,7 @@ from fitnessFunctions import html_watch_points_graph
|
||||||
from siteactive import referer_is_active
|
from siteactive import referer_is_active
|
||||||
from webapp_likers import html_likers_of_post
|
from webapp_likers import html_likers_of_post
|
||||||
from crawlers import update_known_crawlers
|
from crawlers import update_known_crawlers
|
||||||
|
from crawlers import blocked_user_agent
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
@ -560,65 +561,6 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
else:
|
else:
|
||||||
print('ERROR: unable to create vote')
|
print('ERROR: unable to create vote')
|
||||||
|
|
||||||
def _blocked_user_agent(self, calling_domain: str, agent_str: str) -> bool:
|
|
||||||
"""Should a GET or POST be blocked based upon its user agent?
|
|
||||||
"""
|
|
||||||
if not agent_str:
|
|
||||||
return False
|
|
||||||
|
|
||||||
agent_str_lower = agent_str.lower()
|
|
||||||
default_agent_blocks = [
|
|
||||||
'fedilist'
|
|
||||||
]
|
|
||||||
for ua_block in default_agent_blocks:
|
|
||||||
if ua_block in agent_str_lower:
|
|
||||||
print('Blocked User agent: ' + ua_block)
|
|
||||||
return True
|
|
||||||
|
|
||||||
agent_domain = None
|
|
||||||
|
|
||||||
if agent_str:
|
|
||||||
# is this a web crawler? If so the block it
|
|
||||||
if 'bot/' in agent_str_lower or 'bot-' in agent_str_lower:
|
|
||||||
if self.server.news_instance:
|
|
||||||
return False
|
|
||||||
print('Blocked Crawler: ' + agent_str)
|
|
||||||
return True
|
|
||||||
# get domain name from User-Agent
|
|
||||||
agent_domain = user_agent_domain(agent_str, self.server.debug)
|
|
||||||
else:
|
|
||||||
# no User-Agent header is present
|
|
||||||
return True
|
|
||||||
|
|
||||||
# is the User-Agent type blocked? eg. "Mastodon"
|
|
||||||
if self.server.user_agents_blocked:
|
|
||||||
blocked_ua = False
|
|
||||||
for agent_name in self.server.user_agents_blocked:
|
|
||||||
if agent_name in agent_str:
|
|
||||||
blocked_ua = True
|
|
||||||
break
|
|
||||||
if blocked_ua:
|
|
||||||
return True
|
|
||||||
|
|
||||||
if not agent_domain:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# is the User-Agent domain blocked
|
|
||||||
blocked_ua = False
|
|
||||||
if not agent_domain.startswith(calling_domain):
|
|
||||||
self.server.blocked_cache_last_updated = \
|
|
||||||
update_blocked_cache(self.server.base_dir,
|
|
||||||
self.server.blocked_cache,
|
|
||||||
self.server.blocked_cache_last_updated,
|
|
||||||
self.server.blocked_cache_update_secs)
|
|
||||||
|
|
||||||
blocked_ua = is_blocked_domain(self.server.base_dir, agent_domain,
|
|
||||||
self.server.blocked_cache)
|
|
||||||
# if self.server.debug:
|
|
||||||
if blocked_ua:
|
|
||||||
print('Blocked User agent: ' + agent_domain)
|
|
||||||
return blocked_ua
|
|
||||||
|
|
||||||
def _request_csv(self) -> bool:
|
def _request_csv(self) -> bool:
|
||||||
"""Should a csv response be given?
|
"""Should a csv response be given?
|
||||||
"""
|
"""
|
||||||
|
@ -14033,7 +13975,16 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
ua_str = self._get_user_agent()
|
ua_str = self._get_user_agent()
|
||||||
|
|
||||||
if not self._permitted_crawler_path(self.path):
|
if not self._permitted_crawler_path(self.path):
|
||||||
if self._blocked_user_agent(calling_domain, ua_str):
|
block, self.server.blocked_cache_last_updated = \
|
||||||
|
blocked_user_agent(calling_domain, ua_str,
|
||||||
|
self.server.news_instance,
|
||||||
|
self.server.debug,
|
||||||
|
self.server.user_agents_blocked,
|
||||||
|
self.server.blocked_cache_last_updated,
|
||||||
|
self.server.base_dir,
|
||||||
|
self.server.blocked_cache,
|
||||||
|
self.server.blocked_cache_update_secs)
|
||||||
|
if block:
|
||||||
self._400()
|
self._400()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -18565,7 +18516,16 @@ class PubServer(BaseHTTPRequestHandler):
|
||||||
|
|
||||||
ua_str = self._get_user_agent()
|
ua_str = self._get_user_agent()
|
||||||
|
|
||||||
if self._blocked_user_agent(calling_domain, ua_str):
|
block, self.server.blocked_cache_last_updated = \
|
||||||
|
blocked_user_agent(calling_domain, ua_str,
|
||||||
|
self.server.news_instance,
|
||||||
|
self.server.debug,
|
||||||
|
self.server.user_agents_blocked,
|
||||||
|
self.server.blocked_cache_last_updated,
|
||||||
|
self.server.base_dir,
|
||||||
|
self.server.blocked_cache,
|
||||||
|
self.server.blocked_cache_update_secs)
|
||||||
|
if block:
|
||||||
self._400()
|
self._400()
|
||||||
self.server.postreq_busy = False
|
self.server.postreq_busy = False
|
||||||
return
|
return
|
||||||
|
|
Loading…
Reference in New Issue