Change variable name to avoid confusion

merge-requests/30/head
Bob Mottram 2022-03-06 14:20:25 +00:00
parent 5f1f973d85
commit 5b218919b3
2 changed files with 30 additions and 30 deletions

View File

@@ -52,22 +52,22 @@ def update_known_crawlers(ua_str: str,
     return curr_time
 
 
-def load_known_web_crawlers(base_dir: str) -> []:
-    """Returns a list of known web crawlers
+def load_known_web_bots(base_dir: str) -> []:
+    """Returns a list of known web bots
     """
-    known_crawlers_filename = base_dir + '/accounts/known_web_bots.txt'
-    if not os.path.isfile(known_crawlers_filename):
+    known_bots_filename = base_dir + '/accounts/known_web_bots.txt'
+    if not os.path.isfile(known_bots_filename):
         return []
     crawlers_str = None
     try:
-        with open(known_crawlers_filename, 'r') as fp_crawlers:
+        with open(known_bots_filename, 'r') as fp_crawlers:
             crawlers_str = fp_crawlers.read()
     except OSError:
-        print('EX: unable to load web crawlers from ' +
-              known_crawlers_filename)
+        print('EX: unable to load web bots from ' +
+              known_bots_filename)
     if not crawlers_str:
         return []
-    known_crawlers = []
+    known_bots = []
     crawlers_list = crawlers_str.split('\n')
     for crawler in crawlers_list:
         if not crawler:
@@ -75,24 +75,24 @@ def load_known_web_crawlers(base_dir: str) -> []:
         crawler = crawler.replace('\n', '').strip()
         if not crawler:
             continue
-        if crawler not in known_crawlers:
-            known_crawlers.append(crawler)
-    return known_crawlers
+        if crawler not in known_bots:
+            known_bots.append(crawler)
+    return known_bots
 
 
-def _save_known_web_crawlers(base_dir: str, known_crawlers: []) -> bool:
-    """Saves a list of known web crawlers
+def _save_known_web_bots(base_dir: str, known_bots: []) -> bool:
+    """Saves a list of known web bots
     """
-    known_crawlers_filename = base_dir + '/accounts/known_web_bots.txt'
-    known_crawlers_str = ''
-    for crawler in known_crawlers:
-        known_crawlers_str += crawler.strip() + '\n'
+    known_bots_filename = base_dir + '/accounts/known_web_bots.txt'
+    known_bots_str = ''
+    for crawler in known_bots:
+        known_bots_str += crawler.strip() + '\n'
     try:
-        with open(known_crawlers_filename, 'w+') as fp_crawlers:
-            fp_crawlers.write(known_crawlers_str)
+        with open(known_bots_filename, 'w+') as fp_crawlers:
+            fp_crawlers.write(known_bots_str)
     except OSError:
-        print("EX: unable to save known web crawlers to " +
-              known_crawlers_filename)
+        print("EX: unable to save known web bots to " +
+              known_bots_filename)
         return False
     return True
 
@@ -105,7 +105,7 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
                        blocked_cache: [],
                        blocked_cache_update_secs: int,
                        crawlers_allowed: [],
-                       known_crawlers: []):
+                       known_bots: []):
     """Should a GET or POST be blocked based upon its user agent?
     """
     if not agent_str:
@@ -123,10 +123,10 @@ def blocked_user_agent(calling_domain: str, agent_str: str,
     # is this a web crawler? If so then block it by default
     # unless this is a news instance or if it is in the allowed list
     if 'bot/' in agent_str_lower or 'bot-' in agent_str_lower:
-        if agent_str_lower not in known_crawlers:
-            known_crawlers.append(agent_str_lower)
-            known_crawlers.sort()
-            _save_known_web_crawlers(base_dir, known_crawlers)
+        if agent_str_lower not in known_bots:
+            known_bots.append(agent_str_lower)
+            known_bots.sort()
+            _save_known_web_bots(base_dir, known_bots)
         # if this is a news instance then we want it
         # to be indexed by search engines
         if news_instance:

View File

@@ -380,7 +380,7 @@ from siteactive import referer_is_active
 from webapp_likers import html_likers_of_post
 from crawlers import update_known_crawlers
 from crawlers import blocked_user_agent
-from crawlers import load_known_web_crawlers
+from crawlers import load_known_web_bots
 import os
@@ -14010,7 +14010,7 @@ class PubServer(BaseHTTPRequestHandler):
                                self.server.blocked_cache,
                                self.server.blocked_cache_update_secs,
                                self.server.crawlers_allowed,
-                               self.server.known_crawlers)
+                               self.server.known_bots)
         if block:
             self._400()
             return
@@ -18553,7 +18553,7 @@ class PubServer(BaseHTTPRequestHandler):
                                self.server.blocked_cache,
                                self.server.blocked_cache_update_secs,
                                self.server.crawlers_allowed,
-                               self.server.known_crawlers)
+                               self.server.known_bots)
         if block:
             self._400()
             self.server.postreq_busy = False
@@ -19670,7 +19670,7 @@ def run_daemon(crawlers_allowed: [],
     httpd.crawlers_allowed = crawlers_allowed
 
     # list of web crawlers known to the system
-    httpd.known_crawlers = load_known_web_crawlers(base_dir)
+    httpd.known_bots = load_known_web_bots(base_dir)
 
     httpd.unit_test = unit_test
     httpd.allow_local_network_access = allow_local_network_access
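
As a review aid, the following is a minimal, self-contained sketch (not the Epicyon code itself) of how the renamed pieces fit together: a user agent containing 'bot/' or 'bot-' (the same substring test used in blocked_user_agent) is recorded in a known_bots list, persisted to accounts/known_web_bots.txt, and restored on the next startup by load_known_web_bots. The record_if_bot helper and the temporary base_dir are illustrative assumptions, not part of the commit.

# Illustrative sketch only: approximates the known-bots bookkeeping from the
# diff above (detect a bot user agent, record it, persist it, reload it).
# record_if_bot and the temporary base_dir are assumptions for the example.
import os
import tempfile


def load_known_web_bots(base_dir: str) -> []:
    """Returns a list of known web bots, one user agent per line"""
    known_bots_filename = base_dir + '/accounts/known_web_bots.txt'
    if not os.path.isfile(known_bots_filename):
        return []
    try:
        with open(known_bots_filename, 'r') as fp_bots:
            bots_str = fp_bots.read()
    except OSError:
        print('EX: unable to load web bots from ' + known_bots_filename)
        return []
    return [bot.strip() for bot in bots_str.split('\n') if bot.strip()]


def save_known_web_bots(base_dir: str, known_bots: []) -> bool:
    """Saves the list of known web bots"""
    known_bots_filename = base_dir + '/accounts/known_web_bots.txt'
    try:
        with open(known_bots_filename, 'w+') as fp_bots:
            fp_bots.write(''.join(bot.strip() + '\n' for bot in known_bots))
    except OSError:
        print('EX: unable to save known web bots to ' + known_bots_filename)
        return False
    return True


def record_if_bot(agent_str: str, known_bots: [], base_dir: str) -> bool:
    """Applies the same 'bot/' / 'bot-' substring test as blocked_user_agent"""
    agent_str_lower = agent_str.lower()
    if 'bot/' not in agent_str_lower and 'bot-' not in agent_str_lower:
        return False
    if agent_str_lower not in known_bots:
        known_bots.append(agent_str_lower)
        known_bots.sort()
        save_known_web_bots(base_dir, known_bots)
    return True


if __name__ == '__main__':
    with tempfile.TemporaryDirectory() as base_dir:
        os.makedirs(base_dir + '/accounts', exist_ok=True)
        known_bots = load_known_web_bots(base_dir)   # [] on a fresh instance
        record_if_bot('ExampleBot/1.0 (+https://crawler.example)',
                      known_bots, base_dir)
        # prints ['examplebot/1.0 (+https://crawler.example)']
        print(load_known_web_bots(base_dir))

The on-disk format is simply one lowercased user agent string per line, which is why the loader skips blank lines and the saver appends a newline per entry.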