Check if LLM crawlers are encountered too frequently

main
Bob Mottram 2024-09-01 21:35:33 +01:00
parent 60c15d2358
commit fc7feaf1cd
2 changed files with 14 additions and 1 deletions

View File

@@ -687,6 +687,9 @@ def run_daemon(accounts_data_dir: str,
httpd.starting_daemon = True
# the time at which an LLM scraper was last replied to
httpd.last_llm_time = None
# width, position and opacity of watermark applied to attached images
# as a percentage of the attached image width
httpd.watermark_width_percent = watermark_width_percent

View File

@@ -87,6 +87,7 @@ from httpcodes import http_304
from httpcodes import http_400
from httpcodes import http_503
from httpcodes import write2
from utils import date_utcnow
from utils import replace_strings
from utils import contains_invalid_chars
from utils import save_json
@@ -349,8 +350,16 @@ def daemon_http_get(self) -> None:
self.path, self.server.block_military)
if block:
if llm:
# check if LLM crawler requests are arriving too frequently
# (less than 60 seconds since the last reply to one)
if self.server.last_llm_time:
curr_date = date_utcnow()
time_diff = curr_date - self.server.last_llm_time
diff_secs = time_diff.total_seconds()
if diff_secs < 60:
http_402(self)
return
if is_image_file(self.path):
http_404(self, 723)
http_402(self)
return
# if this is an LLM crawler then feed it some trash
print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
@@ -361,6 +370,7 @@ def daemon_http_get(self) -> None:
set_headers(self, 'text/html', msglen,
'', calling_domain, False)
write2(self, msg)
self.server.last_llm_time = date_utcnow()
return
http_400(self)
return