Reject header used by LLM scrapers

main
Bob Mottram 2024-07-21 10:52:03 +01:00
parent a88aecf564
commit 47ec4dfccb
2 changed files with 12 additions and 0 deletions

View File

@@ -256,6 +256,12 @@ def daemon_http_get(self) -> None:
http_402(self)
return
# headers used by LLM scrapers
if 'oai-host-hash' in self.headers:
print('GET HTTP LLM scraper bounced: ' + str(self.headers))
http_402(self)
return
if contains_invalid_chars(str(self.headers)):
print('GET HTTP headers contain invalid characters ' +
str(self.headers))

View File

@@ -92,6 +92,12 @@ def daemon_http_post(self) -> None:
self.server.headers_catalog,
self.headers)
# headers used by LLM scrapers
if 'oai-host-hash' in self.headers:
print('POST HTTP LLM scraper bounced: ' + str(self.headers))
http_402(self)
return
calling_domain = self.server.domain_full
if self.headers.get('Host'):
calling_domain = decoded_host(self.headers['Host'])