Return 404 for images requested by LLM crawlers

main
Bob Mottram 2024-08-20 10:08:04 +01:00
parent 434234412a
commit e200f822d3
1 changed file with 6 additions and 0 deletions

View File

@@ -273,6 +273,9 @@ def daemon_http_get(self) -> None:
# oai-host-hash requests come from Microsoft Corporation,
# which has a long term partnership with OpenAI
if 'oai-host-hash' in self.headers:
if is_image_file(self.path):
http_404(self)
return
print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
msg = html_poisoned(self.server.dictionary,
self.server.twograms)
@@ -346,6 +349,9 @@ def daemon_http_get(self) -> None:
self.path, self.server.block_military)
if block:
if llm:
if is_image_file(self.path):
http_404(self)
return
# if this is an LLM crawler then feed it some trash
print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
msg = html_poisoned(self.server.dictionary,