mirror of https://gitlab.com/bashrc2/epicyon
Return 404 for images requested by LLM crawlers
parent
434234412a
commit
e200f822d3
|
@ -273,6 +273,9 @@ def daemon_http_get(self) -> None:
|
||||||
# oai-host-hash requests come from Microsoft Corporation,
|
# oai-host-hash requests come from Microsoft Corporation,
|
||||||
# which has a long term partnership with OpenAI
|
# which has a long term partnership with OpenAI
|
||||||
if 'oai-host-hash' in self.headers:
|
if 'oai-host-hash' in self.headers:
|
||||||
|
if is_image_file(self.path):
|
||||||
|
http_404(self)
|
||||||
|
return
|
||||||
print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
|
print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
|
||||||
msg = html_poisoned(self.server.dictionary,
|
msg = html_poisoned(self.server.dictionary,
|
||||||
self.server.twograms)
|
self.server.twograms)
|
||||||
|
@ -346,6 +349,9 @@ def daemon_http_get(self) -> None:
|
||||||
self.path, self.server.block_military)
|
self.path, self.server.block_military)
|
||||||
if block:
|
if block:
|
||||||
if llm:
|
if llm:
|
||||||
|
if is_image_file(self.path):
|
||||||
|
http_404(self)
|
||||||
|
return
|
||||||
# if this is an LLM crawler then feed it some trash
|
# if this is an LLM crawler then feed it some trash
|
||||||
print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
|
print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
|
||||||
msg = html_poisoned(self.server.dictionary,
|
msg = html_poisoned(self.server.dictionary,
|
||||||
|
|
Loading…
Reference in New Issue