From 515b9243cc467005c2dc74e668520db34fd17bae Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 18 Aug 2024 10:44:51 +0100 Subject: [PATCH] Log before setting headers --- daemon_get.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/daemon_get.py b/daemon_get.py index b60a6403c..3bd2a1580 100644 --- a/daemon_get.py +++ b/daemon_get.py @@ -263,13 +263,13 @@ def daemon_http_get(self) -> None: # oai-host-hash requests come from Microsoft Corporation, # which has a long term partnership with OpenAI if 'oai-host-hash' in self.headers: + print('GET HTTP LLM scraper poisoned: ' + str(self.headers)) msg = html_poisoned(self.server.dictionary) msg = msg.encode('utf-8') msglen = len(msg) set_headers(self, 'text/html', msglen, '', calling_domain, False) write2(self, msg) - print('GET HTTP LLM scraper poisoned: ' + str(self.headers)) return # replace invalid .well-known path, prior to checking for suspicious paths @@ -336,13 +336,13 @@ def daemon_http_get(self) -> None: if block: if llm: # if this is an LLM crawler then feed it some trash + print('GET HTTP LLM scraper poisoned: ' + str(self.headers)) msg = html_poisoned(self.server.dictionary) msg = msg.encode('utf-8') msglen = len(msg) set_headers(self, 'text/html', msglen, '', calling_domain, False) write2(self, msg) - print('GET HTTP LLM scraper poisoned: ' + str(self.headers)) return http_400(self) return