Block corporate LLM scrapers

main
Bob Mottram 2023-10-12 15:34:49 +01:00
parent e1ff487aaf
commit 9ccc3af807
2 changed files with 24 additions and 0 deletions

View File

@@ -300,6 +300,7 @@ from languages import set_actor_languages
from languages import get_understood_languages
from like import update_likes_collection
from reaction import update_reaction_collection
from utils import corp_servers
from utils import get_attributed_to
from utils import get_memorials
from utils import set_memorials
@@ -1243,6 +1244,15 @@ class PubServer(BaseHTTPRequestHandler):
self._http_return_code(401, 'Unauthorized',
post_msg, None)
def _402(self, post_msg: str) -> None:
if self.server.translate:
ok_str = self.server.translate[post_msg]
self._http_return_code(402, self.server.translate['Unauthorized'],
ok_str, None)
else:
self._http_return_code(402, 'Unauthorized',
post_msg, None)
def _201(self, etag: str) -> None:
if self.server.translate:
done_str = self.server.translate['It is done']
@@ -17233,6 +17243,13 @@ class PubServer(BaseHTTPRequestHandler):
calling_domain = self.server.domain_full
if self.headers.get('Server'):
if self.headers['Server'] in corp_servers():
self._402("If you are a BigTech corp trying to steal " +
"data then it's time to see the color of " +
"your money")
return
if self.headers.get('Host'):
calling_domain = decoded_host(self.headers['Host'])
if self.server.onion_domain:

View File

@@ -4657,3 +4657,10 @@ def lines_in_file(filename: str) -> int:
except OSError:
print('EX: lines_in_file error reading ' + filename)
return 0
def corp_servers() -> tuple:
    """Returns a tuple of corporate server names whose requests
    should be refused. Each entry is a plain string and no case
    folding is applied here, hence both spellings of GitHub
    """
    return ('GitHub.com', 'github.com', 'cloudflare', 'microsoft.com',
            'google.com')