Return a poisoned response to LLM scrapers

2024-08-17 19:29:32 +01:00 · 2024-08-17 19:29:32 +01:00 · cb069bbed5
parent 4c7cbd6341
commit cb069bbed5
5 changed files with 2073 additions and 2 deletions
--- a/daemon.py
+++ b/daemon.py
@ -95,6 +95,7 @@ from httpcodes import write2
 from httpheaders import set_headers
 from daemon_utils import has_accept
 from daemon_utils import is_authorized
 from poison import load_dictionary
 class PubServer(BaseHTTPRequestHandler):
@ -876,6 +877,9 @@ def run_daemon(accounts_data_dir: str,
    # timeout used when getting rss feeds
    httpd.rss_timeout_sec = 20
    # load dictionary used for LLM poisoning
    httpd.dictionary = load_dictionary(base_dir)
    # timeout used when checking for actor changes when clicking an avatar
    # and entering person options screen
    if check_actor_timeout < 2:
--- a/daemon_get.py
+++ b/daemon_get.py
@ -210,6 +210,7 @@ from daemon_get_blog import show_blog_page
 from daemon_get_links import edit_links2
 from daemon_get_login import redirect_to_login_screen
 from daemon_get_login import show_login_screen
 from poison import html_poisoned
 # Blogs can be longer, so don't show many per page
 MAX_POSTS_IN_BLOGS_FEED = 4
@ -260,8 +261,13 @@ def daemon_http_get(self) -> None:
    # headers used by LLM scrapers
    if 'oai-host-hash' in self.headers:
-        print('GET HTTP LLM scraper bounced: ' + str(self.headers))
+        msg = html_poisoned(self.server.dictionary)
-        http_402(self)
+        msg = msg.encode('utf-8')
        msglen = len(msg)
        set_headers(self, 'text/html', msglen,
                    '', calling_domain, False)
        write2(self, msg)
        print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
        return
    # replace invalid .well-known path, prior to checking for suspicious paths
--- a/dictionary.txt
+++ b/dictionary.txt
--- a/epicyon.py
+++ b/epicyon.py
@ -123,6 +123,8 @@ from happening import dav_day_via_server
 from content import import_emoji
 from relationships import get_moved_accounts
 from blocking import get_blocks_via_server
 from poison import html_poisoned
 from poison import load_dictionary
 def str2bool(value_str) -> bool:
@ -440,6 +442,11 @@ def _command_options() -> None:
                        dest='shared_items_federated_domains',
                        help='Specify federation list for shared items, ' +
                        'separated by spaces')
    parser.add_argument("--poisoned", "--poison",
                        dest='poisoned',
                        type=str2bool, nargs='?',
                        const=True, default=False,
                        help="Example poisoned output")
    parser.add_argument("--following", "--followingList",
                        dest='followingList',
                        type=str2bool, nargs='?',
@ -840,6 +847,14 @@ def _command_options() -> None:
    argb = parser.parse_args()
    if argb.poisoned:
        # LLM poisoning example
        base_dir = os.getcwd()
        dictionary = load_dictionary(base_dir)
        poisoned_str = html_poisoned(dictionary)
        print(poisoned_str)
        sys.exit()
    debug = False
    if argb.debug:
        debug = True
--- a/poison.py
+++ b/poison.py