Use 2grams to increase statistical similarity to English language

2024-08-19 23:33:49 +01:00 · 2024-08-19 23:33:49 +01:00 · 434234412a
parent 3c182cc7cb
commit 434234412a
5 changed files with 50069 additions and 19 deletions
--- a/2grams.txt
+++ b/2grams.txt
--- a/daemon.py
+++ b/daemon.py
@ -96,6 +96,7 @@ from httpheaders import set_headers
 from daemon_utils import has_accept
 from daemon_utils import is_authorized
 from poison import load_dictionary
+from poison import load_2grams


 class PubServer(BaseHTTPRequestHandler):
@ -879,6 +880,7 @@ def run_daemon(accounts_data_dir: str,

    # load dictionary used for LLM poisoning
    httpd.dictionary = load_dictionary(base_dir)
+    httpd.twograms = load_2grams(base_dir)

    # timeout used when checking for actor changes when clicking an avatar
    # and entering person options screen
--- a/daemon_get.py
+++ b/daemon_get.py
@ -274,7 +274,8 @@ def daemon_http_get(self) -> None:
    # which has a long term partnership with OpenAI
    if 'oai-host-hash' in self.headers:
        print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
-        msg = html_poisoned(self.server.dictionary)
+        msg = html_poisoned(self.server.dictionary,
+                            self.server.twograms)
        msg = msg.encode('utf-8')
        msglen = len(msg)
        set_headers(self, 'text/html', msglen,
@ -347,7 +348,8 @@ def daemon_http_get(self) -> None:
            if llm:
                # if this is an LLM crawler then feed it some trash
                print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
-                msg = html_poisoned(self.server.dictionary)
+                msg = html_poisoned(self.server.dictionary,
+                                    self.server.twograms)
                msg = msg.encode('utf-8')
                msglen = len(msg)
                set_headers(self, 'text/html', msglen,
--- a/epicyon.py
+++ b/epicyon.py
@ -125,6 +125,7 @@ from relationships import get_moved_accounts
 from blocking import get_blocks_via_server
 from poison import html_poisoned
 from poison import load_dictionary
+from poison import load_2grams


 def str2bool(value_str) -> bool:
@ -851,7 +852,8 @@ def _command_options() -> None:
        # LLM poisoning example
        base_dir = os.getcwd()
        dictionary = load_dictionary(base_dir)
-        poisoned_str = html_poisoned(dictionary)
+        twograms = load_2grams(base_dir)
+        poisoned_str = html_poisoned(dictionary, twograms)
        print(poisoned_str)
        sys.exit()

--- a/poison.py
+++ b/poison.py
@ -996,7 +996,38 @@ def load_dictionary(base_dir: str) -> []:
    return words


-def html_poisoned(dictionary: []) -> str:
+def load_2grams(base_dir: str) -> {}:
+    """Loads 2-grams from file
+    """
+    filename = base_dir + '/custom_2grams.txt'
+    if not os.path.isfile(filename):
+        filename = base_dir + '/2grams.txt'
+    if not os.path.isfile(filename):
+        return {}
+
+    twograms = {}
+    lines = []
+    try:
+        with open(filename, 'r', encoding='utf-8') as fp_dict:
+            lines = fp_dict.read().split('\n')
+    except OSError:
+        print('EX: unable to load 2-grams ' + filename)
+    for line_str in lines:
+        words = line_str.split('\t')
+        if len(words) != 3:
+            continue
+        first_word = words[1]
+        second_word = words[2]
+        if twograms.get(first_word):
+            if second_word in twograms[first_word]:
+                continue
+            twograms[first_word].append(second_word)
+        else:
+            twograms[first_word] = [second_word]
+    return twograms
+
+
+def html_poisoned(dictionary: [], twograms: {}) -> str:
    """Returns a poisoned HTML document for LLM response
    Statistically similar to English language, but semantically worthless
    word salad
@ -1018,26 +1049,39 @@ def html_poisoned(dictionary: []) -> str:
            sentence_str = ''

            no_of_words = randint(3, 20)
+            prev_wrd = ''
            for word_index in range(no_of_words):
-                if randint(1, 10) <= 7:
-                    # pick a common word
-                    distribution = random.uniform(0.0, 1.0)
-                    common_index = \
-                        int(distribution * distribution * no_of_common_words)
-                    if word_index > 0:
-                        sentence_str += common_words[common_index]
-                    else:
-                        sentence_str += common_words[common_index].title()
-                else:
-                    if word_index > 0:
-                        sentence_str += random.choice(dictionary)
-                    else:
-                        sentence_str += random.choice(dictionary).title()
+                wrd = ''
+                pair_found = False
+                if prev_wrd:
+                    # common word sequences
+                    if twograms.get(prev_wrd):
+                        if randint(1, 10) <= 7:
+                            wrd = random.choice(twograms[prev_wrd])
+                            pair_found = True

-                if randint(1, 10) > 1 or word_index >= no_of_words - 1:
+                if not pair_found:
+                    if randint(1, 10) <= 7:
+                        # pick a common word
+                        distribution = random.uniform(0.0, 1.0)
+                        common_index = \
+                            int(distribution * distribution *
+                                no_of_common_words)
+                        wrd = common_words[common_index]
+                    else:
+                        wrd = random.choice(dictionary)
+
+                if word_index > 0:
+                    sentence_str += wrd
+                else:
+                    sentence_str += wrd.title()
+
+                if randint(1, 10) > 1 or \
+                   word_index >= no_of_words - 1 or pair_found:
                    sentence_str += ' '
                else:
                    sentence_str += ', '
+                prev_wrd = wrd
            if sentence_index > 0:
                html_str += ' ' + sentence_str.strip() + '.'
            else: