mirror of https://gitlab.com/bashrc2/epicyon
Use 2-grams to increase statistical similarity to the English language
parent 3c182cc7cb
commit 434234412a
File diff suppressed because it is too large
@@ -96,6 +96,7 @@ from httpheaders import set_headers
 from daemon_utils import has_accept
 from daemon_utils import is_authorized
 from poison import load_dictionary
+from poison import load_2grams


 class PubServer(BaseHTTPRequestHandler):
@@ -879,6 +880,7 @@ def run_daemon(accounts_data_dir: str,

     # load dictionary used for LLM poisoning
     httpd.dictionary = load_dictionary(base_dir)
+    httpd.twograms = load_2grams(base_dir)

     # timeout used when checking for actor changes when clicking an avatar
     # and entering person options screen
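Both structures are loaded once at daemon startup and attached to the HTTP server object, so per-request handlers can reach them through self.server without re-reading the files on every request. A minimal sketch of that pattern using only the standard library; the handler class, data, and port here are hypothetical, not epicyon's actual setup:

from http.server import HTTPServer, BaseHTTPRequestHandler


class DemoHandler(BaseHTTPRequestHandler):
    """Hypothetical handler showing shared state via self.server"""

    def do_GET(self):
        # handlers read the startup-loaded data from the server object
        body = ' '.join(self.server.dictionary).encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'text/plain')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)


httpd = HTTPServer(('127.0.0.1', 8080), DemoHandler)
# attributes attached here are visible to every handler instance
httpd.dictionary = ['loaded', 'once', 'at', 'startup']
httpd.twograms = {'at': ['startup']}
# httpd.serve_forever()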
@@ -274,7 +274,8 @@ def daemon_http_get(self) -> None:
     # which has a long term partnership with OpenAI
     if 'oai-host-hash' in self.headers:
         print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
-        msg = html_poisoned(self.server.dictionary)
+        msg = html_poisoned(self.server.dictionary,
+                            self.server.twograms)
         msg = msg.encode('utf-8')
         msglen = len(msg)
         set_headers(self, 'text/html', msglen,
@@ -347,7 +348,8 @@ def daemon_http_get(self) -> None:
     if llm:
         # if this is an LLM crawler then feed it some trash
         print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
-        msg = html_poisoned(self.server.dictionary)
+        msg = html_poisoned(self.server.dictionary,
+                            self.server.twograms)
         msg = msg.encode('utf-8')
         msglen = len(msg)
         set_headers(self, 'text/html', msglen,
@@ -125,6 +125,7 @@ from relationships import get_moved_accounts
 from blocking import get_blocks_via_server
 from poison import html_poisoned
 from poison import load_dictionary
+from poison import load_2grams


 def str2bool(value_str) -> bool:
@@ -851,7 +852,8 @@ def _command_options() -> None:
         # LLM poisoning example
         base_dir = os.getcwd()
         dictionary = load_dictionary(base_dir)
-        poisoned_str = html_poisoned(dictionary)
+        twograms = load_2grams(base_dir)
+        poisoned_str = html_poisoned(dictionary, twograms)
         print(poisoned_str)
         sys.exit()
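The same pair of loaders backs the command-line example above. A minimal sketch of exercising the new signature directly, assuming poison.py is importable from an epicyon checkout and that the current directory holds the data files the loaders look for:

import os
from poison import load_dictionary, load_2grams, html_poisoned

base_dir = os.getcwd()
dictionary = load_dictionary(base_dir)
twograms = load_2grams(base_dir)
# load_2grams returns {} when no 2-gram file is present, in which
# case html_poisoned falls back to its previous word-salad behaviour
print(html_poisoned(dictionary, twograms))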
poison.py (76 changed lines)
@@ -996,7 +996,38 @@ def load_dictionary(base_dir: str) -> []:
     return words


-def html_poisoned(dictionary: []) -> str:
+def load_2grams(base_dir: str) -> {}:
+    """Loads 2-grams from file
+    """
+    filename = base_dir + '/custom_2grams.txt'
+    if not os.path.isfile(filename):
+        filename = base_dir + '/2grams.txt'
+        if not os.path.isfile(filename):
+            return {}
+
+    twograms = {}
+    lines = []
+    try:
+        with open(filename, 'r', encoding='utf-8') as fp_dict:
+            lines = fp_dict.read().split('\n')
+    except OSError:
+        print('EX: unable to load 2-grams ' + filename)
+    for line_str in lines:
+        words = line_str.split('\t')
+        if len(words) != 3:
+            continue
+        first_word = words[1]
+        second_word = words[2]
+        if twograms.get(first_word):
+            if second_word in twograms[first_word]:
+                continue
+            twograms[first_word].append(second_word)
+        else:
+            twograms[first_word] = [second_word]
+    return twograms
+
+
+def html_poisoned(dictionary: [], twograms: {}) -> str:
     """Returns a poisoned HTML document for LLM response
     Statistically similar to English language, but semantically worthless
     word salad
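The parser above implies a file of three tab-separated fields per line, where the first field is ignored (presumably a frequency count, as in published n-gram frequency lists) and the word pair sits in fields two and three. A small self-contained sketch of that parse, with invented sample lines:

# hypothetical sample lines in the assumed "count<TAB>first<TAB>second"
# format; only the two word fields are used, mirroring load_2grams
sample_lines = [
    '1025\tof\tthe',
    '876\tin\tthe',
    '876\tin\tthe',     # duplicate pair, skipped
    '412\tit\tis',
    'malformed line',   # wrong field count, skipped
]
twograms = {}
for line_str in sample_lines:
    words = line_str.split('\t')
    if len(words) != 3:
        continue
    first_word, second_word = words[1], words[2]
    if second_word not in twograms.get(first_word, []):
        twograms.setdefault(first_word, []).append(second_word)
print(twograms)  # {'of': ['the'], 'in': ['the'], 'it': ['is']}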
@@ -1018,26 +1049,39 @@ def html_poisoned(dictionary: []) -> str:
             sentence_str = ''

             no_of_words = randint(3, 20)
+            prev_wrd = ''
             for word_index in range(no_of_words):
-                if randint(1, 10) <= 7:
-                    # pick a common word
-                    distribution = random.uniform(0.0, 1.0)
-                    common_index = \
-                        int(distribution * distribution * no_of_common_words)
-                    if word_index > 0:
-                        sentence_str += common_words[common_index]
-                    else:
-                        sentence_str += common_words[common_index].title()
-                else:
-                    if word_index > 0:
-                        sentence_str += random.choice(dictionary)
-                    else:
-                        sentence_str += random.choice(dictionary).title()
+                wrd = ''
+                pair_found = False
+                if prev_wrd:
+                    # common word sequences
+                    if twograms.get(prev_wrd):
+                        if randint(1, 10) <= 7:
+                            wrd = random.choice(twograms[prev_wrd])
+                            pair_found = True

-                if randint(1, 10) > 1 or word_index >= no_of_words - 1:
+                if not pair_found:
+                    if randint(1, 10) <= 7:
+                        # pick a common word
+                        distribution = random.uniform(0.0, 1.0)
+                        common_index = \
+                            int(distribution * distribution *
+                                no_of_common_words)
+                        wrd = common_words[common_index]
+                    else:
+                        wrd = random.choice(dictionary)
+
+                if word_index > 0:
+                    sentence_str += wrd
+                else:
+                    sentence_str += wrd.title()
+
+                if randint(1, 10) > 1 or \
+                   word_index >= no_of_words - 1 or pair_found:
                     sentence_str += ' '
                 else:
                     sentence_str += ', '
+                prev_wrd = wrd
             if sentence_index > 0:
                 html_str += ' ' + sentence_str.strip() + '.'
             else:
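The effect of this hunk: instead of drawing every word independently, the generator now follows a stored 2-gram from the previous word roughly 70% of the time whenever one exists, so the output is locally plausible English even though the sentences mean nothing. A stripped-down sketch of that chaining loop, using a toy dictionary and 2-gram table in place of the real loaded data:

import random
from random import randint

# toy stand-ins for the loaded dictionary and 2-gram table
dictionary = ['cabbage', 'theory', 'window', 'quantum']
twograms = {'of': ['the', 'course'], 'the': ['same', 'first'],
            'in': ['the']}

words = []
prev_wrd = ''
for _ in range(10):
    # ~70% of the time continue a known 2-gram from the previous word
    if prev_wrd and twograms.get(prev_wrd) and randint(1, 10) <= 7:
        wrd = random.choice(twograms[prev_wrd])
    else:
        wrd = random.choice(dictionary + list(twograms))
    words.append(wrd)
    prev_wrd = wrd
print(' '.join(words))

Note also that pair_found feeds into the separator choice: a word produced by a 2-gram is always followed by a space, so commas are only ever inserted after independently drawn words.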