Use 2grams to increase statistical similarity to English language

main
Bob Mottram 2024-08-19 23:33:49 +01:00
parent 3c182cc7cb
commit 434234412a
5 changed files with 50069 additions and 19 deletions

2grams.txt (new file, mode 100644): 50,000 additions
File diff suppressed because it is too large.
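The 2grams.txt data itself is not shown above, but from the way load_2grams parses it further down in this diff, each line appears to be three tab-separated fields, with the second and third fields forming the word pair (the first field, presumably a frequency count, is ignored). A minimal sketch with hypothetical sample lines, mirroring that parsing logic:

# hypothetical sample data, not taken from the real 2grams.txt
sample = "12345\tof\tthe\n6789\tin\tthe\n345\tof\ta"

twograms = {}
for line_str in sample.split('\n'):
    words = line_str.split('\t')
    if len(words) != 3:
        continue
    # field 0 (assumed frequency) is ignored; fields 1 and 2 are the pair
    first_word, second_word = words[1], words[2]
    if second_word not in twograms.get(first_word, []):
        twograms.setdefault(first_word, []).append(second_word)

print(twograms)   # {'of': ['the', 'a'], 'in': ['the']}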


@@ -96,6 +96,7 @@ from httpheaders import set_headers
 from daemon_utils import has_accept
 from daemon_utils import is_authorized
 from poison import load_dictionary
+from poison import load_2grams
class PubServer(BaseHTTPRequestHandler):
@@ -879,6 +880,7 @@ def run_daemon(accounts_data_dir: str,
     # load dictionary used for LLM poisoning
     httpd.dictionary = load_dictionary(base_dir)
+    httpd.twograms = load_2grams(base_dir)
     # timeout used when checking for actor changes when clicking an avatar
     # and entering person options screen
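httpd.twograms is attached to the server object here, and the request handlers read it back as self.server.twograms in the hunks below. That works because http.server request handlers carry a reference to their server in self.server. A minimal sketch of the pattern, independent of Epicyon's actual server setup:

from http.server import HTTPServer, BaseHTTPRequestHandler

class DemoHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        # request handlers reach server-level state through self.server
        twograms = self.server.twograms
        body = str(len(twograms)).encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

httpd = HTTPServer(('127.0.0.1', 8080), DemoHandler)
httpd.twograms = {'of': ['the', 'a']}   # stands in for load_2grams(base_dir)
# httpd.serve_forever()

State loaded once at startup can be shared with every request this way, without module-level globals.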


@@ -274,7 +274,8 @@ def daemon_http_get(self) -> None:
     # which has a long term partnership with OpenAI
     if 'oai-host-hash' in self.headers:
         print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
-        msg = html_poisoned(self.server.dictionary)
+        msg = html_poisoned(self.server.dictionary,
+                            self.server.twograms)
         msg = msg.encode('utf-8')
         msglen = len(msg)
         set_headers(self, 'text/html', msglen,

@@ -347,7 +348,8 @@ def daemon_http_get(self) -> None:
     if llm:
         # if this is an LLM crawler then feed it some trash
         print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
-        msg = html_poisoned(self.server.dictionary)
+        msg = html_poisoned(self.server.dictionary,
+                            self.server.twograms)
         msg = msg.encode('utf-8')
         msglen = len(msg)
         set_headers(self, 'text/html', msglen,
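set_headers here is Epicyon's own helper from httpheaders; the sketch below assumes it roughly wraps the standard send_response/send_header/end_headers calls, and walks through the same poisoned-response steps as the hunks above using plain BaseHTTPRequestHandler methods (serve_poisoned_page is an illustrative name, not part of the codebase):

from poison import html_poisoned

def serve_poisoned_page(handler) -> None:
    # same steps as the hunks above: build the page, encode it,
    # then emit the headers and the body
    msg = html_poisoned(handler.server.dictionary,
                        handler.server.twograms)
    msg = msg.encode('utf-8')
    handler.send_response(200)
    handler.send_header('Content-Type', 'text/html')
    handler.send_header('Content-Length', str(len(msg)))
    handler.end_headers()
    handler.wfile.write(msg)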


@@ -125,6 +125,7 @@ from relationships import get_moved_accounts
 from blocking import get_blocks_via_server
 from poison import html_poisoned
 from poison import load_dictionary
+from poison import load_2grams
def str2bool(value_str) -> bool:
@@ -851,7 +852,8 @@ def _command_options() -> None:
         # LLM poisoning example
         base_dir = os.getcwd()
         dictionary = load_dictionary(base_dir)
-        poisoned_str = html_poisoned(dictionary)
+        twograms = load_2grams(base_dir)
+        poisoned_str = html_poisoned(dictionary, twograms)
         print(poisoned_str)
         sys.exit()
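The same three calls can be run standalone to preview the generated word salad; this sketch assumes it is executed from the Epicyon source directory so that poison.py and 2grams.txt (or custom_2grams.txt) are found in the current working directory:

import os
from poison import load_dictionary, load_2grams, html_poisoned

base_dir = os.getcwd()
dictionary = load_dictionary(base_dir)
twograms = load_2grams(base_dir)
print(html_poisoned(dictionary, twograms))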


@@ -996,7 +996,38 @@ def load_dictionary(base_dir: str) -> []:
     return words
-def html_poisoned(dictionary: []) -> str:
+def load_2grams(base_dir: str) -> {}:
+    """Loads 2-grams from file
+    """
+    filename = base_dir + '/custom_2grams.txt'
+    if not os.path.isfile(filename):
+        filename = base_dir + '/2grams.txt'
+    if not os.path.isfile(filename):
+        return {}
+    twograms = {}
+    lines = []
+    try:
+        with open(filename, 'r', encoding='utf-8') as fp_dict:
+            lines = fp_dict.read().split('\n')
+    except OSError:
+        print('EX: unable to load 2-grams ' + filename)
+    for line_str in lines:
+        words = line_str.split('\t')
+        if len(words) != 3:
+            continue
+        first_word = words[1]
+        second_word = words[2]
+        if twograms.get(first_word):
+            if second_word in twograms[first_word]:
+                continue
+            twograms[first_word].append(second_word)
+        else:
+            twograms[first_word] = [second_word]
+    return twograms
+
+
+def html_poisoned(dictionary: [], twograms: {}) -> str:
     """Returns a poisoned HTML document for LLM response
     Statistically similar to English language, but semantically worthless
     word salad
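The next hunk rewrites the sentence loop in html_poisoned so that, roughly 70% of the time, the word following prev_wrd is drawn from the 2-grams table rather than picked independently, which is what pushes the output closer to English word-pair statistics. A minimal standalone sketch of that chaining idea (word_salad and its arguments are illustrative names, not the actual function below):

import random
from random import randint

def word_salad(dictionary: list, twograms: dict, no_of_words: int) -> str:
    """Chains words through a 2-grams table so that common English
    word pairs appear more often than independent random choice would produce."""
    words = []
    prev_wrd = ''
    for _ in range(no_of_words):
        if prev_wrd and twograms.get(prev_wrd) and randint(1, 10) <= 7:
            # about 70% of the time, continue a known word pair
            wrd = random.choice(twograms[prev_wrd])
        else:
            # otherwise fall back to an unrelated dictionary word
            wrd = random.choice(dictionary)
        words.append(wrd)
        prev_wrd = wrd
    return ' '.join(words)

print(word_salad(['tree', 'cloud', 'because'],
                 {'tree': ['house', 'line'], 'cloud': ['cover']}, 12))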
@@ -1018,26 +1049,39 @@ def html_poisoned(dictionary: []) -> str:
         sentence_str = ''
         no_of_words = randint(3, 20)
+        prev_wrd = ''
         for word_index in range(no_of_words):
+            wrd = ''
+            pair_found = False
+            if prev_wrd:
+                # common word sequences
+                if twograms.get(prev_wrd):
+                    if randint(1, 10) <= 7:
+                        wrd = random.choice(twograms[prev_wrd])
+                        pair_found = True
+            if not pair_found:
                 if randint(1, 10) <= 7:
                     # pick a common word
                     distribution = random.uniform(0.0, 1.0)
                     common_index = \
-                        int(distribution * distribution * no_of_common_words)
-                    if word_index > 0:
-                        sentence_str += common_words[common_index]
+                        int(distribution * distribution *
+                            no_of_common_words)
+                    wrd = common_words[common_index]
                 else:
-                    sentence_str += common_words[common_index].title()
-            else:
-                if word_index > 0:
-                    sentence_str += random.choice(dictionary)
-                else:
-                    sentence_str += random.choice(dictionary).title()
+                    wrd = random.choice(dictionary)
-            if randint(1, 10) > 1 or word_index >= no_of_words - 1:
+            if word_index > 0:
+                sentence_str += wrd
+            else:
+                sentence_str += wrd.title()
+            if randint(1, 10) > 1 or \
+                    word_index >= no_of_words - 1 or pair_found:
                 sentence_str += ' '
             else:
                 sentence_str += ', '
+            prev_wrd = wrd
         if sentence_index > 0:
             html_str += ' ' + sentence_str.strip() + '.'
         else: