# epicyon/poison.py
#
# Source listing metadata: 1091 lines, 16 KiB, Python

__filename__ = "poison.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.5.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"
import os
import random
from random import randint
# Common English words used as filler when generating poisoned text.
# html_poisoned biases its random pick towards the low indices, so the
# order of entries matters (presumably most frequent first - confirm
# against the original word list before reordering).
# NOTE: every entry needs its own trailing comma, otherwise adjacent
# string literals are silently concatenated into a single bogus word.
common_words = (
    "you",
    "I",
    "to",
    "the",
    "a",
    "and",
    "that",
    "it",
    "of",
    "me",
    "what",
    "is",
    "in",
    "this",
    "know",
    "I'm",
    "for",
    "no",
    "have",
    "my",
    "don't",
    "just",
    "not",
    "do",
    "be",
    "on",
    "your",
    "was",
    "we",
    "it's",
    "with",
    "so",
    "but",
    "all",
    "well",
    "are",
    "he",
    "oh",
    "about",
    "right",
    "you're",
    "get",
    "here",
    "out",
    "going",
    "like",
    "yeah",
    "if",
    "her",
    "she",
    "can",
    "up",
    "want",
    "think",
    "that's",
    "now",
    "go",
    "him",
    "at",
    "how",
    "got",
    "there",
    "one",
    "did",
    "why",
    "see",
    "come",
    "good",
    "they",
    "really",
    "as",
    "would",
    "look",
    "when",
    "time",
    "will",
    "okay",
    "back",
    "can't",
    "mean",
    "tell",
    "I'll",
    "from",
    "hey",
    "were",
    "he's",
    "could",
    "didn't",
    "yes",
    "his",
    "been",
    "or",
    "something",
    "who",
    "because",
    "some",
    "had",
    "then",
    "say",
    "ok",
    "take",
    "an",
    "way",
    "us",
    "little",
    "make",
    "need",
    "gonna",
    "never",
    "we're",
    "too",
    "love",
    "she's",
    "I've",
    "sure",
    "them",
    "more",
    "over",
    "our",
    "sorry",
    "where",
    "what's",
    "let",
    "thing",
    "am",
    "maybe",
    "down",
    "man",
    "has",
    "uh",
    "very",
    "by",
    "there's",
    "should",
    "anything",
    "said",
    "much",
    "any",
    "life",
    "even",
    "off",
    "please",
    "doing",
    "thank",
    "give",
    "only",
    "thought",
    "help",
    "two",
    "talk",
    "people",
    "god",
    "still",
    "wait",
    "into",
    "find",
    "nothing",
    "again",
    "things",
    "let's",
    "doesn't",
    "call",
    "told",
    "great",
    "before",
    "better",
    "ever",
    "night",
    "than",
    "away",
    "first",
    "believe",
    "other",
    "feel",
    "everything",
    "work",
    "you've",
    "fine",
    "home",
    "after",
    "last",
    "these",
    "day",
    "keep",
    "does",
    "put",
    "around",
    "stop",
    "they're",
    "I'd",
    "guy",
    "long",
    "isn't",
    "always",
    "listen",
    "wanted",
    "Mr",
    "guys",
    "huh",
    "those",
    "big",
    "lot",
    "happened",
    "thanks",
    "won't",
    "trying",
    "kind",
    "wrong",
    "through",
    "talking",
    "made",
    "new",
    "being",
    "guess",
    "hi",
    "care",
    "bad",
    "mom",
    "remember",
    "getting",
    "we'll",
    "together",
    "dad",
    "leave",
    "mother",
    "place",
    "understand",
    "wouldn't",
    "actually",
    "hear",
    "baby",
    "nice",
    "father",
    "else",
    "stay",
    "done",
    "wasn't",
    "their",
    "course",
    "might",
    "mind",
    "every",
    "enough",
    "try",
    "hell",
    "came",
    "someone",
    "you'll",
    "own",
    "family",
    "whole",
    "another",
    "house",
    "jack",
    "yourself",
    "idea",
    "ask",
    "best",
    "must",
    "coming",
    "old",
    "looking",
    "woman",
    "hello",
    "which",
    "years",
    "room",
    "money",
    "left",
    "knew",
    "tonight",
    "real",
    "son",
    "hope",
    "name",
    "same",
    "went",
    "um",
    "hmm",
    "happy",
    "pretty",
    "saw",
    "girl",
    "sir",
    "show",
    "friend",
    "already",
    "saying",
    "may",
    "next",
    "three",
    "job",
    "problem",
    "minute",
    "found",
    "world",
    "thinking",
    "haven't",
    "heard",
    "honey",
    "matter",
    "myself",
    "couldn't",
    "exactly",
    "having",
    "ah",
    "probably",
    "happen",
    "we've",
    "hurt",
    "boy",
    "both",
    "while",
    "dead",
    "gotta",
    "alone",
    "since",
    "excuse",
    "start",
    "kill",
    "hard",
    "you'd",
    "today",
    "car",
    "ready",
    "until",
    "without",
    "whatever",
    "wants",
    "hold",
    "wanna",
    "yet",
    "seen",
    "deal",
    "took",
    "once",
    "gone",
    "called",
    "morning",
    "supposed",
    "friends",
    "head",
    "stuff",
    "most",
    "used",
    "worry",
    "second",
    "part",
    "live",
    "truth",
    "school",
    "face",
    "forget",
    "TRUE",
    "business",
    "each",
    "cause",
    "soon",
    "knows",
    "few",
    "telling",
    "wife",
    "who's",
    "use",
    "chance",
    "run",
    "move",
    "anyone",
    "person",
    "bye",
    "somebody",
    "dr",
    "heart",
    "such",
    "miss",
    "married",
    "point",
    "later",
    "making",
    "meet",
    "anyway",
    "many",
    "phone",
    "reason",
    "damn",
    "lost",
    "looks",
    "bring",
    "case",
    "turn",
    "wish",
    "tomorrow",
    "kids",
    "trust",
    "check",
    "change",
    "end",
    "late",
    "anymore",
    "five",
    "least",
    "town",
    "aren't",
    "ha",
    "working",
    "year",
    "makes",
    "taking",
    "means",
    "brother",
    "play",
    "hate",
    "ago",
    "says",
    "beautiful",
    "gave",
    "fact",
    "crazy",
    "party",
    "sit",
    "open",
    "afraid",
    "between",
    "important",
    "rest",
    "fun",
    "kid",
    "word",
    "watch",
    "glad",
    "everyone",
    "days",
    "sister",
    "minutes",
    "everybody",
    "bit",
    "couple",
    "whoa",
    "either",
    "mrs",
    "feeling",
    "daughter",
    "wow",
    "gets",
    "asked",
    "under",
    "break",
    "promise",
    "door",
    "set",
    "close",
    "hand",
    "easy",
    "question",
    "doctor",
    "tried",
    "far",
    "walk",
    "needs",
    "trouble",
    "mine",
    "though",
    "times",
    "different",
    "killed",
    "hospital",
    "anybody",
    "sam",
    "alright",
    "wedding",
    "shut",
    "able",
    "die",
    "perfect",
    "police",
    "stand",
    "comes",
    "hit",
    "story",
    "ya",
    "mm",
    "waiting",
    "dinner",
    "against",
    "funny",
    "husband",
    "almost",
    "stupid",
    "pay",
    "answer",
    "four",
    "office",
    "cool",
    "eyes",
    "news",
    "child",
    "shouldn't",
    "half",
    "side",
    "yours",
    "moment",
    "sleep",
    "read",
    "where's",
    "started",
    "young",
    "men",
    "sounds",
    "sonny",
    "lucky",
    "pick",
    "sometimes",
    "bed",
    "also",
    "date",
    "line",
    "plan",
    "hours",
    "lose",
    "fire",
    "free",
    "hands",
    "serious",
    "shit",
    "behind",
    "inside",
    "high",
    "ahead",
    "week",
    "wonderful",
    "fight",
    "past",
    "cut",
    "quite",
    "number",
    "he'll",
    "sick",
    "it'll",
    "game",
    "eat",
    "nobody",
    "goes",
    "death",
    "along",
    "save",
    "seems",
    "finally",
    "lives",
    "worried",
    "upset",
    "met",
    "book",
    "brought",
    "seem",
    "sort",
    "safe",
    "living",
    "children",
    "weren't",
    "leaving",
    "front",
    "shot",
    "loved",
    "asking",
    "running",
    "clear",
    "figure",
    "hot",
    "felt",
    "six",
    "parents",
    "drink",
    "absolutely",
    "how's",
    "daddy",
    "sweet",
    "alive",
    "Paul",
    "sense",
    "meant",
    "happens",
    "David",
    "special",
    "bet",
    "blood",
    "ain't",
    "kidding",
    "lie",
    "full",
    "meeting",
    "dear",
    "coffee",
    "seeing",
    "sound",
    "fault",
    "water",
    "fuck",
    "ten",
    "women",
    "john",
    "welcome",
    "buy",
    "months",
    "hour",
    "speak",
    "lady",
    "jen",
    "thinks",
    "Christmas",
    "body",
    "order",
    "outside",
    "hang",
    "possible",
    "worse",
    "company",
    "mistake",
    "ooh",
    "handle",
    "spend",
    "totally",
    "giving",
    "control",
    "here's",
    "marriage",
    "realize",
    "power",
    "president",
    "unless",
    "sex",
    "girls",
    "send",
    "needed",
    "taken",
    "died",
    "scared",
    "picture",
    "talked",
    "jake",
    "ass",
    "hundred",
    "changed",
    "completely",
    "explain",
    "playing",
    "certainly",
    "sign",
    "boys",
    "relationship",
    "loves",
    "fucking",
    "hair",
    "lying",
    "choice",
    "anywhere",
    "secret",
    "future",
    "weird",
    "luck",
    "she'll",
    "max",
    "Luis",
    "turned",
    "known",
    "touch",
    "kiss",
    "crane",
    "questions",
    "obviously",
    "wonder",
    "pain",
    "calling",
    "somewhere",
    "throw",
    "straight",
    "grace",
    "cold",
    "white",
    "fast",
    "words",
    "food",
    "none",
    "drive",
    "feelings",
    "they'll",
    "worked",
    "marry",
    "light",
    "test",
    "drop",
    "cannot",
    "frank",
    "sent",
    "city",
    "dream",
    "protect",
    "twenty",
    "class",
    "lucy",
    "surprise",
    "its",
    "sweetheart",
    "forever",
    "poor",
    "looked",
    "mad",
    "except",
    "gun",
    "dance",
    "takes",
    "appreciate",
    "especially",
    "situation",
    "besides",
    "weeks",
    "pull",
    "himself",
    "hasn't",
    "act",
    "worth",
    "Sheridan",
    "amazing",
    "top",
    "given",
    "expect",
    "ben",
    "rather",
    "Julian",
    "involved",
    "swear",
    "piece",
    "busy",
    "law",
    "decided",
    "black",
    "joey",
    "happening",
    "movie",
    "we'd",
    "catch",
    "antonio",
    "country",
    "less",
    "perhaps",
    "step",
    "fall",
    "watching",
    "kept",
    "darling",
    "dog",
    "ms",
    "win",
    "air",
    "honor",
    "personal",
    "moving",
    "till",
    "admit",
    "problems",
    "murder",
    "strong",
    "he'd",
    "evil",
    "definitely",
    "feels",
    "information",
    "honest",
    "eye",
    "broke",
    "missed",
    "longer",
    "dollars",
    "tired",
    "evening",
    "human",
    "starting",
    "Ross",
    "red",
    "entire",
    "trip",
    "club",
    "suppose",
    "calm",
    "imagine",
    "fair",
    "caught",
    "blame",
    "street",
    "sitting",
    "favor",
    "apartment",
    "court",
    "terrible",
    "clean",
    "tony",
    "learn",
    "Alison",
    "Rick",
    "works",
    "relax",
    "york",
    "million",
    "charity",
    "accident",
    "wake",
    "prove",
    "Danny",
    "smart",
    "message",
    "missing",
    "forgot",
    "small",
    "interested",
    "table",
    "nbsp",
    "become",
    "craig",
    "mouth",
    "pregnant",
    "middle",
    "billy",
    "ring",
    "careful",
    "shall",
    "dude",
    "team",
    "ride",
    "figured",
    "wear",
    "shoot",
    "stick",
    "ray",
    "follow",
    "bo",
    "angry",
    "instead",
    "buddy",
    "write",
    "stopped",
    "early",
    "angel",
    "nick",
    "ran",
    "war",
    "standing",
    "forgive",
    "jail",
    "wearing",
    "ladies",
    "kinda",
    "lunch",
    "eight",
    "gotten",
    "hoping",
    "phoebe",
    "thousand",
    "ridge",
    "music",
    "luke",
    "paper",
    "tough",
    "tape",
    "state",
    "count",
    "college",
    "boyfriend",
    "proud",
    "agree",
    "birthday",
    "bill",
    "seven",
    "they've",
    "Timmy",
    "history",
    "share",
    "offer",
    "hurry",
    "ow",
    "feet",
    "wondering",
    "simple",
    "decision",
    "building",
    "ones",
    "finish",
    "voice",
    "herself",
    "Chris",
    "would've",
    "list",
    "mess",
    "deserve",
    "evidence",
    "cute",
    "dress",
    "Richard",
    "interesting",
    "Jesus",
    "hotel",
    "enjoy",
    "Ryan",
    "Lindsay",
    "quiet",
    "concerned",
    "road",
    "eve",
    "staying",
    "short",
    "beat",
    "sweetie",
    "mention",
    "clothes",
    "finished",
    "fell",
    "neither",
    "fix",
    "victor",
    "respect",
    "spent",
    "prison",
    "attention",
    "holding",
    "calls",
    "near",
    "surprised",
    "bar",
    "beth",
    "pass",
    "keeping",
    "gift",
    "hadn't",
    "putting",
    "dark",
    "self",
    "owe",
    "using",
    "nora",
    "ice",
    "helping",
    "bitch",
    "normal",
    "aunt",
    "lawyer",
    "apart",
    "certain",
    "plans",
    "girlfriend",
    "floor",
    "whether",
    "everything's",
    "present",
    "earth",
    "private",
    "box",
    "Dawson",
    "cover",
    "judge",
    "upstairs",
    "sake",
    "mommy",
    "possibly",
    "worst",
)
def load_dictionary(base_dir: str) -> []:
    """Loads a dictionary from file
    base_dir/custom_dictionary.txt is preferred, falling back to
    base_dir/dictionary.txt. The file is expected to contain one word
    per line.
    Returns a list of words, without blank entries, or an empty list
    if neither file exists or the file could not be read
    """
    filename = base_dir + '/custom_dictionary.txt'
    if not os.path.isfile(filename):
        filename = base_dir + '/dictionary.txt'
        if not os.path.isfile(filename):
            return []

    words = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dict:
            for line_str in fp_dict:
                # skip blank lines, including the empty string which
                # would otherwise follow the final newline, so that
                # an empty "word" can never be selected
                wrd = line_str.strip()
                if wrd:
                    words.append(wrd)
    except OSError:
        # best effort: report the problem and return what was read
        print('EX: unable to load dictionary ' + filename)
    return words
def load_2grams(base_dir: str) -> {}:
    """Reads word pairs (2-grams) from file
    base_dir/custom_2grams.txt takes priority over base_dir/2grams.txt.
    Only lines having exactly three tab separated fields are used: the
    first field is ignored, the second is the leading word and the
    third is the word which follows it.
    Returns a dict mapping each leading word to a list of the distinct
    words seen after it, in file order. An empty dict is returned if
    neither file exists or the file could not be read
    """
    filename = base_dir + '/custom_2grams.txt'
    if not os.path.isfile(filename):
        filename = base_dir + '/2grams.txt'
        if not os.path.isfile(filename):
            return {}

    raw_text = ''
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dict:
            raw_text = fp_dict.read()
    except OSError:
        print('EX: unable to load 2-grams ' + filename)
        raw_text = ''

    pairs = {}
    for row in raw_text.split('\n'):
        fields = row.split('\t')
        if len(fields) != 3:
            # blank or malformed line
            continue
        _, leading, following = fields
        followers = pairs.setdefault(leading, [])
        if following not in followers:
            followers.append(following)
    return pairs
def html_poisoned(dictionary: [], twograms: {}) -> str:
    """Returns a poisoned HTML document for LLM response
    Statistically similar to English language, but semantically worthless
    word salad
    dictionary is a list of words, which may be empty, and twograms
    maps a word to the list of words which can follow it.
    The document contains 1-5 paragraphs of 1-5 sentences, each having
    3-20 words. Each word is chosen by, in order of preference:
    a known follower of the previous word, a common word, or
    a word from the dictionary
    """
    html_str = \
        '<html lang="en">' + \
        '<head>' + \
        '<meta charset="utf-8">' + \
        '</head>' + \
        '<body>'
    no_of_common_words = len(common_words)
    paragraphs = randint(1, 5)
    for _ in range(paragraphs):
        html_str += '<p>'
        sentences = randint(1, 5)
        for sentence_index in range(sentences):
            sentence_str = ''
            no_of_words = randint(3, 20)
            prev_wrd = ''
            for word_index in range(no_of_words):
                wrd = ''
                pair_found = False
                if prev_wrd:
                    # common word sequences
                    if twograms.get(prev_wrd) and \
                       randint(1, 10) <= 7:
                        wrd = random.choice(twograms[prev_wrd])
                        pair_found = True
                if not pair_found:
                    # an empty dictionary cannot be chosen from, so
                    # always fall back to the common words in that case
                    if not dictionary or randint(1, 10) <= 7:
                        # pick a common word, where squaring the
                        # uniform value biases towards the low indexes
                        distribution = random.uniform(0.0, 1.0)
                        common_index = \
                            int(distribution * distribution *
                                no_of_common_words)
                        # scaling by the full length makes the final
                        # word reachable, so clamp to stay in range
                        common_index = \
                            min(common_index, no_of_common_words - 1)
                        wrd = common_words[common_index]
                    else:
                        wrd = random.choice(dictionary)
                if word_index > 0:
                    sentence_str += wrd
                else:
                    # capitalize only the first character, because
                    # title() turns "don't" into "Don'T"
                    sentence_str += wrd[:1].upper() + wrd[1:]
                # occasionally follow a word with a comma, but never
                # the final word nor one chosen as part of a pair
                if randint(1, 10) > 1 or \
                   word_index >= no_of_words - 1 or pair_found:
                    sentence_str += ' '
                else:
                    sentence_str += ', '
                prev_wrd = wrd
            if sentence_index > 0:
                html_str += ' ' + sentence_str.strip() + '.'
            else:
                html_str += sentence_str.strip() + '.'
        html_str += '</p>'
    html_str += '</body></html>'
    return html_str