__filename__ = "poison.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.5.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"

import os
import random
from random import randint

# Words used as the default vocabulary when generating word salad.
# html_poisoned() picks from this tuple with a bias toward its start,
# so the ordering is significant and should be preserved.
common_words = (
    "you", "I", "to", "the", "a", "and", "that", "it", "of", "me",
    "what", "is", "in", "this", "know", "I'm", "for", "no", "have", "my",
    "don't", "just", "not", "do", "be", "on", "your", "was", "we", "it's",
    "with", "so", "but", "all", "well", "are", "he", "oh", "about",
    "right", "you're", "get", "here", "out", "going", "like", "yeah", "if",
    "her", "she", "can", "up", "want", "think", "that's", "now", "go",
    "him", "at", "how", "got", "there", "one", "did", "why", "see", "come",
    "good", "they", "really", "as", "would", "look", "when", "time",
    "will", "okay", "back", "can't", "mean", "tell", "I'll", "from", "hey",
    "were", "he's", "could", "didn't", "yes", "his", "been", "or",
    "something", "who", "because", "some", "had", "then", "say", "ok",
    "take", "an", "way", "us", "little", "make", "need", "gonna", "never",
    "we're", "too", "love", "she's", "I've", "sure", "them", "more",
    "over", "our", "sorry", "where", "what's", "let", "thing", "am",
    "maybe", "down", "man", "has", "uh", "very", "by", "there's", "should",
    "anything", "said", "much", "any", "life", "even", "off", "please",
    "doing", "thank", "give", "only", "thought", "help", "two", "talk",
    "people", "god", "still", "wait", "into", "find", "nothing", "again",
    "things", "let's", "doesn't", "call", "told", "great", "before",
    "better", "ever", "night", "than", "away", "first", "believe", "other",
    "feel", "everything", "work", "you've", "fine", "home", "after",
    "last", "these", "day", "keep", "does", "put", "around", "stop",
    "they're", "I'd", "guy", "long", "isn't", "always", "listen", "wanted",
    "Mr", "guys", "huh", "those", "big", "lot", "happened", "thanks",
    "won't", "trying", "kind", "wrong", "through", "talking", "made",
    "new", "being",
    "guess", "hi", "care", "bad", "mom", "remember", "getting", "we'll",
    "together", "dad", "leave", "mother", "place", "understand",
    "wouldn't", "actually", "hear", "baby", "nice", "father", "else",
    "stay", "done", "wasn't", "their", "course", "might", "mind", "every",
    "enough", "try", "hell", "came", "someone", "you'll", "own", "family",
    "whole", "another", "house", "jack", "yourself", "idea", "ask", "best",
    "must", "coming", "old", "looking", "woman", "hello", "which", "years",
    "room", "money", "left", "knew", "tonight", "real", "son", "hope",
    "name", "same", "went", "um", "hmm", "happy", "pretty", "saw", "girl",
    "sir", "show", "friend", "already", "saying", "may", "next", "three",
    "job", "problem", "minute", "found", "world", "thinking", "haven't",
    "heard", "honey", "matter", "myself", "couldn't", "exactly", "having",
    "ah", "probably", "happen", "we've", "hurt", "boy", "both", "while",
    "dead", "gotta", "alone", "since", "excuse", "start", "kill", "hard",
    "you'd", "today", "car", "ready", "until", "without", "whatever",
    "wants", "hold", "wanna", "yet", "seen", "deal", "took", "once",
    "gone", "called", "morning", "supposed", "friends", "head", "stuff",
    "most", "used", "worry", "second", "part", "live", "truth", "school",
    "face", "forget", "TRUE", "business", "each", "cause", "soon", "knows",
    "few", "telling", "wife", "who's", "use", "chance", "run", "move",
    "anyone", "person", "bye", "somebody", "dr", "heart", "such", "miss",
    "married", "point", "later", "making", "meet", "anyway", "many",
    "phone", "reason", "damn", "lost", "looks", "bring", "case", "turn",
    "wish", "tomorrow", "kids", "trust", "check", "change", "end", "late",
    "anymore", "five", "least", "town", "aren't", "ha", "working", "year",
    "makes", "taking", "means", "brother", "play", "hate", "ago", "says",
    "beautiful", "gave", "fact", "crazy", "party", "sit", "open", "afraid",
    "between", "important", "rest", "fun", "kid", "word", "watch", "glad",
    "everyone", "days", "sister", "minutes", "everybody", "bit",
    "couple", "whoa", "either", "mrs", "feeling", "daughter", "wow",
    "gets", "asked", "under", "break", "promise", "door", "set", "close",
    "hand", "easy", "question", "doctor", "tried", "far", "walk", "needs",
    "trouble", "mine", "though", "times", "different", "killed",
    "hospital", "anybody", "sam", "alright", "wedding", "shut", "able",
    "die", "perfect", "police", "stand", "comes", "hit", "story", "ya",
    "mm", "waiting", "dinner", "against", "funny", "husband", "almost",
    "stupid", "pay", "answer", "four", "office", "cool", "eyes", "news",
    "child", "shouldn't", "half", "side", "yours", "moment", "sleep",
    "read", "where's", "started", "young", "men", "sounds", "sonny",
    "lucky", "pick", "sometimes", "bed", "also", "date", "line", "plan",
    "hours", "lose", "fire", "free", "hands", "serious", "shit", "behind",
    "inside", "high", "ahead", "week", "wonderful", "fight", "past", "cut",
    "quite", "number", "he'll", "sick", "it'll", "game", "eat", "nobody",
    "goes", "death", "along", "save", "seems", "finally", "lives",
    "worried", "upset", "met", "book", "brought", "seem", "sort", "safe",
    "living", "children", "weren't", "leaving", "front", "shot", "loved",
    "asking", "running", "clear", "figure", "hot", "felt", "six",
    "parents", "drink", "absolutely", "how's", "daddy", "sweet", "alive",
    "Paul", "sense", "meant", "happens", "David", "special", "bet",
    "blood", "ain't", "kidding", "lie", "full", "meeting", "dear",
    "coffee", "seeing", "sound", "fault", "water", "fuck", "ten", "women",
    "john", "welcome", "buy", "months", "hour", "speak", "lady", "jen",
    "thinks", "Christmas", "body", "order", "outside", "hang", "possible",
    "worse", "company", "mistake", "ooh", "handle", "spend", "totally",
    "giving", "control", "here's", "marriage", "realize", "power",
    "president", "unless", "sex", "girls", "send", "needed", "taken",
    "died", "scared", "picture", "talked", "jake", "ass", "hundred",
    "changed", "completely", "explain", "playing", "certainly", "sign",
    "boys", "relationship", "loves", "fucking",
    "hair", "lying", "choice", "anywhere", "secret", "future", "weird",
    "luck", "she'll", "max", "Luis", "turned", "known", "touch", "kiss",
    "crane", "questions", "obviously", "wonder", "pain", "calling",
    "somewhere", "throw", "straight", "grace", "cold", "white", "fast",
    "words", "food", "none", "drive", "feelings", "they'll", "worked",
    "marry", "light", "test", "drop", "cannot", "frank", "sent", "city",
    "dream", "protect", "twenty", "class", "lucy", "surprise", "its",
    "sweetheart", "forever", "poor", "looked", "mad", "except", "gun",
    "dance", "takes", "appreciate", "especially", "situation", "besides",
    "weeks", "pull", "himself", "hasn't", "act", "worth", "Sheridan",
    "amazing", "top", "given", "expect", "ben", "rather", "Julian",
    "involved", "swear", "piece", "busy", "law", "decided", "black",
    "joey", "happening", "movie", "we'd", "catch", "antonio", "country",
    "less", "perhaps", "step", "fall", "watching", "kept", "darling",
    "dog", "ms", "win", "air", "honor", "personal", "moving", "till",
    "admit", "problems", "murder", "strong", "he'd", "evil", "definitely",
    "feels", "information", "honest", "eye", "broke", "missed", "longer",
    "dollars", "tired", "evening", "human", "starting", "Ross", "red",
    "entire", "trip", "club", "suppose", "calm", "imagine", "fair",
    "caught", "blame", "street", "sitting", "favor", "apartment", "court",
    "terrible", "clean", "tony", "learn", "Alison", "Rick", "works",
    "relax", "york", "million", "charity", "accident", "wake", "prove",
    "Danny", "smart", "message", "missing", "forgot", "small",
    "interested", "table", "nbsp", "become", "craig", "mouth", "pregnant",
    "middle", "billy", "ring", "careful", "shall", "dude", "team", "ride",
    "figured", "wear", "shoot", "stick", "ray", "follow", "bo", "angry",
    "instead", "buddy", "write", "stopped", "early", "angel", "nick",
    "ran", "war", "standing", "forgive", "jail", "wearing", "ladies",
    "kinda", "lunch", "eight", "gotten", "hoping", "phoebe", "thousand",
    "ridge", "music", "luke", "paper", "tough",
    "tape", "state", "count", "college", "boyfriend", "proud", "agree",
    "birthday", "bill", "seven", "they've", "Timmy", "history", "share",
    "offer", "hurry", "ow", "feet", "wondering", "simple", "decision",
    "building", "ones", "finish", "voice", "herself", "Chris", "would've",
    "list", "mess", "deserve", "evidence", "cute", "dress", "Richard",
    "interesting", "Jesus", "hotel", "enjoy", "Ryan", "Lindsay", "quiet",
    "concerned", "road", "eve", "staying", "short", "beat", "sweetie",
    "mention", "clothes", "finished", "fell", "neither", "fix", "victor",
    "respect", "spent", "prison", "attention", "holding", "calls", "near",
    "surprised", "bar", "beth", "pass", "keeping", "gift", "hadn't",
    "putting", "dark", "self", "owe", "using", "nora", "ice", "helping",
    "bitch", "normal", "aunt", "lawyer", "apart", "certain", "plans",
    "girlfriend", "floor", "whether", "everything's", "present", "earth",
    "private", "box", "Dawson", "cover", "judge", "upstairs", "sake",
    "mommy", "possibly", "worst"
)


def _find_data_file(base_dir: str, names: tuple) -> str:
    """Returns the path of the first of the given filenames which exists
    within base_dir, or an empty string if none of them exist
    """
    for name in names:
        filename = base_dir + '/' + name
        if os.path.isfile(filename):
            return filename
    return ''


def load_dictionary(base_dir: str) -> list:
    """Loads a dictionary from file
    custom_dictionary.txt is preferred, falling back to dictionary.txt.
    Returns a list with one word per non-blank line of the file, or an
    empty list if neither file exists or the file could not be read
    """
    filename = _find_data_file(base_dir,
                               ('custom_dictionary.txt', 'dictionary.txt'))
    if not filename:
        return []

    words = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dict:
            for line_str in fp_dict:
                # blank lines (including the one implied by a trailing
                # newline) would otherwise become empty "words"
                word = line_str.strip()
                if word:
                    words.append(word)
    except OSError:
        print('EX: unable to load dictionary ' + filename)
    return words


def load_2grams(base_dir: str) -> dict:
    """Loads 2-grams from file
    custom_2grams.txt is preferred, falling back to 2grams.txt.
    Each usable line has exactly three tab separated fields, of which
    the second and third are the first and second word of the pair.
    Returns a dict mapping each first word to the list of distinct
    second words seen after it, in file order. Returns an empty dict
    if neither file exists or the file could not be read
    """
    filename = _find_data_file(base_dir, ('custom_2grams.txt', '2grams.txt'))
    if not filename:
        return {}

    lines = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dict:
            lines = fp_dict.read().split('\n')
    except OSError:
        print('EX: unable to load 2-grams ' + filename)

    twograms = {}
    for line_str in lines:
        fields = line_str.split('\t')
        if len(fields) != 3:
            continue
        first_word = fields[1]
        second_word = fields[2]
        # an empty word can never be matched or usefully emitted
        if not first_word or not second_word:
            continue
        followers = twograms.setdefault(first_word, [])
        if second_word not in followers:
            followers.append(second_word)
    return twograms


def _next_word(prev_wrd: str, dictionary: list, twograms: dict) -> tuple:
    """Chooses the next word of a sentence
    Returns (word, pair_found) where pair_found is True if the word was
    chosen as a known follower of prev_wrd
    """
    if prev_wrd:
        followers = twograms.get(prev_wrd)
        # common word sequences
        if followers and randint(1, 10) <= 7:
            return random.choice(followers), True

    # With no dictionary available always use the common words, rather
    # than failing on random.choice of an empty list
    if not dictionary or randint(1, 10) <= 7:
        # pick a common word. Squaring the uniform value skews the
        # index toward the start of common_words
        distribution = random.uniform(0.0, 1.0)
        common_index = \
            int(distribution * distribution * (len(common_words) - 1))
        return common_words[common_index], False

    return random.choice(dictionary), False


def _poisoned_sentence(dictionary: list, twograms: dict) -> str:
    """Returns a single sentence of between 3 and 20 words, beginning
    with an upper case letter and ending with a full stop
    """
    no_of_words = randint(3, 20)
    fragments = []
    prev_wrd = ''
    for word_index in range(no_of_words):
        wrd, pair_found = _next_word(prev_wrd, dictionary, twograms)

        if word_index > 0:
            fragments.append(wrd)
        else:
            # Only the first character is upper cased. str.title()
            # would also capitalise after an apostrophe (Don'T)
            fragments.append(wrd[:1].upper() + wrd[1:])

        # occasionally follow the word with a comma, but never after
        # the last word or after a word chosen as part of a 2-gram
        if randint(1, 10) > 1 or \
           word_index >= no_of_words - 1 or pair_found:
            fragments.append(' ')
        else:
            fragments.append(', ')
        prev_wrd = wrd
    return ''.join(fragments).strip() + '.'


def html_poisoned(dictionary: list, twograms: dict) -> str:
    """Returns a poisoned HTML document for LLM response
    Statistically similar to English language, but semantically worthless
    word salad

    dictionary is a list of words, which may be empty, in which case
    only common_words are used.
    twograms maps a word to the list of words which may follow it.
    The document contains between 1 and 5 paragraphs, each having
    between 1 and 5 sentences
    """
    # NOTE(review): the markup literals were empty in the copy of this
    # file which was reviewed, so a minimal HTML5 skeleton is used here.
    # Confirm against the page template which the project expects.
    html_parts = [
        '<!DOCTYPE html>\n',
        '<html lang="en">\n',
        '<head>\n<meta charset="utf-8">\n</head>\n',
        '<body>\n'
    ]

    paragraphs = randint(1, 5)
    for _ in range(paragraphs):
        sentences = randint(1, 5)
        sentence_list = [_poisoned_sentence(dictionary, twograms)
                         for _ in range(sentences)]
        html_parts.append('<p>' + ' '.join(sentence_list) + '</p>\n')

    html_parts.append('</body></html>')
    return ''.join(html_parts)