__filename__ = "poison.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.5.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"

import os
import random
from random import randint

# Nouns used to pad out generated sentences; chosen uniformly at random
common_nouns = (
    "time", "way", "year", "work", "government", "day", "man",
    "world", "life", "part", "house", "course", "case", "system",
    "place", "end", "group", "company", "party", "information",
    "school", "fact", "money", "point", "example", "state",
    "business", "night", "area", "water", "thing", "family",
    "head", "hand", "order", "john", "side", "home", "development",
    "week", "power", "country", "council", "use", "service", "room",
    "market", "problem", "court", "lot", "war", "police",
    "interest", "car", "law", "road", "form", "face", "education",
    "policy", "research", "sort", "office", "body", "person",
    "health", "mother", "question", "period", "name", "book",
    "level", "child", "control", "society", "minister", "view",
    "door", "line", "community", "south", "city", "god", "father",
    "centre", "effect", "staff", "position", "kind", "job", "woman",
    "action", "management", "act", "process", "north", "age",
    "evidence", "idea", "west", "support", "moment", "sense",
    "report", "mind", "church", "morning", "death", "change",
    "industry", "land", "care", "century", "range", "table", "back",
    "trade", "history", "study", "street", "committee", "rate",
    "word", "food", "language", "experience", "result", "team",
    "other", "sir", "section", "programme", "air", "authority",
    "role", "reason", "price", "town", "class", "nature", "subject",
    "department", "union", "bank", "member", "value", "need",
    "east", "practice", "type", "paper", "date", "decision",
    "figure", "right", "wife", "president", "university", "friend",
    "club", "quality", "voice", "lord", "stage", "king", "us",
    "situation", "light", "tax", "production", "march", "secretary",
    "art", "board", "may", "hospital", "month", "music", "cost",
    "field", "award",
    "issue", "bed", "project", "chapter", "girl", "game", "amount",
    "basis", "knowledge", "approach", "series", "love", "top",
    "news", "front", "future", "manager", "account", "computer",
    "security", "rest", "labour", "structure", "hair", "bill",
    "heart", "force", "attention", "movement", "success", "letter",
    "agreement", "capital", "analysis", "population", "environment",
    "performance", "model", "material", "theory", "growth", "fire",
    "chance", "boy", "relationship", "son", "sea", "record", "size",
    "property", "space", "term", "director", "plan", "behaviour",
    "treatment", "energy", "peter", "income", "cup", "scheme",
    "design", "response", "association", "choice", "pressure",
    "hall", "couple", "technology", "defence", "list", "chairman",
    "loss", "activity", "contract", "county", "wall", "paul",
    "difference", "army", "hotel", "sun", "product", "summer",
    "set", "village", "colour", "floor", "season", "unit", "park",
    "hour", "investment", "test", "garden", "husband", "employment",
    "style", "science", "look", "deal", "charge", "help", "economy",
    "new", "page", "risk", "advice", "event", "picture",
    "commission", "fish", "college", "oil", "doctor", "opportunity",
    "film", "conference", "operation", "application", "press",
    "extent", "addition", "station", "window", "shop", "access",
    "region", "doubt", "majority", "degree", "television", "blood",
    "statement", "sound", "election", "parliament", "site", "mark",
    "importance", "title", "species", "increase", "return",
    "concern", "public", "competition", "software", "glass", "lady",
    "answer", "earth", "daughter", "purpose", "responsibility",
    "leader", "river", "eye", "ability", "appeal", "opposition",
    "campaign", "respect", "task", "instance", "sale", "whole",
    "officer", "method", "division", "source", "piece", "pattern",
    "lack", "disease", "equipment", "surface", "oxford", "demand",
    "post", "mouth", "radio", "provision", "attempt", "sector",
    "firm", "status", "peace", "variety", "teacher", "show",
    "speaker", "baby", "arm", "base",
    "miss", "safety", "trouble", "culture", "direction", "context",
    "character", "box", "discussion", "past", "weight",
    "organisation", "start", "brother", "league", "condition",
    "machine", "argument", "sex", "budget", "english", "transport",
    "share", "mum", "cash", "principle", "exchange", "aid",
    "library", "version", "rule", "tea", "balance", "afternoon",
    "reference", "protection", "truth", "district", "turn", "smith",
    "review", "minute", "duty", "survey", "presence", "influence",
    "stone", "dog", "benefit", "collection", "executive", "speech",
    "function", "queen", "marriage", "stock", "failure", "kitchen",
    "student", "effort", "holiday", "career", "attack", "length",
    "horse", "progress", "plant", "visit", "relation", "ball",
    "memory", "bar", "opinion", "quarter", "impact", "scale",
    "race", "image", "trust", "justice", "edge", "gas", "railway",
    "expression", "advantage", "gold", "wood", "network", "text",
    "forest", "sister", "chair", "cause", "foot", "rise", "half",
    "winter", "corner", "insurance", "step", "damage", "credit",
    "pain", "possibility", "legislation", "strength", "speed",
    "crime", "hill", "debate", "will", "supply", "present",
    "confidence", "mary", "patient", "wind", "solution", "band",
    "museum", "farm", "pound", "henry", "match", "assessment",
    "message", "football", "animal", "skin", "scene", "article",
    "stuff", "introduction", "play", "administration", "fear",
    "dad", "proportion", "island", "contact", "japan", "claim",
    "kingdom", "video", "tv", "existence", "telephone", "move",
    "traffic", "distance", "relief", "cabinet", "unemployment",
    "reality", "target", "trial", "rock", "concept", "spirit",
    "accident", "organization", "construction", "coffee", "phone",
    "distribution", "train", "sight", "difficulty", "factor",
    "exercise", "weekend", "battle", "prison", "grant", "aircraft",
    "tree", "bridge", "strategy", "contrast", "communication",
    "background", "shape", "wine", "star", "hope", "selection",
    "detail", "user", "path", "client", "search", "master",
    "rain", "offer", "goal", "dinner", "freedom", "attitude",
    "while", "agency", "seat", "manner", "favour", "fig", "pair",
    "crisis", "smile", "prince", "danger", "call", "capacity",
    "output", "note", "procedure", "theatre", "tour", "recognition",
    "middle", "absence", "sentence", "package", "track", "card",
    "sign", "commitment", "player", "threat", "weather", "element",
    "conflict", "notice", "victory", "bottom", "finance", "fund",
    "violence", "file", "profit", "standard", "jack", "route",
    "china", "expenditure", "second", "discipline", "cell",
    "reaction", "castle", "congress", "individual", "lead",
    "consideration", "debt", "option", "payment", "exhibition",
    "reform", "emphasis", "spring", "audience", "feature", "touch",
    "estate", "assembly", "volume", "youth", "contribution",
    "curriculum", "appearance", "martin", "tom", "boat",
    "institute", "membership", "branch", "bus", "waste", "heat",
    "neck", "object", "captain", "driver", "challenge",
    "conversation", "occasion", "code", "crown", "birth", "silence",
    "literature", "faith", "hell", "entry", "transfer", "gentleman",
    "bag", "coal", "investigation", "leg", "belief", "total",
    "major", "document", "description", "murder", "aim",
    "manchester", "flight", "conclusion", "drug", "tradition",
    "pleasure", "connection", "owner", "treaty", "tony", "alan",
    "desire", "professor", "copy", "ministry", "acid", "palace",
    "address", "institution", "lunch", "generation", "partner",
    "engine", "newspaper", "cross", "reduction", "welfare",
    "definition", "key", "release", "vote", "examination", "judge",
    "atmosphere", "leadership", "sky", "breath", "creation", "row",
    "guide", "milk", "cover", "screen", "intention", "criticism",
    "jones", "silver", "customer", "journey", "explanation",
    "green", "measure", "brain", "significance", "phase", "injury",
    "run", "coast", "technique", "valley", "drink", "magazine",
    "potential", "drive", "revolution", "bishop", "settlement",
    "christ", "metal", "motion", "index", "adult", "inflation",
    "sport", "surprise",
    "pension", "factory", "tape", "flow", "iron", "trip", "lane",
    "pool", "independence", "hole", "flat", "content", "pay",
    "noise", "combination", "session", "appointment", "fashion",
    "consumer", "accommodation", "temperature", "mike", "religion",
    "author", "nation", "northern", "sample", "assistance",
    "interpretation", "aspect", "display", "shoulder", "agent",
    "gallery", "republic", "cancer", "proposal", "sequence",
    "simon", "ship", "interview", "vehicle", "democracy",
    "improvement", "involvement", "general", "enterprise", "van",
    "meal", "breakfast", "motor", "channel", "impression", "tone",
    "sheet", "pollution", "bob", "beauty", "square", "vision",
    "spot", "distinction", "brown", "crowd", "fuel", "desk", "sum",
    "decline", "revenue", "fall", "diet", "bedroom", "soil",
    "reader", "shock", "fruit", "behalf", "deputy", "roof", "nose",
    "steel", "artist", "graham", "plate", "song", "maintenance",
    "formation", "grass", "spokesman", "ice", "talk", "program",
    "link", "ring", "expert", "establishment", "plastic",
    "candidate", "rail", "passage", "joe", "parish", "emergency",
    "liability", "identity", "location", "framework", "strike",
    "countryside", "map", "lake", "household", "approval", "border",
    "bottle", "bird", "constitution", "autumn", "cat",
    "agriculture", "concentration", "guy", "dress", "victim",
    "mountain", "editor", "theme", "error", "loan", "stress",
    "recovery", "electricity", "recession", "wealth", "request",
    "comparison", "lewis", "white", "walk", "focus", "chief",
    "parent", "sleep", "mass", "jane", "bush", "foundation", "bath",
    "item", "lifespan", "publication", "decade", "beach", "sugar",
    "height", "charity", "writer", "panel", "struggle", "dream",
    "outcome", "efficiency", "offence", "resolution", "reputation",
    "specialist", "taylor", "pub", "cooperation", "port",
    "incident", "representation", "bread", "chain", "initiative",
    "clause", "resistance", "mistake", "worker", "advance",
    "empire", "notion", "mirror", "delivery", "chest", "licence",
    "frank", "average",
    "awareness", "travel", "expansion", "block", "alternative",
    "chancellor", "meat", "store", "self", "break", "drama",
    "corporation", "currency", "extension", "convention",
    "partnership", "skill", "furniture", "round", "regime",
    "inquiry", "rugby", "philosophy", "scope", "gate", "minority",
    "intelligence", "restaurant", "consequence", "mill", "golf",
    "retirement", "priority", "plane", "gun", "gap", "core",
    "uncle", "fun", "arrival", "snow", "no", "command", "abuse",
    "limit", "championship"
)

# General vocabulary, ordered so that html_poisoned can favour the
# entries nearest the start of the tuple
common_words = (
    "you", "I", "to", "the", "a", "and", "that", "it", "of", "me",
    "what", "is", "in", "this", "know", "I'm", "for", "no", "have",
    "my", "don't", "just", "not", "do", "be", "on", "your", "was",
    "we", "it's", "with", "so", "but", "all", "well",
    "are", "he", "oh",
    "about", "right", "you're", "get", "here", "out", "going",
    "like", "yeah", "if", "her", "she", "can", "up", "want",
    "think", "that's", "now", "go", "him", "at", "how", "got",
    "there", "one", "did", "why", "see", "come", "good", "they",
    "really", "as", "would", "look", "when", "time", "will", "okay",
    "back", "can't", "mean", "tell", "I'll", "from", "hey", "were",
    "he's", "could", "didn't", "yes", "his", "been", "or",
    "something", "who", "because", "some", "had", "then", "say",
    "ok", "take", "an", "way", "us", "little", "make", "need",
    "gonna", "never", "we're", "too", "love", "she's", "I've",
    "sure", "them", "more", "over", "our", "sorry", "where",
    "what's", "let", "thing", "am", "maybe", "down", "man", "has",
    "uh", "very", "by", "there's", "should", "anything", "said",
    "much", "any", "life", "even", "off", "please", "doing",
    "thank", "give", "only", "thought", "help", "two", "talk",
    "people", "god", "still", "wait", "into", "find", "nothing",
    "again", "things", "let's", "doesn't", "call", "told", "great",
    "before", "better", "ever", "night", "than", "away", "first",
    "believe", "other", "feel", "everything", "work", "you've",
    "fine", "home", "after", "last", "these", "day", "keep", "does",
    "put", "around",
    "stop", "they're", "I'd", "guy", "long", "isn't", "always",
    "listen", "wanted", "Mr", "guys", "huh", "those", "big", "lot",
    "happened", "thanks", "won't", "trying", "kind", "wrong",
    "through", "talking", "made", "new", "being", "guess", "hi",
    "care", "bad", "mom", "remember", "getting", "we'll",
    "together", "dad", "leave", "mother", "place", "understand",
    "wouldn't", "actually", "hear", "baby", "nice", "father",
    "else", "stay", "done", "wasn't", "their", "course", "might",
    "mind", "every", "enough", "try", "hell", "came", "someone",
    "you'll", "own", "family", "whole", "another", "house", "jack",
    "yourself", "idea", "ask", "best", "must", "coming", "old",
    "looking", "woman", "hello", "which", "years", "room", "money",
    "left", "knew", "tonight", "real", "son", "hope", "name",
    "same", "went", "um", "hmm", "happy", "pretty", "saw", "girl",
    "sir", "show", "friend", "already", "saying", "may", "next",
    "three", "job", "problem", "minute", "found", "world",
    "thinking", "haven't", "heard", "honey", "matter", "myself",
    "couldn't", "exactly", "having", "ah", "probably", "happen",
    "we've", "hurt", "boy", "both", "while", "dead", "gotta",
    "alone", "since", "excuse", "start", "kill", "hard", "you'd",
    "today", "car", "ready", "until", "without", "whatever",
    "wants", "hold", "wanna", "yet", "seen", "deal", "took", "once",
    "gone", "called", "morning", "supposed", "friends", "head",
    "stuff", "most", "used", "worry", "second", "part", "live",
    "truth", "school", "face", "forget", "TRUE", "business", "each",
    "cause", "soon", "knows", "few", "telling", "wife", "who's",
    "use", "chance", "run", "move", "anyone", "person", "bye",
    "somebody", "dr", "heart", "such", "miss", "married", "point",
    "later", "making", "meet", "anyway", "many", "phone", "reason",
    "damn", "lost", "looks", "bring", "case", "turn", "wish",
    "tomorrow", "kids", "trust", "check", "change", "end", "late",
    "anymore", "five", "least", "town", "aren't", "ha", "working",
    "year", "makes", "taking", "means", "brother", "play",
    "hate", "ago", "says", "beautiful", "gave", "fact", "crazy",
    "party", "sit", "open", "afraid", "between", "important",
    "rest", "fun", "kid", "word", "watch", "glad", "everyone",
    "days", "sister", "minutes", "everybody", "bit", "couple",
    "whoa", "either", "mrs", "feeling", "daughter", "wow", "gets",
    "asked", "under", "break", "promise", "door", "set", "close",
    "hand", "easy", "question", "doctor", "tried", "far", "walk",
    "needs", "trouble", "mine", "though", "times", "different",
    "killed", "hospital", "anybody", "sam", "alright", "wedding",
    "shut", "able", "die", "perfect", "police", "stand", "comes",
    "hit", "story", "ya", "mm", "waiting", "dinner", "against",
    "funny", "husband", "almost", "stupid", "pay", "answer", "four",
    "office", "cool", "eyes", "news", "child", "shouldn't", "half",
    "side", "yours", "moment", "sleep", "read", "where's",
    "started", "young", "men", "sounds", "sonny", "lucky", "pick",
    "sometimes", "bed", "also", "date", "line", "plan", "hours",
    "lose", "fire", "free", "hands", "serious", "shit", "behind",
    "inside", "high", "ahead", "week", "wonderful", "fight", "past",
    "cut", "quite", "number", "he'll", "sick", "it'll", "game",
    "eat", "nobody", "goes", "death", "along", "save", "seems",
    "finally", "lives", "worried", "upset", "met", "book",
    "brought", "seem", "sort", "safe", "living", "children",
    "weren't", "leaving", "front", "shot", "loved", "asking",
    "running", "clear", "figure", "hot", "felt", "six", "parents",
    "drink", "absolutely", "how's", "daddy", "sweet", "alive",
    "Paul", "sense", "meant", "happens", "David", "special", "bet",
    "blood", "ain't", "kidding", "lie", "full", "meeting", "dear",
    "coffee", "seeing", "sound", "fault", "water", "fuck", "ten",
    "women", "john", "welcome", "buy", "months", "hour", "speak",
    "lady", "jen", "thinks", "Christmas", "body", "order",
    "outside", "hang", "possible", "worse", "company", "mistake",
    "ooh", "handle", "spend", "totally", "giving", "control",
    "here's", "marriage", "realize", "power", "president",
    "unless", "sex", "girls", "send", "needed", "taken", "died",
    "scared", "picture", "talked", "jake", "ass", "hundred",
    "changed", "completely", "explain", "playing", "certainly",
    "sign", "boys", "relationship", "loves", "fucking", "hair",
    "lying", "choice", "anywhere", "secret", "future", "weird",
    "luck", "she'll", "max", "Luis", "turned", "known", "touch",
    "kiss", "crane", "questions", "obviously", "wonder", "pain",
    "calling", "somewhere", "throw", "straight", "grace", "cold",
    "white", "fast", "words", "food", "none", "drive", "feelings",
    "they'll", "worked", "marry", "light", "test", "drop", "cannot",
    "frank", "sent", "city", "dream", "protect", "twenty", "class",
    "lucy", "surprise", "its", "sweetheart", "forever", "poor",
    "looked", "mad", "except", "gun", "dance", "takes",
    "appreciate", "especially", "situation", "besides", "weeks",
    "pull", "himself", "hasn't", "act", "worth", "Sheridan",
    "amazing", "top", "given", "expect", "ben", "rather", "Julian",
    "involved", "swear", "piece", "busy", "law", "decided", "black",
    "joey", "happening", "movie", "we'd", "catch", "antonio",
    "country", "less", "perhaps", "step", "fall", "watching",
    "kept", "darling", "dog", "win", "air", "honor", "personal",
    "moving", "till", "admit", "problems", "murder", "strong",
    "he'd", "evil", "definitely", "feels", "information", "honest",
    "eye", "broke", "missed", "longer", "dollars", "tired",
    "evening", "human", "starting", "Ross", "red", "entire", "trip",
    "club", "suppose", "calm", "imagine", "fair", "caught", "blame",
    "street", "sitting", "favor", "apartment", "court", "terrible",
    "clean", "tony", "learn", "Alison", "Rick", "works", "relax",
    "york", "million", "charity", "accident", "wake", "prove",
    "Danny", "smart", "message", "missing", "forgot", "small",
    "interested", "table", "nbsp", "become", "craig", "mouth",
    "pregnant", "middle", "billy", "ring", "careful", "shall",
    "dude", "team", "ride", "figured", "wear", "shoot", "stick",
    "ray", "follow", "angry", "instead", "buddy", "write",
    "stopped", "early", "angel", "nick", "ran", "war", "standing",
    "forgive", "jail", "wearing", "ladies", "kinda", "lunch",
    "eight", "gotten", "hoping", "phoebe", "thousand", "ridge",
    "music", "luke", "paper", "tough", "tape", "state", "count",
    "college", "boyfriend", "proud", "agree", "birthday", "bill",
    "seven", "they've", "Timmy", "history", "share", "offer",
    "hurry", "feet", "wondering", "simple", "decision", "building",
    "ones", "finish", "voice", "herself", "Chris", "would've",
    "list", "mess", "deserve", "evidence", "cute", "dress",
    "Richard", "interesting", "Jesus", "hotel", "enjoy", "Ryan",
    "Lindsay", "quiet", "concerned", "road", "eve", "staying",
    "short", "beat", "sweetie", "mention", "clothes", "finished",
    "fell", "neither", "fix", "victor", "respect", "spent",
    "prison", "attention", "holding", "calls", "near", "surprised",
    "bar", "beth", "pass", "keeping", "gift", "hadn't", "putting",
    "dark", "self", "owe", "using", "nora", "ice", "helping",
    "bitch", "normal", "aunt", "lawyer", "apart", "certain",
    "plans", "girlfriend", "floor", "whether", "everything's",
    "present", "earth", "private", "box", "Dawson", "cover",
    "judge", "upstairs", "sake", "mommy", "possibly", "worst"
)

# NOTE(review): the markup literals were empty or garbled in the reviewed
# copy of this file, so this is a minimal HTML5 skeleton reconstructed
# from the html_poisoned docstring - confirm against upstream
_HTML_HEADER = \
    '<!DOCTYPE html>' + \
    '<html lang="en">' + \
    '<head><meta charset="utf-8"></head>' + \
    '<body>'
_HTML_FOOTER = '</body></html>'


def _find_word_file(base_dir: str, name: str) -> str:
    """Returns the path of the custom_ variant of the given file if it
    exists, otherwise the default file, otherwise an empty string
    """
    for candidate in ('custom_' + name, name):
        filename = base_dir + '/' + candidate
        if os.path.isfile(filename):
            return filename
    return ''


def _read_lines(filename: str, description: str) -> list:
    """Returns the lines of a utf-8 text file, or an empty list if the
    file could not be read. description is only used in the error message
    """
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dict:
            return fp_dict.read().split('\n')
    except (OSError, UnicodeDecodeError):
        print('EX: unable to load ' + description + ' ' + filename)
    return []


def load_dictionary(base_dir: str) -> list:
    """Loads a dictionary from file
    Reads base_dir/custom_dictionary.txt if it exists, otherwise
    base_dir/dictionary.txt, with one word per line.
    Returns a list of non-empty words, or an empty list if neither
    file exists or the file could not be read
    """
    filename = _find_word_file(base_dir, 'dictionary.txt')
    if not filename:
        return []
    # blank lines (including the one after a trailing newline) would
    # otherwise appear as empty words within the generated text
    stripped = (line.strip() for line in _read_lines(filename, 'dictionary'))
    return [word for word in stripped if word]


def load_2grams(base_dir: str) -> dict:
    """Loads 2-grams from file
    Reads base_dir/custom_2grams.txt if it exists, otherwise
    base_dir/2grams.txt. Only lines with exactly three tab separated
    fields are used, the second and third fields being the word pair.
    Returns a dict mapping each first word to a list of the distinct
    words seen following it, in file order
    """
    filename = _find_word_file(base_dir, '2grams.txt')
    if not filename:
        return {}

    twograms = {}
    for line_str in _read_lines(filename, '2-grams'):
        fields = line_str.split('\t')
        if len(fields) != 3:
            continue
        first_word = fields[1]
        second_word = fields[2]
        if not first_word or not second_word:
            # an empty field would produce an empty word in the output
            continue
        followers = twograms.setdefault(first_word, [])
        if second_word not in followers:
            followers.append(second_word)
    return twograms


def _random_word(dictionary: list) -> str:
    """Returns a word chosen without reference to the preceding word
    """
    if randint(1, 100) <= 37:
        # pick a common noun
        return random.choice(common_nouns)

    # the dictionary may be empty if no dictionary file was installed,
    # in which case only the built-in common words are used
    if dictionary and randint(1, 10) > 7:
        return random.choice(dictionary)

    # pick a common word. Squaring the random value biases the choice
    # towards the start of the list
    no_of_common_words = len(common_words)
    distribution = random.random()
    common_index = int(distribution * distribution * no_of_common_words)
    return common_words[min(common_index, no_of_common_words - 1)]


def _random_sentence(dictionary: list, twograms: dict) -> str:
    """Returns a single sentence of between 3 and 20 words, beginning
    with a capital letter and ending with a full stop
    """
    no_of_words = randint(3, 20)
    last_index = no_of_words - 1
    pieces = []
    prev_wrd = ''
    for word_index in range(no_of_words):
        # common word sequences
        followers = twograms.get(prev_wrd) if prev_wrd else None
        pair_found = bool(followers) and randint(1, 10) <= 7
        if pair_found:
            wrd = random.choice(followers)
        else:
            wrd = _random_word(dictionary)

        if word_index == 0:
            # str.title() is not used here because it treats an
            # apostrophe as a word boundary, turning don't into Don'T
            pieces.append(wrd[:1].upper() + wrd[1:])
        else:
            pieces.append(wrd)

        if word_index < last_index:
            # occasionally add a comma, but never after the second
            # word of a 2-gram
            if not pair_found and randint(1, 10) == 1:
                pieces.append(', ')
            else:
                pieces.append(' ')
        prev_wrd = wrd
    return ''.join(pieces).strip() + '.'


def html_poisoned(dictionary: [], twograms: {}) -> str:
    """Returns a poisoned HTML document for LLM response
    Statistically similar to English language, but semantically worthless
    word salad

    dictionary is a list of words, as returned by load_dictionary, and
    may be empty. twograms maps a word to the list of words which may
    follow it, as returned by load_2grams, and may be empty.
    The document contains between 1 and 5 paragraphs, each having
    between 1 and 5 sentences
    """
    dictionary = dictionary or []
    twograms = twograms or {}

    html_parts = [_HTML_HEADER]
    for _ in range(randint(1, 5)):
        sentences = [_random_sentence(dictionary, twograms)
                     for _ in range(randint(1, 5))]
        html_parts.append('<p>' + ' '.join(sentences) + '</p>')
    html_parts.append(_HTML_FOOTER)
    return ''.join(html_parts)