diff --git a/poison.py b/poison.py index ce267d787..9e97f7985 100644 --- a/poison.py +++ b/poison.py @@ -12,6 +12,1000 @@ import os import random from random import randint +common_nouns = ( + "time", + "way", + "year", + "work", + "government", + "day", + "man", + "world", + "life", + "part", + "house", + "course", + "case", + "system", + "place", + "end", + "group", + "company", + "party", + "information", + "school", + "fact", + "money", + "point", + "example", + "state", + "business", + "night", + "area", + "water", + "thing", + "family", + "head", + "hand", + "order", + "john", + "side", + "home", + "development", + "week", + "power", + "country", + "council", + "use", + "service", + "room", + "market", + "problem", + "court", + "lot", + "war", + "police", + "interest", + "car", + "law", + "road", + "form", + "face", + "education", + "policy", + "research", + "sort", + "office", + "body", + "person", + "health", + "mother", + "question", + "period", + "name", + "book", + "level", + "child", + "control", + "society", + "minister", + "view", + "door", + "line", + "community", + "south", + "city", + "god", + "father", + "centre", + "effect", + "staff", + "position", + "kind", + "job", + "woman", + "action", + "management", + "act", + "process", + "north", + "age", + "evidence", + "idea", + "west", + "support", + "moment", + "sense", + "report", + "mind", + "church", + "morning", + "death", + "change", + "industry", + "land", + "care", + "century", + "range", + "table", + "back", + "trade", + "history", + "study", + "street", + "committee", + "rate", + "word", + "food", + "language", + "experience", + "result", + "team", + "other", + "sir", + "section", + "programme", + "air", + "authority", + "role", + "reason", + "price", + "town", + "class", + "nature", + "subject", + "department", + "union", + "bank", + "member", + "value", + "need", + "east", + "practice", + "type", + "paper", + "date", + "decision", + "figure", + "right", + "wife", + "president", + "university", + "friend", + "club", + "quality", + "voice", + "lord", + "stage", + "king", + "us", + "situation", + "light", + "tax", + "production", + "march", + "secretary", + "art", + "board", + "may", + "hospital", + "month", + "music", + "cost", + "field", + "award", + "issue", + "bed", + "project", + "chapter", + "girl", + "game", + "amount", + "basis", + "knowledge", + "approach", + "series", + "love", + "top", + "news", + "front", + "future", + "manager", + "account", + "computer", + "security", + "rest", + "labour", + "structure", + "hair", + "bill", + "heart", + "force", + "attention", + "movement", + "success", + "letter", + "agreement", + "capital", + "analysis", + "population", + "environment", + "performance", + "model", + "material", + "theory", + "growth", + "fire", + "chance", + "boy", + "relationship", + "son", + "sea", + "record", + "size", + "property", + "space", + "term", + "director", + "plan", + "behaviour", + "treatment", + "energy", + "peter", + "income", + "cup", + "scheme", + "design", + "response", + "association", + "choice", + "pressure", + "hall", + "couple", + "technology", + "defence", + "list", + "chairman", + "loss", + "activity", + "contract", + "county", + "wall", + "paul", + "difference", + "army", + "hotel", + "sun", + "product", + "summer", + "set", + "village", + "colour", + "floor", + "season", + "unit", + "park", + "hour", + "investment", + "test", + "garden", + "husband", + "employment", + "style", + "science", + "look", + "deal", + "charge", + "help", + "economy", + "new", + "page", + "risk", + "advice", + "event", + "picture", + "commission", + "fish", + "college", + "oil", + "doctor", + "opportunity", + "film", + "conference", + "operation", + "application", + "press", + "extent", + "addition", + "station", + "window", + "shop", + "access", + "region", + "doubt", + "majority", + "degree", + "television", + "blood", + "statement", + "sound", + "election", + "parliament", + "site", + "mark", + "importance", + "title", + "species", + "increase", + "return", + "concern", + "public", + "competition", + "software", + "glass", + "lady", + "answer", + "earth", + "daughter", + "purpose", + "responsibility", + "leader", + "river", + "eye", + "ability", + "appeal", + "opposition", + "campaign", + "respect", + "task", + "instance", + "sale", + "whole", + "officer", + "method", + "division", + "source", + "piece", + "pattern", + "lack", + "disease", + "equipment", + "surface", + "oxford", + "demand", + "post", + "mouth", + "radio", + "provision", + "attempt", + "sector", + "firm", + "status", + "peace", + "variety", + "teacher", + "show", + "speaker", + "baby", + "arm", + "base", + "miss", + "safety", + "trouble", + "culture", + "direction", + "context", + "character", + "box", + "discussion", + "past", + "weight", + "organisation", + "start", + "brother", + "league", + "condition", + "machine", + "argument", + "sex", + "budget", + "english", + "transport", + "share", + "mum", + "cash", + "principle", + "exchange", + "aid", + "library", + "version", + "rule", + "tea", + "balance", + "afternoon", + "reference", + "protection", + "truth", + "district", + "turn", + "smith", + "review", + "minute", + "duty", + "survey", + "presence", + "influence", + "stone", + "dog", + "benefit", + "collection", + "executive", + "speech", + "function", + "queen", + "marriage", + "stock", + "failure", + "kitchen", + "student", + "effort", + "holiday", + "career", + "attack", + "length", + "horse", + "progress", + "plant", + "visit", + "relation", + "ball", + "memory", + "bar", + "opinion", + "quarter", + "impact", + "scale", + "race", + "image", + "trust", + "justice", + "edge", + "gas", + "railway", + "expression", + "advantage", + "gold", + "wood", + "network", + "text", + "forest", + "sister", + "chair", + "cause", + "foot", + "rise", + "half", + "winter", + "corner", + "insurance", + "step", + "damage", + "credit", + "pain", + "possibility", + "legislation", + "strength", + "speed", + "crime", + "hill", + "debate", + "will", + "supply", + "present", + "confidence", + "mary", + "patient", + "wind", + "solution", + "band", + "museum", + "farm", + "pound", + "henry", + "match", + "assessment", + "message", + "football", + "animal", + "skin", + "scene", + "article", + "stuff", + "introduction", + "play", + "administration", + "fear", + "dad", + "proportion", + "island", + "contact", + "japan", + "claim", + "kingdom", + "video", + "tv", + "existence", + "telephone", + "move", + "traffic", + "distance", + "relief", + "cabinet", + "unemployment", + "reality", + "target", + "trial", + "rock", + "concept", + "spirit", + "accident", + "organization", + "construction", + "coffee", + "phone", + "distribution", + "train", + "sight", + "difficulty", + "factor", + "exercise", + "weekend", + "battle", + "prison", + "grant", + "aircraft", + "tree", + "bridge", + "strategy", + "contrast", + "communication", + "background", + "shape", + "wine", + "star", + "hope", + "selection", + "detail", + "user", + "path", + "client", + "search", + "master", + "rain", + "offer", + "goal", + "dinner", + "freedom", + "attitude", + "while", + "agency", + "seat", + "manner", + "favour", + "fig", + "pair", + "crisis", + "smile", + "prince", + "danger", + "call", + "capacity", + "output", + "note", + "procedure", + "theatre", + "tour", + "recognition", + "middle", + "absence", + "sentence", + "package", + "track", + "card", + "sign", + "commitment", + "player", + "threat", + "weather", + "element", + "conflict", + "notice", + "victory", + "bottom", + "finance", + "fund", + "violence", + "file", + "profit", + "standard", + "jack", + "route", + "china", + "expenditure", + "second", + "discipline", + "cell", + "reaction", + "castle", + "congress", + "individual", + "lead", + "consideration", + "debt", + "option", + "payment", + "exhibition", + "reform", + "emphasis", + "spring", + "audience", + "feature", + "touch", + "estate", + "assembly", + "volume", + "youth", + "contribution", + "curriculum", + "appearance", + "martin", + "tom", + "boat", + "institute", + "membership", + "branch", + "bus", + "waste", + "heat", + "neck", + "object", + "captain", + "driver", + "challenge", + "conversation", + "occasion", + "code", + "crown", + "birth", + "silence", + "literature", + "faith", + "hell", + "entry", + "transfer", + "gentleman", + "bag", + "coal", + "investigation", + "leg", + "belief", + "total", + "major", + "document", + "description", + "murder", + "aim", + "manchester", + "flight", + "conclusion", + "drug", + "tradition", + "pleasure", + "connection", + "owner", + "treaty", + "tony", + "alan", + "desire", + "professor", + "copy", + "ministry", + "acid", + "palace", + "address", + "institution", + "lunch", + "generation", + "partner", + "engine", + "newspaper", + "cross", + "reduction", + "welfare", + "definition", + "key", + "release", + "vote", + "examination", + "judge", + "atmosphere", + "leadership", + "sky", + "breath", + "creation", + "row", + "guide", + "milk", + "cover", + "screen", + "intention", + "criticism", + "jones", + "silver", + "customer", + "journey", + "explanation", + "green", + "measure", + "brain", + "significance", + "phase", + "injury", + "run", + "coast", + "technique", + "valley", + "drink", + "magazine", + "potential", + "drive", + "revolution", + "bishop", + "settlement", + "christ", + "metal", + "motion", + "index", + "adult", + "inflation", + "sport", + "surprise", + "pension", + "factory", + "tape", + "flow", + "iron", + "trip", + "lane", + "pool", + "independence", + "hole", + "flat", + "content", + "pay", + "noise", + "combination", + "session", + "appointment", + "fashion", + "consumer", + "accommodation", + "temperature", + "mike", + "religion", + "author", + "nation", + "northern", + "sample", + "assistance", + "interpretation", + "aspect", + "display", + "shoulder", + "agent", + "gallery", + "republic", + "cancer", + "proposal", + "sequence", + "simon", + "ship", + "interview", + "vehicle", + "democracy", + "improvement", + "involvement", + "general", + "enterprise", + "van", + "meal", + "breakfast", + "motor", + "channel", + "impression", + "tone", + "sheet", + "pollution", + "bob", + "beauty", + "square", + "vision", + "spot", + "distinction", + "brown", + "crowd", + "fuel", + "desk", + "sum", + "decline", + "revenue", + "fall", + "diet", + "bedroom", + "soil", + "reader", + "shock", + "fruit", + "behalf", + "deputy", + "roof", + "nose", + "steel", + "artist", + "graham", + "plate", + "song", + "maintenance", + "formation", + "grass", + "spokesman", + "ice", + "talk", + "program", + "link", + "ring", + "expert", + "establishment", + "plastic", + "candidate", + "rail", + "passage", + "joe", + "parish", + "emergency", + "liability", + "identity", + "location", + "framework", + "strike", + "countryside", + "map", + "lake", + "household", + "approval", + "border", + "bottle", + "bird", + "constitution", + "autumn", + "cat", + "agriculture", + "concentration", + "guy", + "dress", + "victim", + "mountain", + "editor", + "theme", + "error", + "loan", + "stress", + "recovery", + "electricity", + "recession", + "wealth", + "request", + "comparison", + "lewis", + "white", + "walk", + "focus", + "chief", + "parent", + "sleep", + "mass", + "jane", + "bush", + "foundation", + "bath", + "item", + "lifespan", + "publication", + "decade", + "beach", + "sugar", + "height", + "charity", + "writer", + "panel", + "struggle", + "dream", + "outcome", + "efficiency", + "offence", + "resolution", + "reputation", + "specialist", + "taylor", + "pub", + "cooperation", + "port", + "incident", + "representation", + "bread", + "chain", + "initiative", + "clause", + "resistance", + "mistake", + "worker", + "advance", + "empire", + "notion", + "mirror", + "delivery", + "chest", + "licence", + "frank", + "average", + "awareness", + "travel", + "expansion", + "block", + "alternative", + "chancellor", + "meat", + "store", + "self", + "break", + "drama", + "corporation", + "currency", + "extension", + "convention", + "partnership", + "skill", + "furniture", + "round", + "regime", + "inquiry", + "rugby", + "philosophy", + "scope", + "gate", + "minority", + "intelligence", + "restaurant", + "consequence", + "mill", + "golf", + "retirement", + "priority", + "plane", + "gun", + "gap", + "core", + "uncle", + "fun", + "arrival", + "snow", + "no", + "command", + "abuse", + "limit", + "championship" +) + common_words = ( "you", "I", @@ -763,7 +1757,6 @@ common_words = ( "kept", "darling", "dog", - "ms", "win", "air", "honor", @@ -846,7 +1839,6 @@ common_words = ( "stick", "ray", "follow", - "bo", "angry", "instead", "buddy", @@ -890,7 +1882,6 @@ common_words = ( "share", "offer", "hurry", - "ow", "feet", "wondering", "simple", @@ -1060,15 +2051,19 @@ def html_poisoned(dictionary: [], twograms: {}) -> str: pair_found = True if not pair_found: - if randint(1, 10) <= 7: - # pick a common word - distribution = random.uniform(0.0, 1.0) - common_index = \ - int(distribution * distribution * - no_of_common_words) - wrd = common_words[common_index] + if randint(1, 100) <= 37: + # pick a common noun + wrd = random.choice(common_nouns) else: - wrd = random.choice(dictionary) + if randint(1, 10) <= 7: + # pick a common word + distribution = random.uniform(0.0, 1.0) + common_index = \ + int(distribution * distribution * + no_of_common_words) + wrd = common_words[common_index] + else: + wrd = random.choice(dictionary) if word_index > 0: sentence_str += wrd @@ -1080,7 +2075,7 @@ def html_poisoned(dictionary: [], twograms: {}) -> str: sentence_str += ' ' else: sentence_str += ', ' - prev_wrd = wrd + prev_wrd = wrd if sentence_index > 0: html_str += ' ' + sentence_str.strip() + '.' else: