# Module metadata: names this file and records its author, license,
# version, maintainer contact, release status and module group
__filename__ = "poison.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.5.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"


import os
import random
from random import randint

# Commonly used English words, sampled by html_poisoned when generating
# word salad. html_poisoned picks entries by index with a bias towards
# the start of the tuple, so the ordering matters.
# Every entry needs a trailing comma: adjacent string literals without
# one are silently concatenated into a single bogus word.
common_words = (
    "you",
    "I",
    "to",
    "the",
    "a",
    "and",
    "that",
    "it",
    "of",
    "me",
    "what",
    "is",
    "in",
    "this",
    "know",
    "I'm",
    "for",
    "no",
    "have",
    "my",
    "don't",
    "just",
    "not",
    "do",
    "be",
    "on",
    "your",
    "was",
    "we",
    "it's",
    "with",
    "so",
    "but",
    "all",
    "well",
    "are",
    "he",
    "oh",
    "about",
    "right",
    "you're",
    "get",
    "here",
    "out",
    "going",
    "like",
    "yeah",
    "if",
    "her",
    "she",
    "can",
    "up",
    "want",
    "think",
    "that's",
    "now",
    "go",
    "him",
    "at",
    "how",
    "got",
    "there",
    "one",
    "did",
    "why",
    "see",
    "come",
    "good",
    "they",
    "really",
    "as",
    "would",
    "look",
    "when",
    "time",
    "will",
    "okay",
    "back",
    "can't",
    "mean",
    "tell",
    "I'll",
    "from",
    "hey",
    "were",
    "he's",
    "could",
    "didn't",
    "yes",
    "his",
    "been",
    "or",
    "something",
    "who",
    "because",
    "some",
    "had",
    "then",
    "say",
    "ok",
    "take",
    "an",
    "way",
    "us",
    "little",
    "make",
    "need",
    "gonna",
    "never",
    "we're",
    "too",
    "love",
    "she's",
    "I've",
    "sure",
    "them",
    "more",
    "over",
    "our",
    "sorry",
    "where",
    "what's",
    "let",
    "thing",
    "am",
    "maybe",
    "down",
    "man",
    "has",
    "uh",
    "very",
    "by",
    "there's",
    "should",
    "anything",
    "said",
    "much",
    "any",
    "life",
    "even",
    "off",
    "please",
    "doing",
    "thank",
    "give",
    "only",
    "thought",
    "help",
    "two",
    "talk",
    "people",
    "god",
    "still",
    "wait",
    "into",
    "find",
    "nothing",
    "again",
    "things",
    "let's",
    "doesn't",
    "call",
    "told",
    "great",
    "before",
    "better",
    "ever",
    "night",
    "than",
    "away",
    "first",
    "believe",
    "other",
    "feel",
    "everything",
    "work",
    "you've",
    "fine",
    "home",
    "after",
    "last",
    "these",
    "day",
    "keep",
    "does",
    "put",
    "around",
    "stop",
    "they're",
    "I'd",
    "guy",
    "long",
    "isn't",
    "always",
    "listen",
    "wanted",
    "Mr",
    "guys",
    "huh",
    "those",
    "big",
    "lot",
    "happened",
    "thanks",
    "won't",
    "trying",
    "kind",
    "wrong",
    "through",
    "talking",
    "made",
    "new",
    "being",
    "guess",
    "hi",
    "care",
    "bad",
    "mom",
    "remember",
    "getting",
    "we'll",
    "together",
    "dad",
    "leave",
    "mother",
    "place",
    "understand",
    "wouldn't",
    "actually",
    "hear",
    "baby",
    "nice",
    "father",
    "else",
    "stay",
    "done",
    "wasn't",
    "their",
    "course",
    "might",
    "mind",
    "every",
    "enough",
    "try",
    "hell",
    "came",
    "someone",
    "you'll",
    "own",
    "family",
    "whole",
    "another",
    "house",
    "jack",
    "yourself",
    "idea",
    "ask",
    "best",
    "must",
    "coming",
    "old",
    "looking",
    "woman",
    "hello",
    "which",
    "years",
    "room",
    "money",
    "left",
    "knew",
    "tonight",
    "real",
    "son",
    "hope",
    "name",
    "same",
    "went",
    "um",
    "hmm",
    "happy",
    "pretty",
    "saw",
    "girl",
    "sir",
    "show",
    "friend",
    "already",
    "saying",
    "may",
    "next",
    "three",
    "job",
    "problem",
    "minute",
    "found",
    "world",
    "thinking",
    "haven't",
    "heard",
    "honey",
    "matter",
    "myself",
    "couldn't",
    "exactly",
    "having",
    "ah",
    "probably",
    "happen",
    "we've",
    "hurt",
    "boy",
    "both",
    "while",
    "dead",
    "gotta",
    "alone",
    "since",
    "excuse",
    "start",
    "kill",
    "hard",
    "you'd",
    "today",
    "car",
    "ready",
    "until",
    "without",
    "whatever",
    "wants",
    "hold",
    "wanna",
    "yet",
    "seen",
    "deal",
    "took",
    "once",
    "gone",
    "called",
    "morning",
    "supposed",
    "friends",
    "head",
    "stuff",
    "most",
    "used",
    "worry",
    "second",
    "part",
    "live",
    "truth",
    "school",
    "face",
    "forget",
    "TRUE",
    "business",
    "each",
    "cause",
    "soon",
    "knows",
    "few",
    "telling",
    "wife",
    "who's",
    "use",
    "chance",
    "run",
    "move",
    "anyone",
    "person",
    "bye",
    "somebody",
    "dr",
    "heart",
    "such",
    "miss",
    "married",
    "point",
    "later",
    "making",
    "meet",
    "anyway",
    "many",
    "phone",
    "reason",
    "damn",
    "lost",
    "looks",
    "bring",
    "case",
    "turn",
    "wish",
    "tomorrow",
    "kids",
    "trust",
    "check",
    "change",
    "end",
    "late",
    "anymore",
    "five",
    "least",
    "town",
    "aren't",
    "ha",
    "working",
    "year",
    "makes",
    "taking",
    "means",
    "brother",
    "play",
    "hate",
    "ago",
    "says",
    "beautiful",
    "gave",
    "fact",
    "crazy",
    "party",
    "sit",
    "open",
    "afraid",
    "between",
    "important",
    "rest",
    "fun",
    "kid",
    "word",
    "watch",
    "glad",
    "everyone",
    "days",
    "sister",
    "minutes",
    "everybody",
    "bit",
    "couple",
    "whoa",
    "either",
    "mrs",
    "feeling",
    "daughter",
    "wow",
    "gets",
    "asked",
    "under",
    "break",
    "promise",
    "door",
    "set",
    "close",
    "hand",
    "easy",
    "question",
    "doctor",
    "tried",
    "far",
    "walk",
    "needs",
    "trouble",
    "mine",
    "though",
    "times",
    "different",
    "killed",
    "hospital",
    "anybody",
    "sam",
    "alright",
    "wedding",
    "shut",
    "able",
    "die",
    "perfect",
    "police",
    "stand",
    "comes",
    "hit",
    "story",
    "ya",
    "mm",
    "waiting",
    "dinner",
    "against",
    "funny",
    "husband",
    "almost",
    "stupid",
    "pay",
    "answer",
    "four",
    "office",
    "cool",
    "eyes",
    "news",
    "child",
    "shouldn't",
    "half",
    "side",
    "yours",
    "moment",
    "sleep",
    "read",
    "where's",
    "started",
    "young",
    "men",
    "sounds",
    "sonny",
    "lucky",
    "pick",
    "sometimes",
    "bed",
    "also",
    "date",
    "line",
    "plan",
    "hours",
    "lose",
    "fire",
    "free",
    "hands",
    "serious",
    "shit",
    "behind",
    "inside",
    "high",
    "ahead",
    "week",
    "wonderful",
    "fight",
    "past",
    "cut",
    "quite",
    "number",
    "he'll",
    "sick",
    "it'll",
    "game",
    "eat",
    "nobody",
    "goes",
    "death",
    "along",
    "save",
    "seems",
    "finally",
    "lives",
    "worried",
    "upset",
    "met",
    "book",
    "brought",
    "seem",
    "sort",
    "safe",
    "living",
    "children",
    "weren't",
    "leaving",
    "front",
    "shot",
    "loved",
    "asking",
    "running",
    "clear",
    "figure",
    "hot",
    "felt",
    "six",
    "parents",
    "drink",
    "absolutely",
    "how's",
    "daddy",
    "sweet",
    "alive",
    "Paul",
    "sense",
    "meant",
    "happens",
    "David",
    "special",
    "bet",
    "blood",
    "ain't",
    "kidding",
    "lie",
    "full",
    "meeting",
    "dear",
    "coffee",
    "seeing",
    "sound",
    "fault",
    "water",
    "fuck",
    "ten",
    "women",
    "john",
    "welcome",
    "buy",
    "months",
    "hour",
    "speak",
    "lady",
    "jen",
    "thinks",
    "Christmas",
    "body",
    "order",
    "outside",
    "hang",
    "possible",
    "worse",
    "company",
    "mistake",
    "ooh",
    "handle",
    "spend",
    "totally",
    "giving",
    "control",
    "here's",
    "marriage",
    "realize",
    "power",
    "president",
    "unless",
    "sex",
    "girls",
    "send",
    "needed",
    "taken",
    "died",
    "scared",
    "picture",
    "talked",
    "jake",
    "ass",
    "hundred",
    "changed",
    "completely",
    "explain",
    "playing",
    "certainly",
    "sign",
    "boys",
    "relationship",
    "loves",
    "fucking",
    "hair",
    "lying",
    "choice",
    "anywhere",
    "secret",
    "future",
    "weird",
    "luck",
    "she'll",
    "max",
    "Luis",
    "turned",
    "known",
    "touch",
    "kiss",
    "crane",
    "questions",
    "obviously",
    "wonder",
    "pain",
    "calling",
    "somewhere",
    "throw",
    "straight",
    "grace",
    "cold",
    "white",
    "fast",
    "words",
    "food",
    "none",
    "drive",
    "feelings",
    "they'll",
    "worked",
    "marry",
    "light",
    "test",
    "drop",
    "cannot",
    "frank",
    "sent",
    "city",
    "dream",
    "protect",
    "twenty",
    "class",
    "lucy",
    "surprise",
    "its",
    "sweetheart",
    "forever",
    "poor",
    "looked",
    "mad",
    "except",
    "gun",
    "dance",
    "takes",
    "appreciate",
    "especially",
    "situation",
    "besides",
    "weeks",
    "pull",
    "himself",
    "hasn't",
    "act",
    "worth",
    "Sheridan",
    "amazing",
    "top",
    "given",
    "expect",
    "ben",
    "rather",
    "Julian",
    "involved",
    "swear",
    "piece",
    "busy",
    "law",
    "decided",
    "black",
    "joey",
    "happening",
    "movie",
    "we'd",
    "catch",
    "antonio",
    "country",
    "less",
    "perhaps",
    "step",
    "fall",
    "watching",
    "kept",
    "darling",
    "dog",
    "ms",
    "win",
    "air",
    "honor",
    "personal",
    "moving",
    "till",
    "admit",
    "problems",
    "murder",
    "strong",
    "he'd",
    "evil",
    "definitely",
    "feels",
    "information",
    "honest",
    "eye",
    "broke",
    "missed",
    "longer",
    "dollars",
    "tired",
    "evening",
    "human",
    "starting",
    "Ross",
    "red",
    "entire",
    "trip",
    "club",
    "suppose",
    "calm",
    "imagine",
    "fair",
    "caught",
    "blame",
    "street",
    "sitting",
    "favor",
    "apartment",
    "court",
    "terrible",
    "clean",
    "tony",
    "learn",
    "Alison",
    "Rick",
    "works",
    "relax",
    "york",
    "million",
    "charity",
    "accident",
    "wake",
    "prove",
    "Danny",
    "smart",
    "message",
    "missing",
    "forgot",
    "small",
    "interested",
    "table",
    "nbsp",
    "become",
    "craig",
    "mouth",
    "pregnant",
    "middle",
    "billy",
    "ring",
    "careful",
    "shall",
    "dude",
    "team",
    "ride",
    "figured",
    "wear",
    "shoot",
    "stick",
    "ray",
    "follow",
    "bo",
    "angry",
    "instead",
    "buddy",
    "write",
    "stopped",
    "early",
    "angel",
    "nick",
    "ran",
    "war",
    "standing",
    "forgive",
    "jail",
    "wearing",
    "ladies",
    "kinda",
    "lunch",
    "eight",
    "gotten",
    "hoping",
    "phoebe",
    "thousand",
    "ridge",
    "music",
    "luke",
    "paper",
    "tough",
    "tape",
    "state",
    "count",
    "college",
    "boyfriend",
    "proud",
    "agree",
    "birthday",
    "bill",
    "seven",
    "they've",
    "Timmy",
    "history",
    "share",
    "offer",
    "hurry",
    "ow",
    "feet",
    "wondering",
    "simple",
    "decision",
    "building",
    "ones",
    "finish",
    "voice",
    "herself",
    "Chris",
    "would've",
    "list",
    "mess",
    "deserve",
    "evidence",
    "cute",
    "dress",
    "Richard",
    "interesting",
    "Jesus",
    "hotel",
    "enjoy",
    "Ryan",
    "Lindsay",
    "quiet",
    "concerned",
    "road",
    "eve",
    "staying",
    "short",
    "beat",
    "sweetie",
    "mention",
    "clothes",
    "finished",
    "fell",
    "neither",
    "fix",
    "victor",
    "respect",
    "spent",
    "prison",
    "attention",
    "holding",
    "calls",
    "near",
    "surprised",
    "bar",
    "beth",
    "pass",
    "keeping",
    "gift",
    "hadn't",
    "putting",
    "dark",
    "self",
    "owe",
    "using",
    "nora",
    "ice",
    "helping",
    "bitch",
    "normal",
    "aunt",
    "lawyer",
    "apart",
    "certain",
    "plans",
    "girlfriend",
    "floor",
    "whether",
    "everything's",
    "present",
    "earth",
    "private",
    "box",
    "Dawson",
    "cover",
    "judge",
    "upstairs",
    "sake",
    "mommy",
    "possibly",
    "worst"
)


def load_dictionary(base_dir: str) -> []:
    """Loads a dictionary from file
    custom_dictionary.txt within base_dir is preferred, falling back
    to dictionary.txt. The file is expected to contain one word per line.
    Returns the list of words with surrounding whitespace removed and
    blank lines skipped, or an empty list if neither file exists or the
    file could not be read
    """
    filename = base_dir + '/custom_dictionary.txt'
    if not os.path.isfile(filename):
        filename = base_dir + '/dictionary.txt'
    if not os.path.isfile(filename):
        return []

    words = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dict:
            # blank lines, including the one implied by a trailing
            # newline, would otherwise become empty "words"
            words = [wrd for wrd in map(str.strip, fp_dict) if wrd]
    except OSError:
        print('EX: unable to load dictionary ' + filename)
    return words


def load_2grams(base_dir: str) -> {}:
    """Reads word pairs from file, indexed by their first word
    custom_2grams.txt within base_dir is preferred, falling back
    to 2grams.txt. Only lines having exactly three tab separated fields
    are used, with the second and third fields being the pair of words.
    Returns a dict mapping each first word to the list of distinct words
    seen following it, or an empty dict if neither file exists
    """
    path = base_dir + '/custom_2grams.txt'
    if not os.path.isfile(path):
        path = base_dir + '/2grams.txt'
        if not os.path.isfile(path):
            return {}

    text = ''
    try:
        with open(path, 'r', encoding='utf-8') as fp_dict:
            text = fp_dict.read()
    except OSError:
        print('EX: unable to load 2-grams ' + path)

    pairs = {}
    for row in text.split('\n'):
        fields = row.split('\t')
        if len(fields) == 3:
            # the first field is not used, only the two words
            followers = pairs.setdefault(fields[1], [])
            if fields[2] not in followers:
                followers.append(fields[2])
    return pairs


def _poisoned_common_word() -> str:
    """Returns a randomly chosen entry from common_words
    """
    # squaring a uniform sample biases the index towards the start
    # of the tuple
    distribution = random.uniform(0.0, 1.0)
    common_index = \
        int(distribution * distribution * (len(common_words) - 1))
    return common_words[common_index]


def _poisoned_sentence(dictionary: [], twograms: {}) -> str:
    """Returns a single sentence of word salad, ending with a full stop
    """
    no_of_words = randint(3, 20)
    fragments = []
    prev_wrd = ''
    for word_index in range(no_of_words):
        # common word sequences: 70% of the time follow the previous word
        # with one of the words known to come after it
        followers = twograms.get(prev_wrd) if prev_wrd else None
        pair_found = bool(followers) and randint(1, 10) <= 7

        if pair_found:
            wrd = random.choice(followers)
        elif dictionary and randint(1, 10) > 7:
            wrd = random.choice(dictionary)
        else:
            # 70% of the time, or always if there is no dictionary
            # to choose from, pick a common word
            wrd = _poisoned_common_word()

        if word_index == 0:
            # only uppercase the first character, because title() also
            # capitalizes after apostrophes, turning don't into Don'T
            fragments.append(wrd[:1].upper() + wrd[1:])
        else:
            fragments.append(wrd)

        # occasionally add a comma, but never after the final word
        # or after a word chosen as part of a pair
        if word_index < no_of_words - 1 and not pair_found and \
           randint(1, 10) <= 1:
            fragments.append(', ')
        else:
            fragments.append(' ')
        prev_wrd = wrd
    return ''.join(fragments).strip() + '.'


def html_poisoned(dictionary: [], twograms: {}) -> str:
    """Returns a poisoned HTML document for LLM response
    Statistically similar to English language, but semantically worthless
    word salad
    dictionary is a list of words, which may be empty, in which case
    only common words and twograms are used.
    twograms maps a word to the list of words which may follow it.
    The document contains between one and five paragraphs, each having
    between one and five sentences of three to twenty words
    """
    html_str = \
        '<html lang="en">' + \
        '<head>' + \
        '<meta charset="utf-8">' + \
        '</head>' + \
        '<body>'
    paragraphs = randint(1, 5)
    for _ in range(paragraphs):
        sentences = randint(1, 5)
        html_str += '<p>' + \
            ' '.join(_poisoned_sentence(dictionary, twograms)
                     for _ in range(sentences)) + \
            '</p>'
    html_str += '</body></html>'
    return html_str