__filename__ = "categories.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.2.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "RSS Feeds"

import os
import datetime


def getHashtagCategory(base_dir: str, hashtag: str) -> str:
    """Returns the category for the hashtag
    """
    categoryFilename = base_dir + '/tags/' + hashtag + '.category'
    if not os.path.isfile(categoryFilename):
        categoryFilename = base_dir + '/tags/' + hashtag.title() + '.category'
        if not os.path.isfile(categoryFilename):
            categoryFilename = \
                base_dir + '/tags/' + hashtag.upper() + '.category'
            if not os.path.isfile(categoryFilename):
                return ''

    categoryStr = None
    try:
        with open(categoryFilename, 'r') as fp:
            categoryStr = fp.read()
    except OSError:
        print('EX: unable to read category ' + categoryFilename)
    if categoryStr:
        return categoryStr
    return ''


def getHashtagCategories(base_dir: str,
                         recent: bool = False, category: str = None) -> None:
    """Returns a dictionary containing hashtag categories
    """
    maxTagLength = 42
    hashtagCategories = {}

    if recent:
        currTime = datetime.datetime.utcnow()
        daysSinceEpoch = (currTime - datetime.datetime(1970, 1, 1)).days
        recently = daysSinceEpoch - 1

    for subdir, dirs, files in os.walk(base_dir + '/tags'):
        for f in files:
            if not f.endswith('.category'):
                continue
            categoryFilename = os.path.join(base_dir + '/tags', f)
            if not os.path.isfile(categoryFilename):
                continue
            hashtag = f.split('.')[0]
            if len(hashtag) > maxTagLength:
                continue
            with open(categoryFilename, 'r') as fp:
                categoryStr = fp.read()

                if not categoryStr:
                    continue

                if category:
                    # only return a dictionary for a specific category
                    if categoryStr != category:
                        continue

                if recent:
                    tagsFilename = base_dir + '/tags/' + hashtag + '.txt'
                    if not os.path.isfile(tagsFilename):
                        continue
                    modTimesinceEpoc = \
                        os.path.getmtime(tagsFilename)
                    lastModifiedDate = \
                        datetime.datetime.fromtimestamp(modTimesinceEpoc)
                    fileDaysSinceEpoch = \
                        (lastModifiedDate -
                         datetime.datetime(1970, 1, 1)).days
                    if fileDaysSinceEpoch < recently:
                        continue

                if not hashtagCategories.get(categoryStr):
                    hashtagCategories[categoryStr] = [hashtag]
                else:
                    if hashtag not in hashtagCategories[categoryStr]:
                        hashtagCategories[categoryStr].append(hashtag)
        break
    return hashtagCategories


def updateHashtagCategories(base_dir: str) -> None:
    """Regenerates the list of hashtag categories
    """
    categoryListFilename = base_dir + '/accounts/categoryList.txt'
    hashtagCategories = getHashtagCategories(base_dir)
    if not hashtagCategories:
        if os.path.isfile(categoryListFilename):
            try:
                os.remove(categoryListFilename)
            except OSError:
                print('EX: updateHashtagCategories ' +
                      'unable to delete cached category list ' +
                      categoryListFilename)
        return

    categoryList = []
    for categoryStr, hashtagList in hashtagCategories.items():
        categoryList.append(categoryStr)
    categoryList.sort()

    categoryListStr = ''
    for categoryStr in categoryList:
        categoryListStr += categoryStr + '\n'

    # save a list of available categories for quick lookup
    try:
        with open(categoryListFilename, 'w+') as fp:
            fp.write(categoryListStr)
    except OSError:
        print('EX: unable to write category ' + categoryListFilename)


def _validHashtagCategory(category: str) -> bool:
    """Returns true if the category name is valid
    """
    if not category:
        return False

    invalidChars = (',', ' ', '<', ';', '\\', '"', '&', '#')
    for ch in invalidChars:
        if ch in category:
            return False

    # too long
    if len(category) > 40:
        return False

    return True


def setHashtagCategory(base_dir: str, hashtag: str, category: str,
                       update: bool, force: bool = False) -> bool:
    """Sets the category for the hashtag
    """
    if not _validHashtagCategory(category):
        return False

    if not force:
        hashtagFilename = base_dir + '/tags/' + hashtag + '.txt'
        if not os.path.isfile(hashtagFilename):
            hashtag = hashtag.title()
            hashtagFilename = base_dir + '/tags/' + hashtag + '.txt'
            if not os.path.isfile(hashtagFilename):
                hashtag = hashtag.upper()
                hashtagFilename = base_dir + '/tags/' + hashtag + '.txt'
                if not os.path.isfile(hashtagFilename):
                    return False

    if not os.path.isdir(base_dir + '/tags'):
        os.mkdir(base_dir + '/tags')
    categoryFilename = base_dir + '/tags/' + hashtag + '.category'
    if force:
        # don't overwrite any existing categories
        if os.path.isfile(categoryFilename):
            return False

    categoryWritten = False
    try:
        with open(categoryFilename, 'w+') as fp:
            fp.write(category)
            categoryWritten = True
    except OSError as ex:
        print('EX: unable to write category ' + categoryFilename +
              ' ' + str(ex))

    if categoryWritten:
        if update:
            updateHashtagCategories(base_dir)
        return True

    return False


def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
    """Tries to guess a category for the given hashtag.
    This works by trying to find the longest similar hashtag
    """
    if len(tagName) < 4:
        return ''

    categoryMatched = ''
    tagMatchedLen = 0

    for categoryStr, hashtagList in hashtagCategories.items():
        for hashtag in hashtagList:
            if len(hashtag) < 4:
                # avoid matching very small strings which often
                # lead to spurious categories
                continue
            if hashtag not in tagName:
                if tagName not in hashtag:
                    continue
            if not categoryMatched:
                tagMatchedLen = len(hashtag)
                categoryMatched = categoryStr
            else:
                # match the longest tag
                if len(hashtag) > tagMatchedLen:
                    categoryMatched = categoryStr
    if not categoryMatched:
        return ''
    return categoryMatched