# epicyon/categories.py

# Module metadata: file name, author, license, version, maintainer
# contact details, release status and module group
__filename__ = "categories.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.5.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "RSS Feeds"

import os
import datetime
from utils import data_dir
from utils import date_utcnow
from utils import date_epoch

# hashtags longer than this many characters are left out of the
# dictionary returned by get_hashtag_categories
MAX_TAG_LENGTH = 42

# a category name containing any of these characters is rejected
# by _valid_hashtag_category
INVALID_HASHTAG_CHARS = (',', ' ', '<', ';', '\\', '"', '&', '#')


def get_hashtag_category(base_dir: str, hashtag: str) -> str:
    """Returns the category for the hashtag

    The category is read from base_dir/tags/<hashtag>.category. The
    hashtag is tried as given, then title cased, then upper cased, and
    the first file which exists is used.

    Returns the contents of that file, or an empty string if there is
    no category file, the file is empty, or it could not be read or
    decoded as utf-8.
    """
    category_filename = ''
    for tag_variant in (hashtag, hashtag.title(), hashtag.upper()):
        candidate = base_dir + '/tags/' + tag_variant + '.category'
        if os.path.isfile(candidate):
            category_filename = candidate
            break
    if not category_filename:
        return ''

    category_str = None
    try:
        with open(category_filename, 'r', encoding='utf-8') as fp_category:
            category_str = fp_category.read()
    except OSError:
        print('EX: unable to read category ' + category_filename)
    except UnicodeError as ex:
        # reading decodes the file, so invalid utf-8 raises
        # UnicodeDecodeError, which is not an OSError. UnicodeError
        # covers both the decode and encode variants
        print('EX: unable to read category unicode ' + category_filename +
              ' ' + str(ex))
    if category_str:
        return category_str
    return ''


def _write_city_category(base_dir: str, hashtag: str, category_str: str,
                         label: str) -> None:
    """Creates tags/<hashtag>.category containing category_str, unless
    a category file for that hashtag already exists.
    label only distinguishes the error message of each caller
    """
    city_filename = base_dir + '/tags/' + hashtag + '.category'
    if os.path.isfile(city_filename):
        # don't overwrite an existing category
        return
    try:
        with open(city_filename, 'w+', encoding='utf-8') as fp_city:
            fp_city.write(category_str)
    except OSError:
        print('EX: unable to write city ' + label + ' ' + city_filename)


def load_city_hashtags(base_dir: str, translate: {}) -> None:
    """create hashtag categories for cities

    Every line of each .txt file directly within base_dir/data/cities
    is treated as a city name. For each city a category file is
    created within base_dir/tags, if it does not already exist, for:
      - the lower case name with hyphens and spaces removed
      - the name with each hyphen separated part title cased
      - the name with each space separated part title cased
    The category written is translate['places'] if that is set,
    otherwise 'places'. Blank lines, and names which are empty once
    hyphens and spaces are removed, are skipped.
    """
    category_str = 'places'
    if translate.get(category_str):
        category_str = translate[category_str]

    cities_dir = base_dir + '/data/cities'
    for _, _, files in os.walk(cities_dir):
        for cities_file in files:
            if not cities_file.endswith('.txt'):
                continue
            cities_filename = cities_dir + '/' + cities_file
            if not os.path.isfile(cities_filename):
                continue
            cities = []
            try:
                with open(cities_filename, 'r', encoding='utf-8') as fp_cities:
                    cities = fp_cities.read().split('\n')
            except OSError:
                print('EX: unable to load cities file ' + cities_filename)
            except UnicodeError as ex:
                # invalid utf-8 is not an OSError
                print('EX: unable to decode cities file ' +
                      cities_filename + ' ' + str(ex))
            for city in cities:
                hashtag = city.lower().strip()
                hashtag = hashtag.replace(' & ', ' and ')
                hashtag = hashtag.replace('/', '')

                flat_hashtag = hashtag.replace('-', '').replace(' ', '')
                if not flat_hashtag:
                    # a blank line, such as the one after a trailing
                    # newline, would otherwise create a '.category'
                    # file with no hashtag
                    continue
                _write_city_category(base_dir, flat_hashtag,
                                     category_str, 'category')

                for separator, label in (('-', 'category2'),
                                         (' ', 'category3')):
                    if separator not in hashtag:
                        continue
                    camel_hashtag = ''
                    for text in hashtag.split(separator):
                        camel_hashtag += text.lower().title()
                    _write_city_category(base_dir, camel_hashtag,
                                         category_str, label)
        # filenames are built relative to the top level cities
        # directory, so there is nothing to do within subdirectories
        break


def get_hashtag_categories(base_dir: str,
                           recent: bool, category: str) -> dict:
    """Returns a dictionary containing hashtag categories

    Each key is a category name and each value is the list of hashtags
    having that category, taken from the .category files directly
    within base_dir/tags. Hashtags longer than MAX_TAG_LENGTH and
    category files which are empty or unreadable are skipped.

    If recent is True then a hashtag is only included when its
    tags/<hashtag>.txt file exists and was last modified no earlier
    than the day before the current day, counted in whole days since
    the epoch.
    If category is set then only hashtags within that category are
    returned.
    """
    hashtag_categories = {}

    if recent:
        curr_time = date_utcnow()
        days_since_epoch = (curr_time - date_epoch()).days
        recently = days_since_epoch - 1

    tags_dir = base_dir + '/tags'
    for _, _, files in os.walk(tags_dir):
        for catfile in files:
            if not catfile.endswith('.category'):
                continue
            category_filename = os.path.join(tags_dir, catfile)
            if not os.path.isfile(category_filename):
                continue
            hashtag = catfile.split('.')[0]
            if len(hashtag) > MAX_TAG_LENGTH:
                continue

            category_str = None
            try:
                with open(category_filename, 'r',
                          encoding='utf-8') as fp_category:
                    category_str = fp_category.read()
            except OSError:
                print('EX: get_hashtag_categories ' + category_filename)
            except UnicodeError as ex:
                # reading decodes the file, so invalid utf-8 raises
                # UnicodeDecodeError, which is not an OSError
                print('EX: get_hashtag_categories unicode ' +
                      category_filename + ' ' + str(ex))

            if not category_str:
                continue

            # only return a dictionary for a specific category
            if category and category_str != category:
                continue

            if recent:
                tags_filename = tags_dir + '/' + hashtag + '.txt'
                if not os.path.isfile(tags_filename):
                    continue
                mod_time_since_epoc = \
                    os.path.getmtime(tags_filename)
                last_modified_date = \
                    datetime.datetime.fromtimestamp(mod_time_since_epoc,
                                                    datetime.timezone.utc)
                file_days_since_epoch = \
                    (last_modified_date - date_epoch()).days
                if file_days_since_epoch < recently:
                    continue

            if not hashtag_categories.get(category_str):
                hashtag_categories[category_str] = [hashtag]
            elif hashtag not in hashtag_categories[category_str]:
                hashtag_categories[category_str].append(hashtag)
        # only the top level of the tags directory is examined
        break
    return hashtag_categories


def update_hashtag_categories(base_dir: str) -> None:
    """Rebuilds the cached list of hashtag category names.
    The names are saved in sorted order, one per line, or the
    cached list is deleted if there are no categories
    """
    list_filename = data_dir(base_dir) + '/categoryList.txt'
    categories = get_hashtag_categories(base_dir, False, None)

    if categories:
        lines = [name + '\n' for name in sorted(categories)]
        # save a list of available categories for quick lookup
        try:
            with open(list_filename, 'w+', encoding='utf-8') as fp_category:
                fp_category.write(''.join(lines))
        except OSError:
            print('EX: unable to write category ' + list_filename)
        return

    # there are no categories, so remove any stale cached list
    if not os.path.isfile(list_filename):
        return
    try:
        os.remove(list_filename)
    except OSError:
        print('EX: update_hashtag_categories ' +
              'unable to delete cached category list ' +
              list_filename)


def _valid_hashtag_category(category: str) -> bool:
    """Returns true if the category name is not empty, contains
    none of the invalid characters and is at most 40 characters long
    """
    if not category:
        return False
    if any(char in category for char in INVALID_HASHTAG_CHARS):
        return False
    # anything longer than this is too long
    return len(category) <= 40


def set_hashtag_category(base_dir: str, hashtag: str, category: str,
                         update: bool, force: bool) -> bool:
    """Sets the category for the hashtag.
    Unless forced, the hashtag must already have a tags .txt file,
    as given, title cased or upper cased. When forced, an existing
    category is never replaced.
    Returns true if the category was written
    """
    if not _valid_hashtag_category(category):
        return False

    tags_dir = base_dir + '/tags'

    if not force:
        # find the capitalisation under which the hashtag is stored
        tag_exists = os.path.isfile(tags_dir + '/' + hashtag + '.txt')
        if not tag_exists:
            hashtag = hashtag.title()
            tag_exists = os.path.isfile(tags_dir + '/' + hashtag + '.txt')
        if not tag_exists:
            hashtag = hashtag.upper()
            tag_exists = os.path.isfile(tags_dir + '/' + hashtag + '.txt')
        if not tag_exists:
            return False

    if not os.path.isdir(tags_dir):
        os.mkdir(tags_dir)
    category_filename = tags_dir + '/' + hashtag + '.category'
    # don't overwrite any existing categories
    if force and os.path.isfile(category_filename):
        return False

    written = False
    try:
        with open(category_filename, 'w+', encoding='utf-8') as fp_category:
            fp_category.write(category)
            written = True
    except OSError as ex:
        print('EX: unable to write category ' + category_filename +
              ' ' + str(ex))
    except UnicodeEncodeError as ex:
        print('EX: unable to write category unicode ' + category_filename +
              ' ' + str(ex))

    if not written:
        return False
    if update:
        update_hashtag_categories(base_dir)
    return True


def guess_hashtag_category(tag_name: str, hashtag_categories: {},
                           min_tag_length: int) -> str:
    """Tries to guess a category for the given hashtag.
    This works by trying to find the longest similar hashtag
    """
    if len(tag_name) < min_tag_length:
        return ''

    category_matched = ''
    tag_matched_len = 0
    finished = False

    for category_str, hashtag_list in hashtag_categories.items():
        if finished:
            break
        for hashtag in hashtag_list:
            if hashtag == tag_name:
                # exact match
                category_matched = category_str
                finished = True
                break
            if len(hashtag) < min_tag_length:
                # avoid matching very small strings which often
                # lead to spurious categories
                continue
            if hashtag not in tag_name:
                if tag_name not in hashtag:
                    continue
            if not category_matched:
                tag_matched_len = len(hashtag)
                category_matched = category_str
            else:
                # match the longest tag
                if len(hashtag) > tag_matched_len:
                    category_matched = category_str
    if not category_matched:
        return ''
    return category_matched
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`__filename__ = "categories.py"`
			`__author__ = "Bob Mottram"`
			`__license__ = "AGPL3+"`
Version 1.5.0 2024-01-21 19:01:20 +00:00			`__version__ = "1.5.0"`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`__maintainer__ = "Bob Mottram"`
Change domain to libreserver.org 2021-09-10 16:14:50 +00:00			`__email__ = "bob@libreserver.org"`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`__status__ = "Production"`
Module groups 2021-06-15 15:08:12 +00:00			`__module_group__ = "RSS Feeds"`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
			`import os`
			`import datetime`
Function for accounts data directory 2024-05-12 12:35:26 +00:00			`from utils import data_dir`
Preparing for utcnow deprecation 2023-11-20 22:27:58 +00:00			`from utils import date_utcnow`
			`from utils import date_epoch`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
Snake case 2021-12-30 18:38:36 +00:00			`MAX_TAG_LENGTH = 42`

			`INVALID_HASHTAG_CHARS = (',', ' ', '<', ';', '\\', '"', '&', '#')`

Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
Moving to snake case 2021-12-29 21:55:09 +00:00			`def get_hashtag_category(base_dir: str, hashtag: str) -> str:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`"""Returns the category for the hashtag`
			`"""`
Snake case 2021-12-30 18:38:36 +00:00			`category_filename = base_dir + '/tags/' + hashtag + '.category'`
			`if not os.path.isfile(category_filename):`
			`category_filename = base_dir + '/tags/' + hashtag.title() + '.category'`
			`if not os.path.isfile(category_filename):`
			`category_filename = \`
Snake case 2021-12-25 16:17:53 +00:00			`base_dir + '/tags/' + hashtag.upper() + '.category'`
Snake case 2021-12-30 18:38:36 +00:00			`if not os.path.isfile(category_filename):`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`return ''`

Snake case 2021-12-30 18:38:36 +00:00			`category_str = None`
File reading exception handling 2021-11-26 12:28:20 +00:00			`try:`
Standardise file pointer names 2024-07-14 13:01:46 +00:00			`with open(category_filename, 'r', encoding='utf-8') as fp_category:`
			`category_str = fp_category.read()`
File reading exception handling 2021-11-26 12:28:20 +00:00			`except OSError:`
Snake case 2021-12-30 18:38:36 +00:00			`print('EX: unable to read category ' + category_filename)`
Add exception handling 2024-02-01 10:50:00 +00:00			`except UnicodeEncodeError as ex:`
			`print('EX: unable to read category unicode ' + category_filename +`
			`' ' + str(ex))`
Snake case 2021-12-30 18:38:36 +00:00			`if category_str:`
			`return category_str`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`return ''`


Load city categories 2023-09-02 14:42:59 +00:00			`def load_city_hashtags(base_dir: str, translate: {}) -> None:`
			`"""create hashtag categories for cities`
			`"""`
			`category_str = 'places'`
			`if translate.get(category_str):`
			`category_str = translate[category_str]`

			`for _, _, files in os.walk(base_dir + '/data/cities'):`
			`for cities_file in files:`
			`if not cities_file.endswith('.txt'):`
			`continue`
			`cities_filename = base_dir + '/data/cities/' + cities_file`
			`if not os.path.isfile(cities_filename):`
			`continue`
			`cities = []`
			`try:`
			`with open(cities_filename, 'r', encoding='utf-8') as fp_cities:`
			`cities = fp_cities.read().split('\n')`
			`except OSError:`
			`print('EX: unable to load cities file ' + cities_filename)`
			`if not cities:`
			`continue`
			`for hashtag in cities:`
			`hashtag = hashtag.lower().strip()`
			`hashtag = hashtag.replace(' & ', ' and ')`
Remove slashes from cities 2023-09-09 09:31:15 +00:00			`hashtag = hashtag.replace('/', '')`
Load city categories 2023-09-02 14:42:59 +00:00
			`hashtag2 = hashtag.replace('-', '').replace(' ', '')`
			`city_filename = base_dir + '/tags/' + hashtag2 + '.category'`
			`if not os.path.isfile(city_filename):`
			`try:`
			`with open(city_filename, 'w+',`
			`encoding='utf-8') as fp_city:`
			`fp_city.write(category_str)`
			`except OSError:`
			`print('EX: unable to write city category ' +`
			`city_filename)`
			`if '-' in hashtag:`
			`section = hashtag.split('-')`
			`new_hashtag = ''`
			`for text in section:`
			`new_hashtag += text.lower().title()`
			`hashtag2 = new_hashtag`
			`city_filename = \`
			`base_dir + '/tags/' + hashtag2 + '.category'`
			`if not os.path.isfile(city_filename):`
			`try:`
			`with open(city_filename, 'w+',`
			`encoding='utf-8') as fp_city:`
			`fp_city.write(category_str)`
			`except OSError:`
			`print('EX: unable to write city category2 ' +`
			`city_filename)`
			`if ' ' in hashtag:`
			`section = hashtag.split(' ')`
			`new_hashtag = ''`
			`for text in section:`
			`new_hashtag += text.lower().title()`
			`hashtag2 = new_hashtag`
			`city_filename = \`
			`base_dir + '/tags/' + hashtag2 + '.category'`
			`if not os.path.isfile(city_filename):`
			`try:`
			`with open(city_filename, 'w+',`
			`encoding='utf-8') as fp_city:`
			`fp_city.write(category_str)`
			`except OSError:`
			`print('EX: unable to write city category3 ' +`
			`city_filename)`


Moving to snake case 2021-12-29 21:55:09 +00:00			`def get_hashtag_categories(base_dir: str,`
Remove default arguments 2024-02-19 14:38:29 +00:00			`recent: bool, category: str) -> None:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`"""Returns a dictionary containing hashtag categories`
			`"""`
Snake case 2021-12-30 18:38:36 +00:00			`hashtag_categories = {}`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
			`if recent:`
Preparing for utcnow deprecation 2023-11-20 22:27:58 +00:00			`curr_time = date_utcnow()`
			`days_since_epoch = (curr_time - date_epoch()).days`
Snake case 2021-12-30 18:38:36 +00:00			`recently = days_since_epoch - 1`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
Tidying 2022-05-30 21:41:18 +00:00			`for _, _, files in os.walk(base_dir + '/tags'):`
Snake case 2021-12-30 18:38:36 +00:00			`for catfile in files:`
			`if not catfile.endswith('.category'):`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`continue`
Snake case 2021-12-30 18:38:36 +00:00			`category_filename = os.path.join(base_dir + '/tags', catfile)`
			`if not os.path.isfile(category_filename):`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`continue`
Snake case 2021-12-30 18:38:36 +00:00			`hashtag = catfile.split('.')[0]`
			`if len(hashtag) > MAX_TAG_LENGTH:`
Maximum hashtag length within categories 2021-01-24 10:45:35 +00:00			`continue`
Less indentation 2023-09-02 11:47:24 +00:00
			`category_str = None`
Add exception handling 2024-02-01 10:50:00 +00:00			`try:`
			`with open(category_filename, 'r',`
			`encoding='utf-8') as fp_category:`
			`category_str = fp_category.read()`
			`except OSError:`
			`print('EX: get_hashtag_categories ' + category_filename)`
			`except UnicodeEncodeError as ex:`
			`print('EX: get_hashtag_categories unicode ' +`
			`category_filename + ' ' + str(ex))`
Revert "Reading functions" This reverts commit ee0ffade9d74f8465338aab59546ef296c8f2a74. 2021-06-21 22:52:04 +00:00
Less indentation 2023-09-02 11:47:24 +00:00			`if not category_str:`
			`continue`
Revert "Reading functions" This reverts commit ee0ffade9d74f8465338aab59546ef296c8f2a74. 2021-06-21 22:52:04 +00:00
Less indentation 2023-09-02 11:47:24 +00:00			`if category:`
			`# only return a dictionary for a specific category`
			`if category_str != category:`
			`continue`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
Less indentation 2023-09-02 11:47:24 +00:00			`if recent:`
			`tags_filename = base_dir + '/tags/' + hashtag + '.txt'`
			`if not os.path.isfile(tags_filename):`
			`continue`
			`mod_time_since_epoc = \`
			`os.path.getmtime(tags_filename)`
			`last_modified_date = \`
Preparing for utcnow deprecation 2023-11-20 22:27:58 +00:00			`datetime.datetime.fromtimestamp(mod_time_since_epoc,`
			`datetime.timezone.utc)`
Less indentation 2023-09-02 11:47:24 +00:00			`file_days_since_epoch = \`
Preparing for utcnow deprecation 2023-11-20 22:27:58 +00:00			`(last_modified_date - date_epoch()).days`
Less indentation 2023-09-02 11:47:24 +00:00			`if file_days_since_epoch < recently:`
			`continue`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
Less indentation 2023-09-02 11:47:24 +00:00			`if not hashtag_categories.get(category_str):`
			`hashtag_categories[category_str] = [hashtag]`
			`else:`
			`if hashtag not in hashtag_categories[category_str]:`
			`hashtag_categories[category_str].append(hashtag)`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`break`
Snake case 2021-12-30 18:38:36 +00:00			`return hashtag_categories`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00

Moving to snake case 2021-12-29 21:55:09 +00:00			`def update_hashtag_categories(base_dir: str) -> None:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`"""Regenerates the list of hashtag categories`
			`"""`
Function for accounts data directory 2024-05-12 12:35:26 +00:00			`category_list_filename = data_dir(base_dir) + '/categoryList.txt'`
Remove default arguments 2024-02-19 14:38:29 +00:00			`hashtag_categories = get_hashtag_categories(base_dir, False, None)`
Snake case 2021-12-30 18:38:36 +00:00			`if not hashtag_categories:`
			`if os.path.isfile(category_list_filename):`
Exception handling when deleting files This can fail if a file is manually deleted or deleted in another thread 2021-09-05 10:17:43 +00:00			`try:`
Snake case 2021-12-30 18:38:36 +00:00			`os.remove(category_list_filename)`
More specific exceptions 2021-11-25 18:42:38 +00:00			`except OSError:`
Moving to snake case 2021-12-29 21:55:09 +00:00			`print('EX: update_hashtag_categories ' +`
Adding debug to exceptions 2021-10-29 16:31:20 +00:00			`'unable to delete cached category list ' +`
Snake case 2021-12-30 18:38:36 +00:00			`category_list_filename)`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`return`

Snake case 2021-12-30 18:38:36 +00:00			`category_list = []`
			`for category_str, _ in hashtag_categories.items():`
			`category_list.append(category_str)`
			`category_list.sort()`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
Snake case 2021-12-30 18:38:36 +00:00			`category_list_str = ''`
			`for category_str in category_list:`
			`category_list_str += category_str + '\n'`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
			`# save a list of available categories for quick lookup`
More specific exceptions 2021-11-25 18:42:38 +00:00			`try:`
Explicitly set file encoding 2022-06-09 14:46:30 +00:00			`with open(category_list_filename, 'w+',`
			`encoding='utf-8') as fp_category:`
Snake case 2021-12-30 18:38:36 +00:00			`fp_category.write(category_list_str)`
More specific exceptions 2021-11-25 18:42:38 +00:00			`except OSError:`
Snake case 2021-12-30 18:38:36 +00:00			`print('EX: unable to write category ' + category_list_filename)`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00

Moving to snake case 2021-12-29 21:55:09 +00:00			`def _valid_hashtag_category(category: str) -> bool:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`"""Returns true if the category name is valid`
			`"""`
			`if not category:`
			`return False`

Snake case 2021-12-30 18:38:36 +00:00			`for char in INVALID_HASHTAG_CHARS:`
			`if char in category:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`return False`

			`# too long`
			`if len(category) > 40:`
			`return False`

			`return True`


Moving to snake case 2021-12-29 21:55:09 +00:00			`def set_hashtag_category(base_dir: str, hashtag: str, category: str,`
Remove default function argument 2024-05-01 12:03:34 +00:00			`update: bool, force: bool) -> bool:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`"""Sets the category for the hashtag`
			`"""`
Moving to snake case 2021-12-29 21:55:09 +00:00			`if not _valid_hashtag_category(category):`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`return False`

			`if not force:`
Snake case 2021-12-30 18:38:36 +00:00			`hashtag_filename = base_dir + '/tags/' + hashtag + '.txt'`
			`if not os.path.isfile(hashtag_filename):`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`hashtag = hashtag.title()`
Snake case 2021-12-30 18:38:36 +00:00			`hashtag_filename = base_dir + '/tags/' + hashtag + '.txt'`
			`if not os.path.isfile(hashtag_filename):`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`hashtag = hashtag.upper()`
Snake case 2021-12-30 18:38:36 +00:00			`hashtag_filename = base_dir + '/tags/' + hashtag + '.txt'`
			`if not os.path.isfile(hashtag_filename):`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`return False`

Snake case 2021-12-25 16:17:53 +00:00			`if not os.path.isdir(base_dir + '/tags'):`
			`os.mkdir(base_dir + '/tags')`
Snake case 2021-12-30 18:38:36 +00:00			`category_filename = base_dir + '/tags/' + hashtag + '.category'`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`if force:`
			`# don't overwrite any existing categories`
Snake case 2021-12-30 18:38:36 +00:00			`if os.path.isfile(category_filename):`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`return False`
File reading exception handling 2021-11-26 12:28:20 +00:00
Snake case 2021-12-30 18:38:36 +00:00			`category_written = False`
More specific exceptions 2021-11-25 18:42:38 +00:00			`try:`
Explicitly set file encoding 2022-06-09 14:46:30 +00:00			`with open(category_filename, 'w+', encoding='utf-8') as fp_category:`
Snake case 2021-12-30 18:38:36 +00:00			`fp_category.write(category)`
			`category_written = True`
Exception variable names 2021-12-25 15:28:52 +00:00			`except OSError as ex:`
Snake case 2021-12-30 18:38:36 +00:00			`print('EX: unable to write category ' + category_filename +`
Exception variable names 2021-12-25 15:28:52 +00:00			`' ' + str(ex))`
Add exception handling 2024-02-01 10:50:00 +00:00			`except UnicodeEncodeError as ex:`
			`print('EX: unable to write category unicode ' + category_filename +`
			`' ' + str(ex))`
File reading exception handling 2021-11-26 12:28:20 +00:00
Snake case 2021-12-30 18:38:36 +00:00			`if category_written:`
File reading exception handling 2021-11-26 12:28:20 +00:00			`if update:`
Moving to snake case 2021-12-29 21:55:09 +00:00			`update_hashtag_categories(base_dir)`
File reading exception handling 2021-11-26 12:28:20 +00:00			`return True`

Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`return False`


Minmimum hashtag length for category learning 2024-01-10 13:51:19 +00:00			`def guess_hashtag_category(tag_name: str, hashtag_categories: {},`
			`min_tag_length: int) -> str:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`"""Tries to guess a category for the given hashtag.`
			`This works by trying to find the longest similar hashtag`
			`"""`
Minmimum hashtag length for category learning 2024-01-10 13:51:19 +00:00			`if len(tag_name) < min_tag_length:`
Fix test 2021-07-13 08:43:07 +00:00			`return ''`

Snake case 2021-12-30 18:38:36 +00:00			`category_matched = ''`
			`tag_matched_len = 0`
Check for exact matches 2024-01-10 14:04:53 +00:00			`finished = False`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00
Snake case 2021-12-30 18:38:36 +00:00			`for category_str, hashtag_list in hashtag_categories.items():`
Check for exact matches 2024-01-10 14:04:53 +00:00			`if finished:`
			`break`
Snake case 2021-12-30 18:38:36 +00:00			`for hashtag in hashtag_list:`
Check for exact matches 2024-01-10 14:04:53 +00:00			`if hashtag == tag_name:`
			`# exact match`
			`category_matched = category_str`
			`finished = True`
			`break`
Minmimum hashtag length for category learning 2024-01-10 13:51:19 +00:00			`if len(hashtag) < min_tag_length:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`# avoid matching very small strings which often`
			`# lead to spurious categories`
			`continue`
minimum hashtag length for guessing 2024-01-10 13:41:59 +00:00			`if hashtag not in tag_name:`
			`if tag_name not in hashtag:`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`continue`
Snake case 2021-12-30 18:38:36 +00:00			`if not category_matched:`
			`tag_matched_len = len(hashtag)`
			`category_matched = category_str`
Move hashtag categories functions to their own module 2020-12-22 10:30:52 +00:00			`else:`
			`# match the longest tag`
Snake case 2021-12-30 18:38:36 +00:00			`if len(hashtag) > tag_matched_len:`
			`category_matched = category_str`
			`if not category_matched:`
Fix test 2021-07-13 08:43:07 +00:00			`return ''`
Snake case 2021-12-30 18:38:36 +00:00			`return category_matched`