2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								__filename__ = "categories.py"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								__author__ = "Bob Mottram"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								__license__ = "AGPL3+"
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-21 19:01:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__version__ = "1.5.0"
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								__maintainer__ = "Bob Mottram"
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-10 16:14:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__email__ = "bob@libreserver.org"
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								__status__ = "Production"
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-15 15:08:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__module_group__ = "RSS Feeds"
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import os
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import datetime
							 | 
						
					
						
							
								
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import data_dir
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-20 22:27:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import date_utcnow
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from utils import date_epoch
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								MAX_TAG_LENGTH = 42
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								INVALID_HASHTAG_CHARS = (',', ' ', '<', ';', '\\', '"', '&', '#')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def get_hashtag_category(base_dir: str, hashtag: str) -> str:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Returns the category for the hashtag
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    category_filename = base_dir + '/tags/' + hashtag + '.category'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not os.path.isfile(category_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        category_filename = base_dir + '/tags/' + hashtag.title() + '.category'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if not os.path.isfile(category_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            category_filename = \
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                base_dir + '/tags/' + hashtag.upper() + '.category'
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if not os.path.isfile(category_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                return ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    category_str = None
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        with open(category_filename, 'r', encoding='utf-8') as category_file:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            category_str = category_file.read()
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except OSError:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to read category ' + category_filename)
							 | 
						
					
						
							
								
									
										
										
										
											2024-02-01 10:50:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except UnicodeEncodeError as ex:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to read category unicode ' + category_filename +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              ' ' + str(ex))
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if category_str:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return category_str
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 14:42:59 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def load_city_hashtags(base_dir: str, translate: {}) -> None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """create hashtag categories for cities
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    category_str = 'places'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if translate.get(category_str):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        category_str = translate[category_str]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for _, _, files in os.walk(base_dir + '/data/cities'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        for cities_file in files:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not cities_file.endswith('.txt'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                continue
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            cities_filename = base_dir + '/data/cities/' + cities_file
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not os.path.isfile(cities_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                continue
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            cities = []
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                with open(cities_filename, 'r', encoding='utf-8') as fp_cities:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    cities = fp_cities.read().split('\n')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except OSError:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                print('EX: unable to load cities file ' + cities_filename)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not cities:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                continue
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            for hashtag in cities:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                hashtag = hashtag.lower().strip()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                hashtag = hashtag.replace(' & ', ' and ')
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-09 09:31:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                hashtag = hashtag.replace('/', '')
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 14:42:59 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                hashtag2 = hashtag.replace('-', '').replace(' ', '')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                city_filename = base_dir + '/tags/' + hashtag2 + '.category'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if not os.path.isfile(city_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        with open(city_filename, 'w+',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                  encoding='utf-8') as fp_city:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                            fp_city.write(category_str)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    except OSError:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        print('EX: unable to write city category ' +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                              city_filename)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if '-' in hashtag:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    section = hashtag.split('-')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    new_hashtag = ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    for text in section:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        new_hashtag += text.lower().title()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    hashtag2 = new_hashtag
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    city_filename = \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        base_dir + '/tags/' + hashtag2 + '.category'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    if not os.path.isfile(city_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                            with open(city_filename, 'w+',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                      encoding='utf-8') as fp_city:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                fp_city.write(category_str)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        except OSError:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                            print('EX: unable to write city category2 ' +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                  city_filename)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if ' ' in hashtag:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    section = hashtag.split(' ')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    new_hashtag = ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    for text in section:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        new_hashtag += text.lower().title()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    hashtag2 = new_hashtag
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    city_filename = \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        base_dir + '/tags/' + hashtag2 + '.category'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    if not os.path.isfile(city_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                            with open(city_filename, 'w+',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                      encoding='utf-8') as fp_city:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                fp_city.write(category_str)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        except OSError:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                            print('EX: unable to write city category3 ' +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                  city_filename)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def get_hashtag_categories(base_dir: str,
							 | 
						
					
						
							
								
									
										
										
										
											2024-02-19 14:38:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                           recent: bool, category: str) -> None:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Returns a dictionary containing hashtag categories
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    hashtag_categories = {}
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if recent:
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-20 22:27:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        curr_time = date_utcnow()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        days_since_epoch = (curr_time - date_epoch()).days
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        recently = days_since_epoch - 1
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-05-30 21:41:18 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    for _, _, files in os.walk(base_dir + '/tags'):
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        for catfile in files:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not catfile.endswith('.category'):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                continue
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            category_filename = os.path.join(base_dir + '/tags', catfile)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not os.path.isfile(category_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                continue
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            hashtag = catfile.split('.')[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if len(hashtag) > MAX_TAG_LENGTH:
							 | 
						
					
						
							
								
									
										
										
										
											2021-01-24 10:45:35 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                continue
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            category_str = None
							 | 
						
					
						
							
								
									
										
										
										
											2024-02-01 10:50:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                with open(category_filename, 'r',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                          encoding='utf-8') as fp_category:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    category_str = fp_category.read()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except OSError:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                print('EX: get_hashtag_categories ' + category_filename)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except UnicodeEncodeError as ex:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                print('EX: get_hashtag_categories unicode ' +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                      category_filename + ' ' + str(ex))
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-21 22:52:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if not category_str:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                continue
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-21 22:52:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if category:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # only return a dictionary for a specific category
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if category_str != category:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    continue
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if recent:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                tags_filename = base_dir + '/tags/' + hashtag + '.txt'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if not os.path.isfile(tags_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    continue
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                mod_time_since_epoc = \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    os.path.getmtime(tags_filename)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                last_modified_date = \
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-20 22:27:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    datetime.datetime.fromtimestamp(mod_time_since_epoc,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                                    datetime.timezone.utc)
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                file_days_since_epoch = \
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-20 22:27:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    (last_modified_date - date_epoch()).days
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if file_days_since_epoch < recently:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    continue
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if not hashtag_categories.get(category_str):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                hashtag_categories[category_str] = [hashtag]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if hashtag not in hashtag_categories[category_str]:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    hashtag_categories[category_str].append(hashtag)
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        break
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return hashtag_categories
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def update_hashtag_categories(base_dir: str) -> None:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Regenerates the list of hashtag categories
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    category_list_filename = data_dir(base_dir) + '/categoryList.txt'
							 | 
						
					
						
							
								
									
										
										
										
											2024-02-19 14:38:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    hashtag_categories = get_hashtag_categories(base_dir, False, None)
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not hashtag_categories:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if os.path.isfile(category_list_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-05 10:17:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                os.remove(category_list_filename)
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            except OSError:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                print('EX: update_hashtag_categories ' +
							 | 
						
					
						
							
								
									
										
										
										
											2021-10-29 16:31:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                      'unable to delete cached category list ' +
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                      category_list_filename)
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    category_list = []
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for category_str, _ in hashtag_categories.items():
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        category_list.append(category_str)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    category_list.sort()
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    category_list_str = ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for category_str in category_list:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        category_list_str += category_str + '\n'
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # save a list of available categories for quick lookup
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        with open(category_list_filename, 'w+',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                  encoding='utf-8') as fp_category:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            fp_category.write(category_list_str)
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except OSError:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to write category ' + category_list_filename)
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def _valid_hashtag_category(category: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Returns true if the category name is valid
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not category:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    for char in INVALID_HASHTAG_CHARS:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if char in category:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # too long
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if len(category) > 40:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def set_hashtag_category(base_dir: str, hashtag: str, category: str,
							 | 
						
					
						
							
								
									
										
										
										
											2024-05-01 12:03:34 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                         update: bool, force: bool) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Sets the category for the hashtag
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not _valid_hashtag_category(category):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not force:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        hashtag_filename = base_dir + '/tags/' + hashtag + '.txt'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if not os.path.isfile(hashtag_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            hashtag = hashtag.title()
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            hashtag_filename = base_dir + '/tags/' + hashtag + '.txt'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not os.path.isfile(hashtag_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                hashtag = hashtag.upper()
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                hashtag_filename = base_dir + '/tags/' + hashtag + '.txt'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if not os.path.isfile(hashtag_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not os.path.isdir(base_dir + '/tags'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        os.mkdir(base_dir + '/tags')
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    category_filename = base_dir + '/tags/' + hashtag + '.category'
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if force:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # don't overwrite any existing categories
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if os.path.isfile(category_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return False
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    category_written = False
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        with open(category_filename, 'w+', encoding='utf-8') as fp_category:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            fp_category.write(category)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            category_written = True
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except OSError as ex:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to write category ' + category_filename +
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								              ' ' + str(ex))
							 | 
						
					
						
							
								
									
										
										
										
											2024-02-01 10:50:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except UnicodeEncodeError as ex:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to write category unicode ' + category_filename +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              ' ' + str(ex))
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if category_written:
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if update:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            update_hashtag_categories(base_dir)
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-10 13:51:19 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def guess_hashtag_category(tag_name: str, hashtag_categories: {},
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                           min_tag_length: int) -> str:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Tries to guess a category for the given hashtag.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    This works by trying to find the longest similar hashtag
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-10 13:51:19 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if len(tag_name) < min_tag_length:
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-13 08:43:07 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    category_matched = ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    tag_matched_len = 0
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-10 14:04:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    finished = False
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    for category_str, hashtag_list in hashtag_categories.items():
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-10 14:04:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if finished:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            break
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        for hashtag in hashtag_list:
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-10 14:04:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if hashtag == tag_name:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # exact match
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                category_matched = category_str
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                finished = True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                break
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-10 13:51:19 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if len(hashtag) < min_tag_length:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # avoid matching very small strings which often
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # lead to spurious categories
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                continue
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-10 13:41:59 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if hashtag not in tag_name:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if tag_name not in hashtag:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    continue
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if not category_matched:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                tag_matched_len = len(hashtag)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                category_matched = category_str
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # match the longest tag
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if len(hashtag) > tag_matched_len:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    category_matched = category_str
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not category_matched:
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-13 08:43:07 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return ''
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return category_matched
							 |