| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | __filename__ = "categories.py" | 
					
						
							|  |  |  | __author__ = "Bob Mottram" | 
					
						
							|  |  |  | __license__ = "AGPL3+" | 
					
						
							| 
									
										
										
										
											2024-12-22 23:37:30 +00:00
										 |  |  | __version__ = "1.6.0" | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | __maintainer__ = "Bob Mottram" | 
					
						
							| 
									
										
										
										
											2021-09-10 16:14:50 +00:00
										 |  |  | __email__ = "bob@libreserver.org" | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | __status__ = "Production" | 
					
						
							| 
									
										
										
										
											2021-06-15 15:08:12 +00:00
										 |  |  | __module_group__ = "RSS Feeds" | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | import datetime | 
					
						
							| 
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 |  |  | from utils import data_dir | 
					
						
							| 
									
										
										
										
											2023-11-20 22:27:58 +00:00
										 |  |  | from utils import date_utcnow | 
					
						
							|  |  |  | from utils import date_epoch | 
					
						
							| 
									
										
										
										
											2024-08-08 17:23:33 +00:00
										 |  |  | from utils import replace_strings | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  | MAX_TAG_LENGTH = 42 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | INVALID_HASHTAG_CHARS = (',', ' ', '<', ';', '\\', '"', '&', '#') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def get_hashtag_category(base_dir: str, hashtag: str) -> str: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     """Returns the category for the hashtag
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     category_filename = base_dir + '/tags/' + hashtag + '.category' | 
					
						
							|  |  |  |     if not os.path.isfile(category_filename): | 
					
						
							|  |  |  |         category_filename = base_dir + '/tags/' + hashtag.title() + '.category' | 
					
						
							|  |  |  |         if not os.path.isfile(category_filename): | 
					
						
							|  |  |  |             category_filename = \ | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |                 base_dir + '/tags/' + hashtag.upper() + '.category' | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |             if not os.path.isfile(category_filename): | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |                 return '' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     category_str = None | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											2024-07-14 13:01:46 +00:00
										 |  |  |         with open(category_filename, 'r', encoding='utf-8') as fp_category: | 
					
						
							|  |  |  |             category_str = fp_category.read() | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  |     except OSError: | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |         print('EX: unable to read category ' + category_filename) | 
					
						
							| 
									
										
										
										
											2024-02-01 10:50:00 +00:00
										 |  |  |     except UnicodeEncodeError as ex: | 
					
						
							|  |  |  |         print('EX: unable to read category unicode ' + category_filename + | 
					
						
							|  |  |  |               ' ' + str(ex)) | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     if category_str: | 
					
						
							|  |  |  |         return category_str | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     return '' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-02 14:42:59 +00:00
										 |  |  | def load_city_hashtags(base_dir: str, translate: {}) -> None: | 
					
						
							|  |  |  |     """create hashtag categories for cities
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     category_str = 'places' | 
					
						
							|  |  |  |     if translate.get(category_str): | 
					
						
							|  |  |  |         category_str = translate[category_str] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-08 17:23:33 +00:00
										 |  |  |     replacements = { | 
					
						
							|  |  |  |         ' & ': ' and ', | 
					
						
							|  |  |  |         '/': '' | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     replacements2 = { | 
					
						
							|  |  |  |         '-': '', | 
					
						
							|  |  |  |         ' ': '' | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-09-02 14:42:59 +00:00
										 |  |  |     for _, _, files in os.walk(base_dir + '/data/cities'): | 
					
						
							|  |  |  |         for cities_file in files: | 
					
						
							|  |  |  |             if not cities_file.endswith('.txt'): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             cities_filename = base_dir + '/data/cities/' + cities_file | 
					
						
							|  |  |  |             if not os.path.isfile(cities_filename): | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2024-12-23 15:39:55 +00:00
										 |  |  |             cities: list[str] = [] | 
					
						
							| 
									
										
										
										
											2023-09-02 14:42:59 +00:00
										 |  |  |             try: | 
					
						
							|  |  |  |                 with open(cities_filename, 'r', encoding='utf-8') as fp_cities: | 
					
						
							|  |  |  |                     cities = fp_cities.read().split('\n') | 
					
						
							|  |  |  |             except OSError: | 
					
						
							|  |  |  |                 print('EX: unable to load cities file ' + cities_filename) | 
					
						
							|  |  |  |             if not cities: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             for hashtag in cities: | 
					
						
							|  |  |  |                 hashtag = hashtag.lower().strip() | 
					
						
							| 
									
										
										
										
											2024-08-08 17:23:33 +00:00
										 |  |  |                 hashtag = replace_strings(hashtag, replacements) | 
					
						
							| 
									
										
										
										
											2023-09-02 14:42:59 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-08 17:23:33 +00:00
										 |  |  |                 hashtag2 = replace_strings(hashtag, replacements2) | 
					
						
							| 
									
										
										
										
											2023-09-02 14:42:59 +00:00
										 |  |  |                 city_filename = base_dir + '/tags/' + hashtag2 + '.category' | 
					
						
							|  |  |  |                 if not os.path.isfile(city_filename): | 
					
						
							|  |  |  |                     try: | 
					
						
							|  |  |  |                         with open(city_filename, 'w+', | 
					
						
							|  |  |  |                                   encoding='utf-8') as fp_city: | 
					
						
							|  |  |  |                             fp_city.write(category_str) | 
					
						
							|  |  |  |                     except OSError: | 
					
						
							|  |  |  |                         print('EX: unable to write city category ' + | 
					
						
							|  |  |  |                               city_filename) | 
					
						
							|  |  |  |                 if '-' in hashtag: | 
					
						
							|  |  |  |                     section = hashtag.split('-') | 
					
						
							|  |  |  |                     new_hashtag = '' | 
					
						
							|  |  |  |                     for text in section: | 
					
						
							|  |  |  |                         new_hashtag += text.lower().title() | 
					
						
							|  |  |  |                     hashtag2 = new_hashtag | 
					
						
							|  |  |  |                     city_filename = \ | 
					
						
							|  |  |  |                         base_dir + '/tags/' + hashtag2 + '.category' | 
					
						
							|  |  |  |                     if not os.path.isfile(city_filename): | 
					
						
							|  |  |  |                         try: | 
					
						
							|  |  |  |                             with open(city_filename, 'w+', | 
					
						
							|  |  |  |                                       encoding='utf-8') as fp_city: | 
					
						
							|  |  |  |                                 fp_city.write(category_str) | 
					
						
							|  |  |  |                         except OSError: | 
					
						
							|  |  |  |                             print('EX: unable to write city category2 ' + | 
					
						
							|  |  |  |                                   city_filename) | 
					
						
							|  |  |  |                 if ' ' in hashtag: | 
					
						
							|  |  |  |                     section = hashtag.split(' ') | 
					
						
							|  |  |  |                     new_hashtag = '' | 
					
						
							|  |  |  |                     for text in section: | 
					
						
							|  |  |  |                         new_hashtag += text.lower().title() | 
					
						
							|  |  |  |                     hashtag2 = new_hashtag | 
					
						
							|  |  |  |                     city_filename = \ | 
					
						
							|  |  |  |                         base_dir + '/tags/' + hashtag2 + '.category' | 
					
						
							|  |  |  |                     if not os.path.isfile(city_filename): | 
					
						
							|  |  |  |                         try: | 
					
						
							|  |  |  |                             with open(city_filename, 'w+', | 
					
						
							|  |  |  |                                       encoding='utf-8') as fp_city: | 
					
						
							|  |  |  |                                 fp_city.write(category_str) | 
					
						
							|  |  |  |                         except OSError: | 
					
						
							|  |  |  |                             print('EX: unable to write city category3 ' + | 
					
						
							|  |  |  |                                   city_filename) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def get_hashtag_categories(base_dir: str, | 
					
						
							| 
									
										
										
										
											2024-02-19 14:38:29 +00:00
										 |  |  |                            recent: bool, category: str) -> None: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     """Returns a dictionary containing hashtag categories
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     hashtag_categories = {} | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if recent: | 
					
						
							| 
									
										
										
										
											2023-11-20 22:27:58 +00:00
										 |  |  |         curr_time = date_utcnow() | 
					
						
							|  |  |  |         days_since_epoch = (curr_time - date_epoch()).days | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |         recently = days_since_epoch - 1 | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-30 21:41:18 +00:00
										 |  |  |     for _, _, files in os.walk(base_dir + '/tags'): | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |         for catfile in files: | 
					
						
							|  |  |  |             if not catfile.endswith('.category'): | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |             category_filename = os.path.join(base_dir + '/tags', catfile) | 
					
						
							|  |  |  |             if not os.path.isfile(category_filename): | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |             hashtag = catfile.split('.')[0] | 
					
						
							|  |  |  |             if len(hashtag) > MAX_TAG_LENGTH: | 
					
						
							| 
									
										
										
										
											2021-01-24 10:45:35 +00:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |             category_str = None | 
					
						
							| 
									
										
										
										
											2024-02-01 10:50:00 +00:00
										 |  |  |             try: | 
					
						
							|  |  |  |                 with open(category_filename, 'r', | 
					
						
							|  |  |  |                           encoding='utf-8') as fp_category: | 
					
						
							|  |  |  |                     category_str = fp_category.read() | 
					
						
							|  |  |  |             except OSError: | 
					
						
							|  |  |  |                 print('EX: get_hashtag_categories ' + category_filename) | 
					
						
							|  |  |  |             except UnicodeEncodeError as ex: | 
					
						
							|  |  |  |                 print('EX: get_hashtag_categories unicode ' + | 
					
						
							|  |  |  |                       category_filename + ' ' + str(ex)) | 
					
						
							| 
									
										
										
										
											2021-06-21 22:52:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 |  |  |             if not category_str: | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2021-06-21 22:52:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 |  |  |             if category: | 
					
						
							|  |  |  |                 # only return a dictionary for a specific category | 
					
						
							|  |  |  |                 if category_str != category: | 
					
						
							|  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 |  |  |             if recent: | 
					
						
							|  |  |  |                 tags_filename = base_dir + '/tags/' + hashtag + '.txt' | 
					
						
							|  |  |  |                 if not os.path.isfile(tags_filename): | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 mod_time_since_epoc = \ | 
					
						
							|  |  |  |                     os.path.getmtime(tags_filename) | 
					
						
							|  |  |  |                 last_modified_date = \ | 
					
						
							| 
									
										
										
										
											2023-11-20 22:27:58 +00:00
										 |  |  |                     datetime.datetime.fromtimestamp(mod_time_since_epoc, | 
					
						
							|  |  |  |                                                     datetime.timezone.utc) | 
					
						
							| 
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 |  |  |                 file_days_since_epoch = \ | 
					
						
							| 
									
										
										
										
											2023-11-20 22:27:58 +00:00
										 |  |  |                     (last_modified_date - date_epoch()).days | 
					
						
							| 
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 |  |  |                 if file_days_since_epoch < recently: | 
					
						
							|  |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-02 11:47:24 +00:00
										 |  |  |             if not hashtag_categories.get(category_str): | 
					
						
							|  |  |  |                 hashtag_categories[category_str] = [hashtag] | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 if hashtag not in hashtag_categories[category_str]: | 
					
						
							|  |  |  |                     hashtag_categories[category_str].append(hashtag) | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |         break | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     return hashtag_categories | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def update_hashtag_categories(base_dir: str) -> None: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     """Regenerates the list of hashtag categories
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 |  |  |     category_list_filename = data_dir(base_dir) + '/categoryList.txt' | 
					
						
							| 
									
										
										
										
											2024-02-19 14:38:29 +00:00
										 |  |  |     hashtag_categories = get_hashtag_categories(base_dir, False, None) | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     if not hashtag_categories: | 
					
						
							|  |  |  |         if os.path.isfile(category_list_filename): | 
					
						
							| 
									
										
										
										
											2021-09-05 10:17:43 +00:00
										 |  |  |             try: | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |                 os.remove(category_list_filename) | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |             except OSError: | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  |                 print('EX: update_hashtag_categories ' + | 
					
						
							| 
									
										
										
										
											2021-10-29 16:31:20 +00:00
										 |  |  |                       'unable to delete cached category list ' + | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |                       category_list_filename) | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-23 15:39:55 +00:00
										 |  |  |     category_list: list[str] = [] | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     for category_str, _ in hashtag_categories.items(): | 
					
						
							|  |  |  |         category_list.append(category_str) | 
					
						
							|  |  |  |     category_list.sort() | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     category_list_str = '' | 
					
						
							|  |  |  |     for category_str in category_list: | 
					
						
							|  |  |  |         category_list_str += category_str + '\n' | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # save a list of available categories for quick lookup | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 |  |  |         with open(category_list_filename, 'w+', | 
					
						
							|  |  |  |                   encoding='utf-8') as fp_category: | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |             fp_category.write(category_list_str) | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |     except OSError: | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |         print('EX: unable to write category ' + category_list_filename) | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def _valid_hashtag_category(category: str) -> bool: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     """Returns true if the category name is valid
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not category: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     for char in INVALID_HASHTAG_CHARS: | 
					
						
							|  |  |  |         if char in category: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |             return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # too long | 
					
						
							|  |  |  |     if len(category) > 40: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  | def set_hashtag_category(base_dir: str, hashtag: str, category: str, | 
					
						
							| 
									
										
										
										
											2024-05-01 12:03:34 +00:00
										 |  |  |                          update: bool, force: bool) -> bool: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     """Sets the category for the hashtag
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  |     if not _valid_hashtag_category(category): | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not force: | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |         hashtag_filename = base_dir + '/tags/' + hashtag + '.txt' | 
					
						
							|  |  |  |         if not os.path.isfile(hashtag_filename): | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |             hashtag = hashtag.title() | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |             hashtag_filename = base_dir + '/tags/' + hashtag + '.txt' | 
					
						
							|  |  |  |             if not os.path.isfile(hashtag_filename): | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |                 hashtag = hashtag.upper() | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |                 hashtag_filename = base_dir + '/tags/' + hashtag + '.txt' | 
					
						
							|  |  |  |                 if not os.path.isfile(hashtag_filename): | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |                     return False | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-25 16:17:53 +00:00
										 |  |  |     if not os.path.isdir(base_dir + '/tags'): | 
					
						
							|  |  |  |         os.mkdir(base_dir + '/tags') | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     category_filename = base_dir + '/tags/' + hashtag + '.category' | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     if force: | 
					
						
							|  |  |  |         # don't overwrite any existing categories | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |         if os.path.isfile(category_filename): | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |             return False | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     category_written = False | 
					
						
							| 
									
										
										
										
											2021-11-25 18:42:38 +00:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 |  |  |         with open(category_filename, 'w+', encoding='utf-8') as fp_category: | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |             fp_category.write(category) | 
					
						
							|  |  |  |             category_written = True | 
					
						
							| 
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 |  |  |     except OSError as ex: | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |         print('EX: unable to write category ' + category_filename + | 
					
						
							| 
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 |  |  |               ' ' + str(ex)) | 
					
						
							| 
									
										
										
										
											2024-02-01 10:50:00 +00:00
										 |  |  |     except UnicodeEncodeError as ex: | 
					
						
							|  |  |  |         print('EX: unable to write category unicode ' + category_filename + | 
					
						
							|  |  |  |               ' ' + str(ex)) | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     if category_written: | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  |         if update: | 
					
						
							| 
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 |  |  |             update_hashtag_categories(base_dir) | 
					
						
							| 
									
										
										
										
											2021-11-26 12:28:20 +00:00
										 |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-10 13:51:19 +00:00
										 |  |  | def guess_hashtag_category(tag_name: str, hashtag_categories: {}, | 
					
						
							|  |  |  |                            min_tag_length: int) -> str: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     """Tries to guess a category for the given hashtag.
 | 
					
						
							|  |  |  |     This works by trying to find the longest similar hashtag | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2024-01-10 13:51:19 +00:00
										 |  |  |     if len(tag_name) < min_tag_length: | 
					
						
							| 
									
										
										
										
											2021-07-13 08:43:07 +00:00
										 |  |  |         return '' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     category_matched = '' | 
					
						
							|  |  |  |     tag_matched_len = 0 | 
					
						
							| 
									
										
										
										
											2024-01-10 14:04:53 +00:00
										 |  |  |     finished = False | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     for category_str, hashtag_list in hashtag_categories.items(): | 
					
						
							| 
									
										
										
										
											2024-01-10 14:04:53 +00:00
										 |  |  |         if finished: | 
					
						
							|  |  |  |             break | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |         for hashtag in hashtag_list: | 
					
						
							| 
									
										
										
										
											2024-01-10 14:04:53 +00:00
										 |  |  |             if hashtag == tag_name: | 
					
						
							|  |  |  |                 # exact match | 
					
						
							|  |  |  |                 category_matched = category_str | 
					
						
							|  |  |  |                 finished = True | 
					
						
							|  |  |  |                 break | 
					
						
							| 
									
										
										
										
											2024-01-10 13:51:19 +00:00
										 |  |  |             if len(hashtag) < min_tag_length: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |                 # avoid matching very small strings which often | 
					
						
							|  |  |  |                 # lead to spurious categories | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2024-01-10 13:41:59 +00:00
										 |  |  |             if hashtag not in tag_name: | 
					
						
							|  |  |  |                 if tag_name not in hashtag: | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |             if not category_matched: | 
					
						
							|  |  |  |                 tag_matched_len = len(hashtag) | 
					
						
							|  |  |  |                 category_matched = category_str | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 # match the longest tag | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |                 if len(hashtag) > tag_matched_len: | 
					
						
							|  |  |  |                     category_matched = category_str | 
					
						
							|  |  |  |     if not category_matched: | 
					
						
							| 
									
										
										
										
											2021-07-13 08:43:07 +00:00
										 |  |  |         return '' | 
					
						
							| 
									
										
										
										
											2021-12-30 18:38:36 +00:00
										 |  |  |     return category_matched |