| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | __filename__ = "categories.py" | 
					
						
							|  |  |  | __author__ = "Bob Mottram" | 
					
						
							|  |  |  | __license__ = "AGPL3+" | 
					
						
							| 
									
										
										
										
											2021-01-26 10:07:42 +00:00
										 |  |  | __version__ = "1.2.0" | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  | __maintainer__ = "Bob Mottram" | 
					
						
							|  |  |  | __email__ = "bob@freedombone.net" | 
					
						
							|  |  |  | __status__ = "Production" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | import datetime | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def getHashtagCategory(baseDir: str, hashtag: str) -> str: | 
					
						
							|  |  |  |     """Returns the category for the hashtag
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     categoryFilename = baseDir + '/tags/' + hashtag + '.category' | 
					
						
							|  |  |  |     if not os.path.isfile(categoryFilename): | 
					
						
							|  |  |  |         categoryFilename = baseDir + '/tags/' + hashtag.title() + '.category' | 
					
						
							|  |  |  |         if not os.path.isfile(categoryFilename): | 
					
						
							|  |  |  |             categoryFilename = \ | 
					
						
							|  |  |  |                 baseDir + '/tags/' + hashtag.upper() + '.category' | 
					
						
							|  |  |  |             if not os.path.isfile(categoryFilename): | 
					
						
							|  |  |  |                 return '' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     with open(categoryFilename, 'r') as fp: | 
					
						
							|  |  |  |         categoryStr = fp.read() | 
					
						
							|  |  |  |         if categoryStr: | 
					
						
							|  |  |  |             return categoryStr | 
					
						
							|  |  |  |     return '' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def getHashtagCategories(baseDir: str, recent=False, category=None) -> None: | 
					
						
							|  |  |  |     """Returns a dictionary containing hashtag categories
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2021-01-24 10:45:35 +00:00
										 |  |  |     maxTagLength = 42 | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     hashtagCategories = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if recent: | 
					
						
							|  |  |  |         currTime = datetime.datetime.utcnow() | 
					
						
							|  |  |  |         daysSinceEpoch = (currTime - datetime.datetime(1970, 1, 1)).days | 
					
						
							|  |  |  |         recently = daysSinceEpoch - 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for subdir, dirs, files in os.walk(baseDir + '/tags'): | 
					
						
							|  |  |  |         for f in files: | 
					
						
							|  |  |  |             if not f.endswith('.category'): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             categoryFilename = os.path.join(baseDir + '/tags', f) | 
					
						
							|  |  |  |             if not os.path.isfile(categoryFilename): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             hashtag = f.split('.')[0] | 
					
						
							| 
									
										
										
										
											2021-01-24 10:45:35 +00:00
										 |  |  |             if len(hashtag) > maxTagLength: | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |             with open(categoryFilename, 'r') as fp: | 
					
						
							|  |  |  |                 categoryStr = fp.read() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 if not categoryStr: | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 if category: | 
					
						
							|  |  |  |                     # only return a dictionary for a specific category | 
					
						
							|  |  |  |                     if categoryStr != category: | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 if recent: | 
					
						
							|  |  |  |                     tagsFilename = baseDir + '/tags/' + hashtag + '.txt' | 
					
						
							|  |  |  |                     if not os.path.isfile(tagsFilename): | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  |                     modTimesinceEpoc = \ | 
					
						
							|  |  |  |                         os.path.getmtime(tagsFilename) | 
					
						
							|  |  |  |                     lastModifiedDate = \ | 
					
						
							|  |  |  |                         datetime.datetime.fromtimestamp(modTimesinceEpoc) | 
					
						
							|  |  |  |                     fileDaysSinceEpoch = \ | 
					
						
							|  |  |  |                         (lastModifiedDate - | 
					
						
							|  |  |  |                          datetime.datetime(1970, 1, 1)).days | 
					
						
							|  |  |  |                     if fileDaysSinceEpoch < recently: | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 if not hashtagCategories.get(categoryStr): | 
					
						
							|  |  |  |                     hashtagCategories[categoryStr] = [hashtag] | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     if hashtag not in hashtagCategories[categoryStr]: | 
					
						
							|  |  |  |                         hashtagCategories[categoryStr].append(hashtag) | 
					
						
							|  |  |  |         break | 
					
						
							|  |  |  |     return hashtagCategories | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _updateHashtagCategories(baseDir: str) -> None: | 
					
						
							|  |  |  |     """Regenerates the list of hashtag categories
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     categoryListFilename = baseDir + '/accounts/categoryList.txt' | 
					
						
							|  |  |  |     hashtagCategories = getHashtagCategories(baseDir) | 
					
						
							|  |  |  |     if not hashtagCategories: | 
					
						
							|  |  |  |         if os.path.isfile(categoryListFilename): | 
					
						
							|  |  |  |             os.remove(categoryListFilename) | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     categoryList = [] | 
					
						
							|  |  |  |     for categoryStr, hashtagList in hashtagCategories.items(): | 
					
						
							|  |  |  |         categoryList.append(categoryStr) | 
					
						
							|  |  |  |     categoryList.sort() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     categoryListStr = '' | 
					
						
							|  |  |  |     for categoryStr in categoryList: | 
					
						
							|  |  |  |         categoryListStr += categoryStr + '\n' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # save a list of available categories for quick lookup | 
					
						
							|  |  |  |     with open(categoryListFilename, 'w+') as fp: | 
					
						
							|  |  |  |         fp.write(categoryListStr) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _validHashtagCategory(category: str) -> bool: | 
					
						
							|  |  |  |     """Returns true if the category name is valid
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not category: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-02-09 13:16:53 +00:00
										 |  |  |     invalidChars = (',', ' ', '<', ';', '\\', '"', '&', '#') | 
					
						
							| 
									
										
										
										
											2020-12-22 10:30:52 +00:00
										 |  |  |     for ch in invalidChars: | 
					
						
							|  |  |  |         if ch in category: | 
					
						
							|  |  |  |             return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # too long | 
					
						
							|  |  |  |     if len(category) > 40: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def setHashtagCategory(baseDir: str, hashtag: str, category: str, | 
					
						
							|  |  |  |                        force=False) -> bool: | 
					
						
							|  |  |  |     """Sets the category for the hashtag
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     if not _validHashtagCategory(category): | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not force: | 
					
						
							|  |  |  |         hashtagFilename = baseDir + '/tags/' + hashtag + '.txt' | 
					
						
							|  |  |  |         if not os.path.isfile(hashtagFilename): | 
					
						
							|  |  |  |             hashtag = hashtag.title() | 
					
						
							|  |  |  |             hashtagFilename = baseDir + '/tags/' + hashtag + '.txt' | 
					
						
							|  |  |  |             if not os.path.isfile(hashtagFilename): | 
					
						
							|  |  |  |                 hashtag = hashtag.upper() | 
					
						
							|  |  |  |                 hashtagFilename = baseDir + '/tags/' + hashtag + '.txt' | 
					
						
							|  |  |  |                 if not os.path.isfile(hashtagFilename): | 
					
						
							|  |  |  |                     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not os.path.isdir(baseDir + '/tags'): | 
					
						
							|  |  |  |         os.mkdir(baseDir + '/tags') | 
					
						
							|  |  |  |     categoryFilename = baseDir + '/tags/' + hashtag + '.category' | 
					
						
							|  |  |  |     if force: | 
					
						
							|  |  |  |         # don't overwrite any existing categories | 
					
						
							|  |  |  |         if os.path.isfile(categoryFilename): | 
					
						
							|  |  |  |             return False | 
					
						
							|  |  |  |     with open(categoryFilename, 'w+') as fp: | 
					
						
							|  |  |  |         fp.write(category) | 
					
						
							|  |  |  |         _updateHashtagCategories(baseDir) | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str: | 
					
						
							|  |  |  |     """Tries to guess a category for the given hashtag.
 | 
					
						
							|  |  |  |     This works by trying to find the longest similar hashtag | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     categoryMatched = '' | 
					
						
							|  |  |  |     tagMatchedLen = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for categoryStr, hashtagList in hashtagCategories.items(): | 
					
						
							|  |  |  |         for hashtag in hashtagList: | 
					
						
							|  |  |  |             if len(hashtag) < 3: | 
					
						
							|  |  |  |                 # avoid matching very small strings which often | 
					
						
							|  |  |  |                 # lead to spurious categories | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             if hashtag not in tagName: | 
					
						
							|  |  |  |                 if tagName not in hashtag: | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |             if not categoryMatched: | 
					
						
							|  |  |  |                 tagMatchedLen = len(hashtag) | 
					
						
							|  |  |  |                 categoryMatched = categoryStr | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 # match the longest tag | 
					
						
							|  |  |  |                 if len(hashtag) > tagMatchedLen: | 
					
						
							|  |  |  |                     categoryMatched = categoryStr | 
					
						
							|  |  |  |     if not categoryMatched: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  |     return categoryMatched |