forked from indymedia/epicyon
				
			Guess hashtag categorisations
							parent
							
								
									60abb1a0a6
								
							
						
					
					
						commit
						67e06f65c9
					
				
							
								
								
									
										37
									
								
								inbox.py
								
								
								
								
							
							
						
						
									
										37
									
								
								inbox.py
								
								
								
								
							|  | @ -30,6 +30,8 @@ from utils import loadJson | |||
| from utils import saveJson | ||||
| from utils import updateLikesCollection | ||||
| from utils import undoLikesCollectionEntry | ||||
| from utils import getHashtagCategories | ||||
| from utils import setHashtagCategory | ||||
| from httpsig import verifyPostHeaders | ||||
| from session import createSession | ||||
| from session import getJson | ||||
|  | @ -68,6 +70,31 @@ from happening import saveEventPost | |||
| from delete import removeOldHashtags | ||||
| 
 | ||||
| 
 | ||||
def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
    """Tries to guess a category for the given hashtag.

    This works by finding the longest known hashtag which is contained
    within tagName and returning the category which that hashtag
    belongs to.

    tagName: the hashtag to be categorised
    hashtagCategories: dictionary mapping each category name to the
        list of hashtags within that category

    Returns the matched category name, or None if no known hashtag
    is contained within tagName. If more than one hashtag of the same
    longest length matches then the first one encountered wins.
    """
    if not tagName or not hashtagCategories:
        return None

    categoryMatched = ''
    tagMatchedLen = 0

    for categoryStr, hashtagList in hashtagCategories.items():
        for hashtag in hashtagList:
            # an empty string is a substring of everything, so it
            # must never count as a match
            if not hashtag or hashtag not in tagName:
                continue
            # match the longest tag. The recorded length has to be
            # updated together with the category, otherwise a later
            # shorter hashtag could replace a longer match
            if len(hashtag) > tagMatchedLen:
                tagMatchedLen = len(hashtag)
                categoryMatched = categoryStr
    if not categoryMatched:
        return None
    return categoryMatched
| 
 | ||||
| 
 | ||||
| def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None: | ||||
|     """Extracts hashtags from an incoming post and updates the | ||||
|     relevant tags files. | ||||
|  | @ -91,6 +118,8 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None: | |||
|         print('Creating tags directory') | ||||
|         os.mkdir(tagsDir) | ||||
| 
 | ||||
|     hashtagCategories = getHashtagCategories(baseDir) | ||||
| 
 | ||||
|     for tag in postJsonObject['object']['tag']: | ||||
|         if not tag.get('type'): | ||||
|             continue | ||||
|  | @ -122,6 +151,14 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None: | |||
|                           tagsFilename + ' ' + str(e)) | ||||
|                 removeOldHashtags(baseDir, 3) | ||||
| 
 | ||||
|         # automatically assign a category to the tag if possible | ||||
|         categoryFilename = tagsDir + '/' + tagName + '.category' | ||||
|         if not os.path.isfile(categoryFilename): | ||||
|             categoryStr = \ | ||||
|                 guessHashtagCategory(tagName, hashtagCategories) | ||||
|             if categoryStr: | ||||
|                 setHashtagCategory(baseDir, tagName, categoryStr) | ||||
| 
 | ||||
| 
 | ||||
| def inboxStorePostToHtmlCache(recentPostsCache: {}, maxRecentPosts: int, | ||||
|                               translate: {}, | ||||
|  |  | |||
							
								
								
									
										15
									
								
								tests.py
								
								
								
								
							
							
						
						
									
										15
									
								
								tests.py
								
								
								
								
							|  | @ -71,6 +71,7 @@ from delete import sendDeleteViaServer | |||
| from inbox import jsonPostAllowsComments | ||||
| from inbox import validInbox | ||||
| from inbox import validInboxFilenames | ||||
| from inbox import guessHashtagCategory | ||||
| from content import htmlReplaceEmailQuote | ||||
| from content import htmlReplaceQuoteMarks | ||||
| from content import dangerousMarkup | ||||
|  | @ -2421,8 +2422,22 @@ def testValidNickname(): | |||
|     assert not validNickname(domain, nickname) | ||||
| 
 | ||||
| 
 | ||||
def testGuessHashtagCategory() -> None:
    """Checks that a hashtag containing a known hashtag is assigned
    the category of that known hashtag.
    """
    print('testGuessHashtagCategory')
    hashtagCategories = {
        "foo": ["swan", "goose"],
        "bar": ["cat", "mouse"]
    }
    # each entry is (hashtag to categorise, expected category)
    expectedGuesses = (
        ("unspecifiedgoose", "foo"),
        ("catpic", "bar"),
    )
    for tagName, expectedCategory in expectedGuesses:
        guess = guessHashtagCategory(tagName, hashtagCategories)
        assert guess == expectedCategory
| 
 | ||||
| 
 | ||||
| def runAllTests(): | ||||
|     print('Running tests...') | ||||
|     testGuessHashtagCategory() | ||||
|     testValidNickname() | ||||
|     testParseFeedDate() | ||||
|     testFirstParagraphFromString() | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue