forked from indymedia/epicyon
Guess hashtag categorisations
parent
60abb1a0a6
commit
67e06f65c9
37
inbox.py
37
inbox.py
|
@ -30,6 +30,8 @@ from utils import loadJson
|
|||
from utils import saveJson
|
||||
from utils import updateLikesCollection
|
||||
from utils import undoLikesCollectionEntry
|
||||
from utils import getHashtagCategories
|
||||
from utils import setHashtagCategory
|
||||
from httpsig import verifyPostHeaders
|
||||
from session import createSession
|
||||
from session import getJson
|
||||
|
@ -68,6 +70,31 @@ from happening import saveEventPost
|
|||
from delete import removeOldHashtags
|
||||
|
||||
|
||||
def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
    """Tries to guess a category for the given hashtag.

    This works by finding the longest already categorised hashtag
    which is contained within tagName, and returning the category
    which that hashtag belongs to.

    tagName: the hashtag for which a category is wanted
    hashtagCategories: dict mapping each category name to the list
        of hashtags assigned to it

    Returns the guessed category name, or None if no categorised
    hashtag occurs within tagName.
    """
    if not tagName or not hashtagCategories:
        return None

    categoryMatched = ''
    tagMatchedLen = 0

    for categoryStr, hashtagList in hashtagCategories.items():
        for hashtag in hashtagList:
            # an empty string is a substring of everything, so it
            # can never be a meaningful match
            if not hashtag or hashtag not in tagName:
                continue
            # match the longest tag. The recorded length has to be
            # updated on every improvement, otherwise a later hashtag
            # which is shorter than the current best could replace it.
            # Strictly greater, so the first of equal lengths wins.
            if len(hashtag) > tagMatchedLen:
                tagMatchedLen = len(hashtag)
                categoryMatched = categoryStr

    if not categoryMatched:
        return None
    return categoryMatched
|
||||
|
||||
|
||||
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
||||
"""Extracts hashtags from an incoming post and updates the
|
||||
relevant tags files.
|
||||
|
@ -91,6 +118,8 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
|||
print('Creating tags directory')
|
||||
os.mkdir(tagsDir)
|
||||
|
||||
hashtagCategories = getHashtagCategories(baseDir)
|
||||
|
||||
for tag in postJsonObject['object']['tag']:
|
||||
if not tag.get('type'):
|
||||
continue
|
||||
|
@ -122,6 +151,14 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
|||
tagsFilename + ' ' + str(e))
|
||||
removeOldHashtags(baseDir, 3)
|
||||
|
||||
# automatically assign a category to the tag if possible
|
||||
categoryFilename = tagsDir + '/' + tagName + '.category'
|
||||
if not os.path.isfile(categoryFilename):
|
||||
categoryStr = \
|
||||
guessHashtagCategory(tagName, hashtagCategories)
|
||||
if categoryStr:
|
||||
setHashtagCategory(baseDir, tagName, categoryStr)
|
||||
|
||||
|
||||
def inboxStorePostToHtmlCache(recentPostsCache: {}, maxRecentPosts: int,
|
||||
translate: {},
|
||||
|
|
15
tests.py
15
tests.py
|
@ -71,6 +71,7 @@ from delete import sendDeleteViaServer
|
|||
from inbox import jsonPostAllowsComments
|
||||
from inbox import validInbox
|
||||
from inbox import validInboxFilenames
|
||||
from inbox import guessHashtagCategory
|
||||
from content import htmlReplaceEmailQuote
|
||||
from content import htmlReplaceQuoteMarks
|
||||
from content import dangerousMarkup
|
||||
|
@ -2421,8 +2422,22 @@ def testValidNickname():
|
|||
assert not validNickname(domain, nickname)
|
||||
|
||||
|
||||
def testGuessHashtagCategory() -> None:
    """Checks that guessHashtagCategory returns the category whose
    hashtag is contained within the supplied tag name
    """
    print('testGuessHashtagCategory')
    knownCategories = {
        "foo": ["swan", "goose"],
        "bar": ["cat", "mouse"]
    }
    # pairs of (tag name to categorise, category expected back)
    expectations = (
        ("unspecifiedgoose", "foo"),
        ("catpic", "bar")
    )
    for tagName, expectedCategory in expectations:
        assert guessHashtagCategory(tagName, knownCategories) == \
            expectedCategory
|
||||
|
||||
|
||||
def runAllTests():
|
||||
print('Running tests...')
|
||||
testGuessHashtagCategory()
|
||||
testValidNickname()
|
||||
testParseFeedDate()
|
||||
testFirstParagraphFromString()
|
||||
|
|
Loading…
Reference in New Issue