forked from indymedia/epicyon
Guess hashtag categorisations
parent
60abb1a0a6
commit
67e06f65c9
37
inbox.py
37
inbox.py
|
@ -30,6 +30,8 @@ from utils import loadJson
|
||||||
from utils import saveJson
|
from utils import saveJson
|
||||||
from utils import updateLikesCollection
|
from utils import updateLikesCollection
|
||||||
from utils import undoLikesCollectionEntry
|
from utils import undoLikesCollectionEntry
|
||||||
|
from utils import getHashtagCategories
|
||||||
|
from utils import setHashtagCategory
|
||||||
from httpsig import verifyPostHeaders
|
from httpsig import verifyPostHeaders
|
||||||
from session import createSession
|
from session import createSession
|
||||||
from session import getJson
|
from session import getJson
|
||||||
|
@ -68,6 +70,31 @@ from happening import saveEventPost
|
||||||
from delete import removeOldHashtags
|
from delete import removeOldHashtags
|
||||||
|
|
||||||
|
|
||||||
|
def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
    """Tries to guess a category for the given hashtag.

    This works by trying to find the longest similar hashtag: every
    hashtag which already belongs to a category is tested to see whether
    it occurs as a substring of tagName, and the category of the longest
    such hashtag is chosen. If several matching hashtags have the same
    longest length then the first one encountered is used.

    tagName: the hashtag name to be categorised
    hashtagCategories: dictionary mapping a category name to the list
        of hashtag names within that category

    Returns the matched category name, or None if no match was found.
    """
    categoryMatched = ''
    tagMatchedLen = 0

    for categoryStr, hashtagList in hashtagCategories.items():
        for hashtag in hashtagList:
            # an empty string is a substring of every tag name, so it
            # says nothing about similarity and is ignored
            if not hashtag:
                continue
            if hashtag not in tagName:
                continue
            # Only a strictly longer match replaces the current best.
            # The best length is updated on every replacement, otherwise
            # later candidates would be compared against the first match
            # rather than against the longest match found so far.
            if len(hashtag) > tagMatchedLen:
                tagMatchedLen = len(hashtag)
                categoryMatched = categoryStr

    if not categoryMatched:
        # callers test the result for truthiness
        return None
    return categoryMatched
|
||||||
|
|
||||||
|
|
||||||
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
||||||
"""Extracts hashtags from an incoming post and updates the
|
"""Extracts hashtags from an incoming post and updates the
|
||||||
relevant tags files.
|
relevant tags files.
|
||||||
|
@ -91,6 +118,8 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
||||||
print('Creating tags directory')
|
print('Creating tags directory')
|
||||||
os.mkdir(tagsDir)
|
os.mkdir(tagsDir)
|
||||||
|
|
||||||
|
hashtagCategories = getHashtagCategories(baseDir)
|
||||||
|
|
||||||
for tag in postJsonObject['object']['tag']:
|
for tag in postJsonObject['object']['tag']:
|
||||||
if not tag.get('type'):
|
if not tag.get('type'):
|
||||||
continue
|
continue
|
||||||
|
@ -122,6 +151,14 @@ def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
||||||
tagsFilename + ' ' + str(e))
|
tagsFilename + ' ' + str(e))
|
||||||
removeOldHashtags(baseDir, 3)
|
removeOldHashtags(baseDir, 3)
|
||||||
|
|
||||||
|
# automatically assign a category to the tag if possible
|
||||||
|
categoryFilename = tagsDir + '/' + tagName + '.category'
|
||||||
|
if not os.path.isfile(categoryFilename):
|
||||||
|
categoryStr = \
|
||||||
|
guessHashtagCategory(tagName, hashtagCategories)
|
||||||
|
if categoryStr:
|
||||||
|
setHashtagCategory(baseDir, tagName, categoryStr)
|
||||||
|
|
||||||
|
|
||||||
def inboxStorePostToHtmlCache(recentPostsCache: {}, maxRecentPosts: int,
|
def inboxStorePostToHtmlCache(recentPostsCache: {}, maxRecentPosts: int,
|
||||||
translate: {},
|
translate: {},
|
||||||
|
|
15
tests.py
15
tests.py
|
@ -71,6 +71,7 @@ from delete import sendDeleteViaServer
|
||||||
from inbox import jsonPostAllowsComments
|
from inbox import jsonPostAllowsComments
|
||||||
from inbox import validInbox
|
from inbox import validInbox
|
||||||
from inbox import validInboxFilenames
|
from inbox import validInboxFilenames
|
||||||
|
from inbox import guessHashtagCategory
|
||||||
from content import htmlReplaceEmailQuote
|
from content import htmlReplaceEmailQuote
|
||||||
from content import htmlReplaceQuoteMarks
|
from content import htmlReplaceQuoteMarks
|
||||||
from content import dangerousMarkup
|
from content import dangerousMarkup
|
||||||
|
@ -2421,8 +2422,22 @@ def testValidNickname():
|
||||||
assert not validNickname(domain, nickname)
|
assert not validNickname(domain, nickname)
|
||||||
|
|
||||||
|
|
||||||
|
def testGuessHashtagCategory() -> None:
    """Checks that guessHashtagCategory returns the category whose
    known hashtag occurs within the supplied tag name
    """
    print('testGuessHashtagCategory')
    hashtagCategories = {
        "foo": ["swan", "goose"],
        "bar": ["cat", "mouse"]
    }
    # pairs of (tag name to be categorised, expected category)
    expectedGuesses = (
        ("unspecifiedgoose", "foo"),
        ("catpic", "bar")
    )
    for tagName, expectedCategory in expectedGuesses:
        guess = guessHashtagCategory(tagName, hashtagCategories)
        assert guess == expectedCategory
|
||||||
|
|
||||||
|
|
||||||
def runAllTests():
|
def runAllTests():
|
||||||
print('Running tests...')
|
print('Running tests...')
|
||||||
|
testGuessHashtagCategory()
|
||||||
testValidNickname()
|
testValidNickname()
|
||||||
testParseFeedDate()
|
testParseFeedDate()
|
||||||
testFirstParagraphFromString()
|
testFirstParagraphFromString()
|
||||||
|
|
Loading…
Reference in New Issue