Don't guess very small tags

main
Bob Mottram 2020-12-22 10:13:15 +00:00
parent 69a24ad454
commit 679c06b20e
1 changed file with 13 additions and 7 deletions

View File

@@ -81,14 +81,20 @@ def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
for categoryStr, hashtagList in hashtagCategories.items(): for categoryStr, hashtagList in hashtagCategories.items():
for hashtag in hashtagList: for hashtag in hashtagList:
if hashtag in tagName or tagName in hashtag: if len(hashtag) < 3:
if not categoryMatched: # avoid matching very small strings which often
tagMatchedLen = len(hashtag) # lead to spurious categories
continue
if hashtag not in tagName:
if tagName not in hashtag:
continue
if not categoryMatched:
tagMatchedLen = len(hashtag)
categoryMatched = categoryStr
else:
# match the longest tag
if len(hashtag) > tagMatchedLen:
categoryMatched = categoryStr categoryMatched = categoryStr
else:
# match the longest tag
if len(hashtag) > tagMatchedLen:
categoryMatched = categoryStr
if not categoryMatched: if not categoryMatched:
return return
return categoryMatched return categoryMatched