Minimum hashtag length for category learning

main
Bob Mottram 2024-01-10 13:51:19 +00:00
parent a088d4dde2
commit dbf5a9ecbd
3 changed files with 7 additions and 6 deletions

View File

@ -262,11 +262,12 @@ def set_hashtag_category(base_dir: str, hashtag: str, category: str,
return False
def guess_hashtag_category(tag_name: str, hashtag_categories: {}) -> str:
def guess_hashtag_category(tag_name: str, hashtag_categories: {},
min_tag_length: int) -> str:
"""Tries to guess a category for the given hashtag.
This works by trying to find the longest similar hashtag
"""
if len(tag_name) < 6:
if len(tag_name) < min_tag_length:
return ''
category_matched = ''
@ -274,7 +275,7 @@ def guess_hashtag_category(tag_name: str, hashtag_categories: {}) -> str:
for category_str, hashtag_list in hashtag_categories.items():
for hashtag in hashtag_list:
if len(hashtag) < 6:
if len(hashtag) < min_tag_length:
# avoid matching very small strings which often
# lead to spurious categories
continue

View File

@ -440,7 +440,7 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str,
if not os.path.isfile(category_filename):
hashtag_categories = get_hashtag_categories(base_dir)
category_str = \
guess_hashtag_category(tag_name, hashtag_categories)
guess_hashtag_category(tag_name, hashtag_categories, 6)
if category_str:
set_hashtag_category(base_dir, tag_name,
category_str, False)

View File

@ -4838,10 +4838,10 @@ def _test_guess_tag_category() -> None:
"foo": ["swan", "goose"],
"bar": ["cats", "mouse"]
}
guess = guess_hashtag_category("unspecifiedgoose", hashtag_categories)
guess = guess_hashtag_category("unspecifiedgoose", hashtag_categories, 4)
assert guess == "foo"
guess = guess_hashtag_category("mastocats", hashtag_categories)
guess = guess_hashtag_category("mastocats", hashtag_categories, 4)
assert guess == "bar"