mirror of https://gitlab.com/bashrc2/epicyon
Minimum hashtag length for category learning
parent
a088d4dde2
commit
dbf5a9ecbd
|
@ -262,11 +262,12 @@ def set_hashtag_category(base_dir: str, hashtag: str, category: str,
|
|||
return False
|
||||
|
||||
|
||||
def guess_hashtag_category(tag_name: str, hashtag_categories: {}) -> str:
|
||||
def guess_hashtag_category(tag_name: str, hashtag_categories: {},
|
||||
min_tag_length: int) -> str:
|
||||
"""Tries to guess a category for the given hashtag.
|
||||
This works by trying to find the longest similar hashtag
|
||||
"""
|
||||
if len(tag_name) < 6:
|
||||
if len(tag_name) < min_tag_length:
|
||||
return ''
|
||||
|
||||
category_matched = ''
|
||||
|
@ -274,7 +275,7 @@ def guess_hashtag_category(tag_name: str, hashtag_categories: {}) -> str:
|
|||
|
||||
for category_str, hashtag_list in hashtag_categories.items():
|
||||
for hashtag in hashtag_list:
|
||||
if len(hashtag) < 6:
|
||||
if len(hashtag) < min_tag_length:
|
||||
# avoid matching very small strings which often
|
||||
# lead to spurious categories
|
||||
continue
|
||||
|
|
2
inbox.py
2
inbox.py
|
@ -440,7 +440,7 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str,
|
|||
if not os.path.isfile(category_filename):
|
||||
hashtag_categories = get_hashtag_categories(base_dir)
|
||||
category_str = \
|
||||
guess_hashtag_category(tag_name, hashtag_categories)
|
||||
guess_hashtag_category(tag_name, hashtag_categories, 6)
|
||||
if category_str:
|
||||
set_hashtag_category(base_dir, tag_name,
|
||||
category_str, False)
|
||||
|
|
4
tests.py
4
tests.py
|
@ -4838,10 +4838,10 @@ def _test_guess_tag_category() -> None:
|
|||
"foo": ["swan", "goose"],
|
||||
"bar": ["cats", "mouse"]
|
||||
}
|
||||
guess = guess_hashtag_category("unspecifiedgoose", hashtag_categories)
|
||||
guess = guess_hashtag_category("unspecifiedgoose", hashtag_categories, 4)
|
||||
assert guess == "foo"
|
||||
|
||||
guess = guess_hashtag_category("mastocats", hashtag_categories)
|
||||
guess = guess_hashtag_category("mastocats", hashtag_categories, 4)
|
||||
assert guess == "bar"
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue