mirror of https://gitlab.com/bashrc2/epicyon
Minimum hashtag length for category learning
parent
a088d4dde2
commit
dbf5a9ecbd
|
@ -262,11 +262,12 @@ def set_hashtag_category(base_dir: str, hashtag: str, category: str,
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def guess_hashtag_category(tag_name: str, hashtag_categories: {}) -> str:
|
def guess_hashtag_category(tag_name: str, hashtag_categories: {},
|
||||||
|
min_tag_length: int) -> str:
|
||||||
"""Tries to guess a category for the given hashtag.
|
"""Tries to guess a category for the given hashtag.
|
||||||
This works by trying to find the longest similar hashtag
|
This works by trying to find the longest similar hashtag
|
||||||
"""
|
"""
|
||||||
if len(tag_name) < 6:
|
if len(tag_name) < min_tag_length:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
category_matched = ''
|
category_matched = ''
|
||||||
|
@ -274,7 +275,7 @@ def guess_hashtag_category(tag_name: str, hashtag_categories: {}) -> str:
|
||||||
|
|
||||||
for category_str, hashtag_list in hashtag_categories.items():
|
for category_str, hashtag_list in hashtag_categories.items():
|
||||||
for hashtag in hashtag_list:
|
for hashtag in hashtag_list:
|
||||||
if len(hashtag) < 6:
|
if len(hashtag) < min_tag_length:
|
||||||
# avoid matching very small strings which often
|
# avoid matching very small strings which often
|
||||||
# lead to spurious categories
|
# lead to spurious categories
|
||||||
continue
|
continue
|
||||||
|
|
2
inbox.py
2
inbox.py
|
@ -440,7 +440,7 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str,
|
||||||
if not os.path.isfile(category_filename):
|
if not os.path.isfile(category_filename):
|
||||||
hashtag_categories = get_hashtag_categories(base_dir)
|
hashtag_categories = get_hashtag_categories(base_dir)
|
||||||
category_str = \
|
category_str = \
|
||||||
guess_hashtag_category(tag_name, hashtag_categories)
|
guess_hashtag_category(tag_name, hashtag_categories, 6)
|
||||||
if category_str:
|
if category_str:
|
||||||
set_hashtag_category(base_dir, tag_name,
|
set_hashtag_category(base_dir, tag_name,
|
||||||
category_str, False)
|
category_str, False)
|
||||||
|
|
4
tests.py
4
tests.py
|
@ -4838,10 +4838,10 @@ def _test_guess_tag_category() -> None:
|
||||||
"foo": ["swan", "goose"],
|
"foo": ["swan", "goose"],
|
||||||
"bar": ["cats", "mouse"]
|
"bar": ["cats", "mouse"]
|
||||||
}
|
}
|
||||||
guess = guess_hashtag_category("unspecifiedgoose", hashtag_categories)
|
guess = guess_hashtag_category("unspecifiedgoose", hashtag_categories, 4)
|
||||||
assert guess == "foo"
|
assert guess == "foo"
|
||||||
|
|
||||||
guess = guess_hashtag_category("mastocats", hashtag_categories)
|
guess = guess_hashtag_category("mastocats", hashtag_categories, 4)
|
||||||
assert guess == "bar"
|
assert guess == "bar"
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue