mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			Minimum hashtag length for category learning
							parent
							
								
									a088d4dde2
								
							
						
					
					
						commit
						dbf5a9ecbd
					
				| 
						 | 
				
			
			@ -262,11 +262,12 @@ def set_hashtag_category(base_dir: str, hashtag: str, category: str,
 | 
			
		|||
    return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def guess_hashtag_category(tag_name: str, hashtag_categories: {}) -> str:
 | 
			
		||||
def guess_hashtag_category(tag_name: str, hashtag_categories: {},
 | 
			
		||||
                           min_tag_length: int) -> str:
 | 
			
		||||
    """Tries to guess a category for the given hashtag.
 | 
			
		||||
    This works by trying to find the longest similar hashtag
 | 
			
		||||
    """
 | 
			
		||||
    if len(tag_name) < 6:
 | 
			
		||||
    if len(tag_name) < min_tag_length:
 | 
			
		||||
        return ''
 | 
			
		||||
 | 
			
		||||
    category_matched = ''
 | 
			
		||||
| 
						 | 
				
			
			@ -274,7 +275,7 @@ def guess_hashtag_category(tag_name: str, hashtag_categories: {}) -> str:
 | 
			
		|||
 | 
			
		||||
    for category_str, hashtag_list in hashtag_categories.items():
 | 
			
		||||
        for hashtag in hashtag_list:
 | 
			
		||||
            if len(hashtag) < 6:
 | 
			
		||||
            if len(hashtag) < min_tag_length:
 | 
			
		||||
                # avoid matching very small strings which often
 | 
			
		||||
                # lead to spurious categories
 | 
			
		||||
                continue
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										2
									
								
								inbox.py
								
								
								
								
							
							
						
						
									
										2
									
								
								inbox.py
								
								
								
								
							| 
						 | 
				
			
			@ -440,7 +440,7 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str,
 | 
			
		|||
            if not os.path.isfile(category_filename):
 | 
			
		||||
                hashtag_categories = get_hashtag_categories(base_dir)
 | 
			
		||||
                category_str = \
 | 
			
		||||
                    guess_hashtag_category(tag_name, hashtag_categories)
 | 
			
		||||
                    guess_hashtag_category(tag_name, hashtag_categories, 6)
 | 
			
		||||
                if category_str:
 | 
			
		||||
                    set_hashtag_category(base_dir, tag_name,
 | 
			
		||||
                                         category_str, False)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										4
									
								
								tests.py
								
								
								
								
							
							
						
						
									
										4
									
								
								tests.py
								
								
								
								
							| 
						 | 
				
			
			@ -4838,10 +4838,10 @@ def _test_guess_tag_category() -> None:
 | 
			
		|||
        "foo": ["swan", "goose"],
 | 
			
		||||
        "bar": ["cats", "mouse"]
 | 
			
		||||
    }
 | 
			
		||||
    guess = guess_hashtag_category("unspecifiedgoose", hashtag_categories)
 | 
			
		||||
    guess = guess_hashtag_category("unspecifiedgoose", hashtag_categories, 4)
 | 
			
		||||
    assert guess == "foo"
 | 
			
		||||
 | 
			
		||||
    guess = guess_hashtag_category("mastocats", hashtag_categories)
 | 
			
		||||
    guess = guess_hashtag_category("mastocats", hashtag_categories, 4)
 | 
			
		||||
    assert guess == "bar"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue