mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			Remove square capitals when filtering
							parent
							
								
									e40ac467fd
								
							
						
					
					
						commit
						e2ba518b96
					
				| 
						 | 
				
			
			@ -13,6 +13,7 @@ from utils import text_in_file
 | 
			
		|||
from utils import remove_eol
 | 
			
		||||
from utils import standardize_text
 | 
			
		||||
from utils import remove_inverted_text
 | 
			
		||||
from utils import remove_square_capitals
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def add_filter(base_dir: str, nickname: str, domain: str, words: str) -> bool:
 | 
			
		||||
| 
						 | 
				
			
			@ -125,6 +126,7 @@ def _is_filtered_base(filename: str, content: str,
 | 
			
		|||
        return False
 | 
			
		||||
 | 
			
		||||
    content = remove_inverted_text(content, system_language)
 | 
			
		||||
    content = remove_square_capitals(content, system_language)
 | 
			
		||||
 | 
			
		||||
    # convert any fancy characters to ordinary ones
 | 
			
		||||
    content = standardize_text(content)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										10
									
								
								tests.py
								
								
								
								
							
							
						
						
									
										10
									
								
								tests.py
								
								
								
								
							| 
						 | 
				
			
			@ -55,6 +55,7 @@ from follow import send_follow_request_via_server
 | 
			
		|||
from follow import send_unfollow_request_via_server
 | 
			
		||||
from siteactive import site_is_active
 | 
			
		||||
from utils import remove_inverted_text
 | 
			
		||||
from utils import remove_square_capitals
 | 
			
		||||
from utils import standardize_text
 | 
			
		||||
from utils import remove_eol
 | 
			
		||||
from utils import text_in_file
 | 
			
		||||
| 
						 | 
				
			
			@ -7562,6 +7563,15 @@ def _test_uninvert():
 | 
			
		|||
        print('result: ' + result)
 | 
			
		||||
    assert result == expected
 | 
			
		||||
 | 
			
		||||
    text = '🅻🅴🆅🅸🅰🆃🅰🆁 abc'
 | 
			
		||||
    expected = "LEVIATAR abc"
 | 
			
		||||
    result = remove_square_capitals(text, 'en')
 | 
			
		||||
    if result != expected:
 | 
			
		||||
        print('expected: ' + expected)
 | 
			
		||||
        print('result: ' + result)
 | 
			
		||||
        print('text: ' + text)
 | 
			
		||||
    assert result == expected
 | 
			
		||||
 | 
			
		||||
    text = '<p>Some ordinary text</p><p>ʇsǝʇ ɐ sı sıɥʇ</p>'
 | 
			
		||||
    expected = "<p>Some ordinary text</p><p>this is a test</p>"
 | 
			
		||||
    result = remove_inverted_text(text, 'en')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										18
									
								
								utils.py
								
								
								
								
							
							
						
						
									
										18
									
								
								utils.py
								
								
								
								
							| 
						 | 
				
			
			@ -3915,3 +3915,21 @@ def remove_inverted_text(text: str, system_language: str) -> str:
 | 
			
		|||
                new_text += separator
 | 
			
		||||
 | 
			
		||||
    return new_text
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def remove_square_capitals(text: str, system_language: str) -> str:
    """Replaces any square capital text within the given string
    with the equivalent ordinary capital letters.

    Square capitals are the 26 consecutive code points beginning at
    '🅰' (U+1F170) and ending at '🆉' (U+1F189). Every other character
    is passed through unchanged.

    The conversion is only applied when system_language is 'en',
    otherwise the text is returned as-is.
    """
    if system_language != 'en':
        return text
    offset = ord('A')
    start_value = ord('🅰')
    # Exactly 26 letters: the last square capital is start_value + 25.
    # Going one further would turn the following code point (U+1F18A)
    # into '[', which is not a letter
    letters_count = 26
    square_to_plain = {
        start_value + index: chr(offset + index)
        for index in range(letters_count)
    }
    return text.translate(square_to_plain)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue