mirror of https://gitlab.com/bashrc2/epicyon
Remove square capitals when filtering
parent
e40ac467fd
commit
e2ba518b96
|
@ -13,6 +13,7 @@ from utils import text_in_file
|
|||
from utils import remove_eol
|
||||
from utils import standardize_text
|
||||
from utils import remove_inverted_text
|
||||
from utils import remove_square_capitals
|
||||
|
||||
|
||||
def add_filter(base_dir: str, nickname: str, domain: str, words: str) -> bool:
|
||||
|
@ -125,6 +126,7 @@ def _is_filtered_base(filename: str, content: str,
|
|||
return False
|
||||
|
||||
content = remove_inverted_text(content, system_language)
|
||||
content = remove_square_capitals(content, system_language)
|
||||
|
||||
# convert any fancy characters to ordinary ones
|
||||
content = standardize_text(content)
|
||||
|
|
10
tests.py
10
tests.py
|
@ -55,6 +55,7 @@ from follow import send_follow_request_via_server
|
|||
from follow import send_unfollow_request_via_server
|
||||
from siteactive import site_is_active
|
||||
from utils import remove_inverted_text
|
||||
from utils import remove_square_capitals
|
||||
from utils import standardize_text
|
||||
from utils import remove_eol
|
||||
from utils import text_in_file
|
||||
|
@ -7562,6 +7563,15 @@ def _test_uninvert():
|
|||
print('result: ' + result)
|
||||
assert result == expected
|
||||
|
||||
text = '🅻🅴🆅🅸🅰🆃🅰🆁 abc'
|
||||
expected = "LEVIATAR abc"
|
||||
result = remove_square_capitals(text, 'en')
|
||||
if result != expected:
|
||||
print('expected: ' + expected)
|
||||
print('result: ' + result)
|
||||
print('text: ' + text)
|
||||
assert result == expected
|
||||
|
||||
text = '<p>Some ordinary text</p><p>ʇsǝʇ ɐ sı sıɥʇ</p>'
|
||||
expected = "<p>Some ordinary text</p><p>this is a test</p>"
|
||||
result = remove_inverted_text(text, 'en')
|
||||
|
|
18
utils.py
18
utils.py
|
@ -3915,3 +3915,21 @@ def remove_inverted_text(text: str, system_language: str) -> str:
|
|||
new_text += separator
|
||||
|
||||
return new_text
|
||||
|
||||
|
||||
def remove_square_capitals(text: str, system_language: str) -> str:
    """Converts any negative squared capital letters (🅰 to 🆉) within
    the given string into the ordinary capital letters A to Z.
    All other characters are returned unaltered, and if the system
    language is not English then the text is returned unchanged
    """
    if system_language != 'en':
        return text
    offset = ord('A')
    start_value = ord('🅰')
    # There are 26 letters, so the last one (🆉) is at start_value + 25.
    # The code point immediately after it is not a letter and so
    # should not be converted
    end_value = start_value + 25
    result = ''
    for text_ch in text:
        text_value = ord(text_ch)
        if text_value < start_value or text_value > end_value:
            result += text_ch
        else:
            result += chr(offset + text_value - start_value)
    return result
|
||||
|
|
Loading…
Reference in New Issue