mirror of https://gitlab.com/bashrc2/epicyon
229 lines
6.0 KiB
Python
229 lines
6.0 KiB
Python
__filename__ = "unicodetext.py"
|
||
__author__ = "Bob Mottram"
|
||
__license__ = "AGPL3+"
|
||
__version__ = "1.6.0"
|
||
__maintainer__ = "Bob Mottram"
|
||
__email__ = "bob@libreserver.org"
|
||
__status__ = "Production"
|
||
__module_group__ = "Core"
|
||
|
||
# functions which deal with fancy unicode text characters.
|
||
# Such text is "clever", but fucks up screen readers and accessibility
|
||
# in general
|
||
|
||
|
||
def uninvert_text(text: str) -> str:
    """Return text with upside-down ("flipped") unicode lettering undone.

    Every character of the text is looked up among the inverted glyphs
    of the flip table. If more than half of the characters are inverted
    glyphs then the text is treated as inverted: each recognised glyph is
    replaced by its upright character, the character order is reversed,
    and mirrored brackets and the digits 6/9 are exchanged.

    Texts shorter than four characters, and texts where half or fewer of
    the characters are inverted glyphs, are returned unchanged.
    """
    if len(text) < 4:
        return text

    # upright character -> glyph used to display it upside down
    flip_table = {
        '\u0021': '\u00A1',
        '\u0022': '\u201E',
        '\u0026': '\u214B',
        '\u002E': '\u02D9',
        '\u0033': '\u0190',
        '\u0034': '\u152D',
        '\u0037': '\u2C62',
        '\u003B': '\u061B',
        '\u003F': '\u00BF',
        '\u0041': '\u2200',
        # U+10412 lies outside the Basic Multilingual Plane, so it needs
        # the eight digit \U escape. '\u10412' would be parsed as the two
        # character string U+1041 followed by '2' and could never match
        '\u0042': '\U00010412',
        '\u0043': '\u2183',
        '\u0044': '\u25D6',
        '\u0045': '\u018E',
        '\u0046': '\u2132',
        '\u0047': '\u2141',
        '\u004A': '\u017F',
        '\u004B': '\u22CA',
        '\u004C': '\u2142',
        '\u004D': '\u0057',
        '\u004E': '\u1D0E',
        '\u0050': '\u0500',
        '\u0051': '\u038C',
        '\u0052': '\u1D1A',
        '\u0054': '\u22A5',
        '\u0055': '\u2229',
        '\u0056': '\u1D27',
        '\u0059': '\u2144',
        '\u005F': '\u203E',
        '\u0061': '\u0250',
        '\u0062': '\u0071',
        '\u0063': '\u0254',
        '\u0064': '\u0070',
        '\u0065': '\u01DD',
        '\u0066': '\u025F',
        '\u0067': '\u0183',
        '\u0068': '\u0265',
        '\u0069': '\u0131',
        '\u006A': '\u027E',
        '\u006B': '\u029E',
        '\u006C': '\u0283',
        '\u006D': '\u026F',
        '\u006E': '\u0075',
        '\u0072': '\u0279',
        '\u0074': '\u0287',
        '\u0076': '\u028C',
        '\u0077': '\u028D',
        '\u0079': '\u028E',
        '\u203F': '\u2040',
        '\u2234': '\u2235'
    }

    # Reverse lookup: inverted glyph -> upright character.
    # setdefault keeps the first table entry if two entries were ever to
    # share the same inverted glyph, matching a first-match table scan
    upright = {}
    for ch1, ch_inv in flip_table.items():
        upright.setdefault(ch_inv, ch1)

    # NOTE: some inverted forms are plain ASCII letters (W, q, p, u), so
    # ordinary text which mostly consists of those letters also counts
    # as inverted. The threshold below is kept as-is to avoid changing
    # which texts get uninverted.
    matches = sum(1 for ch_test in text if ch_test in upright)
    if matches <= len(text) / 2:
        return text

    # characters which are mirrored by turning the text upside down
    extra_replace = {
        '[': ']',
        ']': '[',
        '(': ')',
        ')': '(',
        '<': '>',
        '>': '<',
        '9': '6',
        '6': '9'
    }

    # upside down text reads back to front, so walk it in reverse
    result = []
    for ch_test in reversed(text):
        ch_result = upright.get(ch_test, ch_test)
        result.append(extra_replace.get(ch_result, ch_result))
    return ''.join(result)
|
||
|
||
|
||
def remove_inverted_text(text: str, system_language: str) -> str:
    """Replaces upside-down lettering within the text with plain letters

    Nothing is changed unless the system language is 'en'.
    The text is first passed through uninvert_text, then handled one
    paragraph at a time. Paragraphs are delimited by '</p>' if the text
    contains that tag, otherwise by newlines. Within a paragraph each
    known inverted glyph is replaced by its ordinary letter, and the
    paragraph is reversed when more than two different glyphs were
    replaced. Empty paragraphs are dropped from the returned text.
    """
    if system_language != 'en':
        return text

    text = uninvert_text(text)

    # Inverted glyphs paired with the letter for position zero.
    # Each string runs from z down to a (Z down to A); an underscore is
    # a placeholder position which is skipped
    glyph_sets = (
        ("_ʎ_ʍʌ_ʇ_ɹ____ɯʃʞɾıɥƃɟǝ_ɔ_ɐ", ord('z')),
        ("_⅄__ᴧ∩⊥_ᴚΌԀ_ᴎ_⅂⋊ſ__⅁ℲƎ◖Ↄ𐐒∀", ord('Z'))
    )

    if '</p>' in text:
        # html: opening tags are removed here and added back on output
        text = text.replace('<p>', '')
        start_separator = '<p>'
        separator = '</p>'
    else:
        start_separator = ''
        separator = '\n'
    has_separator = separator in text

    pieces = []
    for para in text.split(separator):
        # number of different inverted glyphs found in this paragraph
        glyphs_replaced = 0
        for glyphs, last_letter in glyph_sets:
            for position, glyph in enumerate(glyphs):
                if glyph == '_' or glyph not in para:
                    continue
                para = para.replace(glyph, chr(last_letter - position))
                glyphs_replaced += 1

        if glyphs_replaced > 2:
            para = para[::-1]
        if not para:
            continue
        pieces.append(start_separator + para)
        if has_separator:
            pieces.append(separator)

    return ''.join(pieces)
|
||
|
||
|
||
def remove_square_capitals(text: str, system_language: str) -> str:
    """Replaces negative squared capital letters with ordinary capitals

    The 26 characters beginning at U+1F170 are converted to 'A'..'Z'.
    All other characters are kept as they are. Nothing is changed unless
    the system language is 'en'.
    """
    if system_language != 'en':
        return text

    start_value = ord('\U0001F170')
    # 26 letters, so the last one is 25 code points after the first.
    # Going one further would turn U+1F18A, which is not a letter,
    # into '[' (the character after 'Z')
    end_value = start_value + 25
    offset = ord('A') - start_value

    return ''.join(
        chr(ord(text_ch) + offset)
        if start_value <= ord(text_ch) <= end_value else text_ch
        for text_ch in text
    )
|
||
|
||
|
||
def _standardize_text_range(text: str,
                            range_start: int, range_end: int,
                            offset: str) -> str:
    """Maps characters within a range of code points onto ordinary ones

    Characters whose code point is at least range_start and less than
    range_end are moved so that range_start becomes the character given
    by offset. Characters outside of the range are not altered.
    """
    first_plain = ord(offset)
    fancy = range(range_start, range_end)
    return "".join(
        chr(ord(char) - range_start + first_plain)
        if ord(char) in fancy else char
        for char in text
    )
|
||
|
||
|
||
def standardize_text(text: str) -> str:
    """Turns fancy unicode lettering into ordinary letters

    Each of a list of 26 character unicode alphabets is converted to
    a-z or A-Z, then the result is passed through uninvert_text.
    Empty text is returned as it is.
    """
    if not text:
        return text

    # code point of the first letter of each lower case fancy alphabet
    lower_starts = (
        65345, 119886, 119990, 120042, 120094,
        120146, 120198, 120302, 120354, 120406
    )
    # code point of the first letter of each upper case fancy alphabet
    upper_starts = (
        65313, 119912, 119964, 120016, 120068,
        120120, 120172, 120224, 120328, 120380,
        120432, 127344, 127312, 127280, 127248
    )

    for first_letter, starts in (('a', lower_starts), ('A', upper_starts)):
        for range_start in starts:
            text = _standardize_text_range(text, range_start,
                                           range_start + 26, first_letter)

    return uninvert_text(text)
|