mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			
		
			
				
	
	
		
			229 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			229 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			Python
		
	
	
| __filename__ = "unicodetext.py"
 | ||
| __author__ = "Bob Mottram"
 | ||
| __license__ = "AGPL3+"
 | ||
| __version__ = "1.6.0"
 | ||
| __maintainer__ = "Bob Mottram"
 | ||
| __email__ = "bob@libreserver.org"
 | ||
| __status__ = "Production"
 | ||
| __module_group__ = "Core"
 | ||
| 
 | ||
| # functions which deal with fancy unicode text characters.
 | ||
| # Such text is "clever", but fucks up screen readers and accessibility
 | ||
| # in general
 | ||
| 
 | ||
| 
 | ||
def uninvert_text(text: str) -> str:
    """Returns the given text with upside-down (inverted) characters
    converted back into ordinary ones.

    Every character is looked up in a table of upside-down lookalikes.
    If more than half of the characters are recognised then the text is
    treated as inverted: recognised characters are replaced by their
    ordinary equivalents, the character order is reversed, and brackets
    and the digits 6/9 are swapped for their mirror images.

    Text shorter than four characters, or text in which half or fewer of
    the characters are recognised, is returned unchanged.
    """
    if len(text) < 4:
        return text

    # ordinary character -> its upside-down lookalike
    flip_table = {
        '\u0021': '\u00A1',
        '\u0022': '\u201E',
        '\u0026': '\u214B',
        '\u002E': '\u02D9',
        '\u0033': '\u0190',
        '\u0034': '\u152D',
        '\u0037': '\u2C62',
        '\u003B': '\u061B',
        '\u003F': '\u00BF',
        '\u0041': '\u2200',
        # U+10412 lies outside the four hex digit range of \u, so the
        # eight digit \U escape is needed to get a single character
        '\u0042': '\U00010412',
        '\u0043': '\u2183',
        '\u0044': '\u25D6',
        '\u0045': '\u018E',
        '\u0046': '\u2132',
        '\u0047': '\u2141',
        '\u004A': '\u017F',
        '\u004B': '\u22CA',
        '\u004C': '\u2142',
        '\u004D': '\u0057',
        '\u004E': '\u1D0E',
        '\u0050': '\u0500',
        '\u0051': '\u038C',
        '\u0052': '\u1D1A',
        '\u0054': '\u22A5',
        '\u0055': '\u2229',
        '\u0056': '\u1D27',
        '\u0059': '\u2144',
        '\u005F': '\u203E',
        '\u0061': '\u0250',
        '\u0062': '\u0071',
        '\u0063': '\u0254',
        '\u0064': '\u0070',
        '\u0065': '\u01DD',
        '\u0066': '\u025F',
        '\u0067': '\u0183',
        '\u0068': '\u0265',
        '\u0069': '\u0131',
        '\u006A': '\u027E',
        '\u006B': '\u029E',
        '\u006C': '\u0283',
        '\u006D': '\u026F',
        '\u006E': '\u0075',
        '\u0072': '\u0279',
        '\u0074': '\u0287',
        '\u0076': '\u028C',
        '\u0077': '\u028D',
        '\u0079': '\u028E',
        '\u203F': '\u2040',
        '\u2234': '\u2235'
    }
    # Reverse lookup (lookalike -> ordinary character). The lookalikes in
    # the table are all distinct, so a single dict access per character
    # gives the same answer as scanning the table for the first match.
    unflip_table = {ch_inv: ch1 for ch1, ch_inv in flip_table.items()}

    # Note: a few lookalikes are themselves ordinary letters (W, q, p, u),
    # so those letters also count as matches.
    matches = 0
    unflipped = []
    for ch_test in text:
        ch_plain = unflip_table.get(ch_test)
        if ch_plain is None:
            unflipped.append(ch_test)
        else:
            matches += 1
            unflipped.append(ch_plain)

    # only accept the conversion if the text is mostly inverted
    if matches <= len(text) / 2:
        return text

    # characters which become their mirror image when text is turned
    # upside down
    extra_replace = {
        '[': ']',
        ']': '[',
        '(': ')',
        ')': '(',
        '<': '>',
        '>': '<',
        '9': '6',
        '6': '9'
    }
    # inverted text reads right to left, so reverse the character order
    # while applying the mirror image swaps
    return ''.join(extra_replace.get(ch1, ch1)
                   for ch1 in reversed(unflipped))
 | ||
| 
 | ||
| 
 | ||
def remove_inverted_text(text: str, system_language: str) -> str:
    """Replaces upside-down characters within the given text with
    ordinary letters, working one paragraph at a time.

    Only applies when the system language is 'en', otherwise the text
    is returned untouched. Paragraphs are delimited by </p> when that
    tag is present in the text and by newlines otherwise. A paragraph in
    which more than two distinct upside-down characters were found is
    also reversed. Empty paragraphs are dropped.
    """
    if system_language != 'en':
        return text

    text = uninvert_text(text)

    # Each string lists upside-down letters counting backwards through
    # the alphabet from the paired letter (position 0 is z/Z, position 1
    # is y/Y and so on). An underscore marks a letter with no entry.
    inverted_alphabets = (
        ("_ʎ_ʍʌ_ʇ_ɹ____ɯʃʞɾıɥƃɟǝ_ɔ_ɐ", ord('z')),
        ("_⅄__ᴧ∩⊥_ᴚΌԀ_ᴎ_⅂⋊ſ__⅁ℲƎ◖Ↄ𐐒∀", ord('Z'))
    )

    if '</p>' in text:
        # html: opening tags are stripped here and added back per paragraph
        text = text.replace('<p>', '')
        opening = '<p>'
        closing = '</p>'
    else:
        opening = ''
        closing = '\n'
    # the delimiter is only written back if the text contained one
    append_closing = closing in text

    pieces = []
    for para in text.split(closing):
        distinct_found = 0
        for inverted_chars, last_letter in inverted_alphabets:
            for position, inv_ch in enumerate(inverted_chars):
                if inv_ch == '_' or inv_ch not in para:
                    continue
                para = para.replace(inv_ch, chr(last_letter - position))
                distinct_found += 1

        if distinct_found > 2:
            para = para[::-1]
        if not para:
            continue
        pieces.append(opening + para)
        if append_closing:
            pieces.append(closing)

    return ''.join(pieces)
 | ||
| 
 | ||
| 
 | ||
def remove_square_capitals(text: str, system_language: str) -> str:
    """Replaces any square capital letters (the 26 code points starting
    at 🅰) within the given text with the ordinary capitals A-Z.

    Only applies when the system language is 'en', otherwise the text
    is returned untouched. All other characters are kept as they are.
    """
    if system_language != 'en':
        return text
    offset = ord('A')
    start_value = ord('🅰')
    # Exclusive upper bound. The alphabet has 26 letters, so the last
    # square capital is start_value + 25. Treating start_value + 26 as
    # part of the range would turn that character into '['.
    end_value = start_value + 26
    converted = []
    for text_ch in text:
        text_value = ord(text_ch)
        if start_value <= text_value < end_value:
            converted.append(chr(offset + text_value - start_value))
        else:
            converted.append(text_ch)
    return ''.join(converted)
 | ||
| 
 | ||
| 
 | ||
| def _standardize_text_range(text: str,
 | ||
|                             range_start: int, range_end: int,
 | ||
|                             offset: str) -> str:
 | ||
|     """Convert any fancy characters within the given range into ordinary ones
 | ||
|     """
 | ||
|     offset = ord(offset)
 | ||
|     ctr = 0
 | ||
|     text = list(text)
 | ||
|     while ctr < len(text):
 | ||
|         val = ord(text[ctr])
 | ||
|         if val in range(range_start, range_end):
 | ||
|             text[ctr] = chr(val - range_start + offset)
 | ||
|         ctr += 1
 | ||
|     return "".join(text)
 | ||
| 
 | ||
| 
 | ||
def standardize_text(text: str) -> str:
    """Converts fancy unicode alphabets within the given text into
    ordinary letters, then uninverts the result.

    Empty (or otherwise falsy) text is returned as it is.
    """
    if not text:
        return text

    alphabet_length = 26
    # For each ordinary starting letter, the first code point of every
    # fancy alphabet which is to be mapped onto it. Each alphabet spans
    # alphabet_length consecutive code points.
    fancy_alphabets = (
        ('a', (65345, 119886, 119990, 120042, 120094,
               120146, 120198, 120302, 120354, 120406)),
        ('A', (65313, 119912, 119964, 120016, 120068,
               120120, 120172, 120224, 120328, 120380,
               120432, 127344, 127312, 127280, 127248))
    )
    for plain_first, first_code_points in fancy_alphabets:
        for first_code in first_code_points:
            text = _standardize_text_range(text, first_code,
                                           first_code + alphabet_length,
                                           plain_first)

    return uninvert_text(text)
 |