mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			
		
			
	
	
		
			229 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			Python
		
	
	
		
		
			
		
	
	
			229 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			Python
		
	
	
|  | __filename__ = "unicodetext.py" | |||
|  | __author__ = "Bob Mottram" | |||
|  | __license__ = "AGPL3+" | |||
|  | __version__ = "1.6.0" | |||
|  | __maintainer__ = "Bob Mottram" | |||
|  | __email__ = "bob@libreserver.org" | |||
|  | __status__ = "Production" | |||
|  | __module_group__ = "Core" | |||
|  | 
 | |||
|  | # functions which deal with fancy unicode text characters. | |||
|  | # Such text is "clever", but fucks up screen readers and accessibility | |||
|  | # in general | |||
|  | 
 | |||
|  | 
 | |||
def uninvert_text(text: str) -> str:
    """Returns an upright version of text which appears to have been
    written upside-down using lookalike unicode characters.

    Each character is compared against the inverted forms in the flip
    table. If more than half of the characters are recognised as inverted
    then the whole string is reversed, every recognised character is
    replaced by its upright equivalent, and brackets plus the digits 6/9
    are swapped for their mirror images. Otherwise the text is returned
    unchanged. Strings shorter than four characters are never altered.
    """
    if len(text) < 4:
        return text

    # upright character -> the character used to imitate it upside-down
    flip_table = {
        '\u0021': '\u00A1',
        '\u0022': '\u201E',
        '\u0026': '\u214B',
        '\u002E': '\u02D9',
        '\u0033': '\u0190',
        '\u0034': '\u152D',
        '\u0037': '\u2C62',
        '\u003B': '\u061B',
        '\u003F': '\u00BF',
        '\u0041': '\u2200',
        # This code point is outside the basic multilingual plane, so it
        # needs the eight digit \U escape. '\u10412' would be parsed as
        # the two characters '\u1041' followed by '2' and never match.
        '\u0042': '\U00010412',
        '\u0043': '\u2183',
        '\u0044': '\u25D6',
        '\u0045': '\u018E',
        '\u0046': '\u2132',
        '\u0047': '\u2141',
        '\u004A': '\u017F',
        '\u004B': '\u22CA',
        '\u004C': '\u2142',
        '\u004D': '\u0057',
        '\u004E': '\u1D0E',
        '\u0050': '\u0500',
        '\u0051': '\u038C',
        '\u0052': '\u1D1A',
        '\u0054': '\u22A5',
        '\u0055': '\u2229',
        '\u0056': '\u1D27',
        '\u0059': '\u2144',
        '\u005F': '\u203E',
        '\u0061': '\u0250',
        '\u0062': '\u0071',
        '\u0063': '\u0254',
        '\u0064': '\u0070',
        '\u0065': '\u01DD',
        '\u0066': '\u025F',
        '\u0067': '\u0183',
        '\u0068': '\u0265',
        '\u0069': '\u0131',
        '\u006A': '\u027E',
        '\u006B': '\u029E',
        '\u006C': '\u0283',
        '\u006D': '\u026F',
        '\u006E': '\u0075',
        '\u0072': '\u0279',
        '\u0074': '\u0287',
        '\u0076': '\u028C',
        '\u0077': '\u028D',
        '\u0079': '\u028E',
        '\u203F': '\u2040',
        '\u2234': '\u2235'
    }

    # Reverse lookup: inverted character -> upright character.
    # setdefault keeps the first table entry should two ever share a value.
    upright_for = {}
    for upright, inverted in flip_table.items():
        upright_for.setdefault(inverted, upright)

    matches = 0
    flipped = []
    for ch_test in text:
        upright = upright_for.get(ch_test)
        if upright is None:
            flipped.append(ch_test)
        else:
            matches += 1
            flipped.append(upright)

    # only treat the text as inverted if most of its characters matched
    if matches <= len(text) / 2:
        return text

    # Inverted text reads right-to-left, so reverse it, then swap the
    # characters whose mirror image is another ordinary character.
    mirror = str.maketrans('[]()<>96', '][)(><69')
    return ''.join(reversed(flipped)).translate(mirror)
|  | 
 | |||
|  | 
 | |||
def remove_inverted_text(text: str, system_language: str) -> str:
    """Replaces upside-down lookalike characters with ordinary letters,
    working one paragraph at a time.

    Nothing is changed unless system_language is 'en'. The text is first
    passed through uninvert_text, then split into paragraphs on '</p>'
    when that tag is present, or on newlines otherwise. Within each
    paragraph every known inverted character is replaced by its upright
    letter, and if more than two different inverted characters were found
    the paragraph is also reversed. Empty paragraphs are dropped, and
    each remaining paragraph is followed by the separator whenever the
    separator occurs in the text.
    """
    if system_language != 'en':
        return text

    text = uninvert_text(text)

    # Each string lists the inverted form of the alphabet counting
    # backwards from the paired letter: position 0 is z/Z, position 25 is
    # a/A. An underscore marks a letter with no inverted form.
    flipped_alphabets = (
        (ord('z'), "_ʎ_ʍʌ_ʇ_ɹ____ɯʃʞɾıɥƃɟǝ_ɔ_ɐ"),
        (ord('Z'), "_⅄__ᴧ∩⊥_ᴚΌԀ_ᴎ_⅂⋊ſ__⅁ℲƎ◖Ↄ𐐒∀"),
    )

    if '</p>' in text:
        # html: drop the opening tags and re-add them when rebuilding
        text = text.replace('<p>', '')
        prefix, delimiter = '<p>', '</p>'
    else:
        prefix, delimiter = '', '\n'
    has_delimiter = delimiter in text

    pieces = []
    for para in text.split(delimiter):
        distinct_hits = 0
        for last_letter, glyphs in flipped_alphabets:
            for steps_back, glyph in enumerate(glyphs):
                if glyph == '_' or glyph not in para:
                    continue
                para = para.replace(glyph, chr(last_letter - steps_back))
                distinct_hits += 1

        # several different inverted characters means the paragraph was
        # written upside-down, and hence back to front
        if distinct_hits > 2:
            para = para[::-1]
        if not para:
            continue
        pieces.append(prefix + para)
        if has_delimiter:
            pieces.append(delimiter)

    return ''.join(pieces)
|  | 
 | |||
|  | 
 | |||
def remove_square_capitals(text: str, system_language: str) -> str:
    """Replaces negative squared capital letters with ordinary capitals.

    Nothing is changed unless system_language is 'en'. The 26 code points
    starting at '🅰' are mapped onto 'A' to 'Z'; every other character is
    copied through unchanged.
    """
    if system_language != 'en':
        return text

    first_square = ord('🅰')
    # Inclusive upper bound: there are exactly 26 letters, so the last one
    # is 25 places after the first. Going one further would turn the
    # following code point into '['.
    last_square = first_square + 25
    shift = ord('A') - first_square

    return ''.join(
        chr(ord(text_ch) + shift)
        if first_square <= ord(text_ch) <= last_square
        else text_ch
        for text_ch in text
    )
|  | 
 | |||
|  | 
 | |||
|  | def _standardize_text_range(text: str, | |||
|  |                             range_start: int, range_end: int, | |||
|  |                             offset: str) -> str: | |||
|  |     """Convert any fancy characters within the given range into ordinary ones
 | |||
|  |     """
 | |||
|  |     offset = ord(offset) | |||
|  |     ctr = 0 | |||
|  |     text = list(text) | |||
|  |     while ctr < len(text): | |||
|  |         val = ord(text[ctr]) | |||
|  |         if val in range(range_start, range_end): | |||
|  |             text[ctr] = chr(val - range_start + offset) | |||
|  |         ctr += 1 | |||
|  |     return "".join(text) | |||
|  | 
 | |||
|  | 
 | |||
def standardize_text(text: str) -> str:
    """Converts fancy unicode alphabets into ordinary letters and then
    uninverts the result.

    Each listed code point is the first letter of a run of 26 fancy
    characters, which gets mapped onto a-z or A-Z. Empty or None text is
    returned as-is.
    """
    if not text:
        return text

    alphabet_length = 26

    # code points at which a fancy lowercase alphabet begins
    lowercase_starts = (
        65345, 119886, 119990, 120042, 120094,
        120146, 120198, 120302, 120354, 120406,
    )
    # code points at which a fancy uppercase alphabet begins
    uppercase_starts = (
        65313, 119912, 119964, 120016, 120068,
        120120, 120172, 120224, 120328, 120380,
        120432, 127344, 127312, 127280, 127248,
    )

    alphabets = (('a', lowercase_starts), ('A', uppercase_starts))
    for plain_first, fancy_starts in alphabets:
        for fancy_first in fancy_starts:
            text = _standardize_text_range(text, fancy_first,
                                           fancy_first + alphabet_length,
                                           plain_first)

    return uninvert_text(text)