Remove more bold unicode characters

Fancy characters are 'clever', but fuck up screen readers
main
Bob Mottram 2025-07-15 10:33:47 +01:00
parent 0e1dc80506
commit 6c3ebac1c6
2 changed files with 24 additions and 2 deletions

View File

@@ -8350,6 +8350,22 @@ def _test_hashtag_maps():
assert int(longitude * 1000) == -3150
def _test_unbold():
    """Check that standardize_text replaces bold unicode letters and
    digits with their plain equivalents.

    If the result does not match, the input text, the expected text and
    the actual result are printed, followed by every input character
    together with its code point, before the assertion fails.
    """
    print('test_unbold')
    text = "Latest 𝗩aluable 𝗡ews - 𝟮𝟬𝟮𝟱/𝟬𝟳/𝟭𝟰 Valuable News"
    expected = "Latest Valuable News - 2025/07/14 Valuable News"
    result = standardize_text(text)
    if result != expected:
        print('text: ' + text)
        print('expected: ' + expected)
        print('result: ' + result)
        # show each input character with its code point so that any
        # character which was not converted can be identified
        print(''.join(char + '(' + str(ord(char)) + ') ' for char in text))
    assert result == expected
def _test_uninvert():
print('test_uninvert')
text = 'ʇsƎʇ ɐ sı sıɥ⊥'
@@ -9413,6 +9429,7 @@ def run_all_tests():
_test_reply_language(base_dir)
_test_emoji_in_actor_name(base_dir)
_test_uninvert()
_test_unbold()
_test_hashtag_maps()
_test_combine_lines()
_test_text_standardize()

View File

@@ -217,11 +217,16 @@ def standardize_text(text: str) -> str:
[127344, 'A'],
[127312, 'A'],
[127280, 'A'],
[127248, 'A']
[127248, 'A'],
[120276, 'A'],
[120812, '0']
)
for char_range in char_ranges:
range_start = char_range[0]
range_end = range_start + 26
if char_range[1] == '0':
range_end = range_start + 10
else:
range_end = range_start + 26
offset = char_range[1]
text = _standardize_text_range(text, range_start, range_end, offset)