From 6c3ebac1c6b7322c89868313667ecc1d6c4cc4f9 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Tue, 15 Jul 2025 10:33:47 +0100
Subject: [PATCH] Remove more bold unicode characters

Fancy characters are 'clever', but fuck up screen readers
---
 tests.py       | 17 +++++++++++++++++
 unicodetext.py |  9 +++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/tests.py b/tests.py
index 21b66e06b..543707d2a 100644
--- a/tests.py
+++ b/tests.py
@@ -8350,6 +8350,22 @@ def _test_hashtag_maps():
     assert int(longitude * 1000) == -3150
 
 
+def _test_unbold():
+    print('test_unbold')
+    text = "Latest 𝗩aluable 𝗡ews - 𝟮𝟬𝟮𝟱/𝟬𝟳/𝟭𝟰 Valuable News"
+    expected = "Latest Valuable News - 2025/07/14 Valuable News"
+    result = standardize_text(text)
+    if result != expected:
+        print('text: ' + text)
+        print('expected: ' + expected)
+        print('result: ' + result)
+        test = ''
+        for char in text:
+            test += char + '(' + str(ord(char)) + ') '
+        print(test)
+    assert result == expected
+
+
 def _test_uninvert():
     print('test_uninvert')
     text = 'ʇsƎʇ ɐ sı sıɥ⊥'
@@ -9413,6 +9429,7 @@ def run_all_tests():
     _test_reply_language(base_dir)
     _test_emoji_in_actor_name(base_dir)
     _test_uninvert()
+    _test_unbold()
     _test_hashtag_maps()
     _test_combine_lines()
     _test_text_standardize()
diff --git a/unicodetext.py b/unicodetext.py
index c2d6d7149..6e25ab970 100644
--- a/unicodetext.py
+++ b/unicodetext.py
@@ -217,11 +217,16 @@ def standardize_text(text: str) -> str:
         [127344, 'A'],
         [127312, 'A'],
         [127280, 'A'],
-        [127248, 'A']
+        [127248, 'A'],
+        [120276, 'A'],
+        [120812, '0']
     )
     for char_range in char_ranges:
         range_start = char_range[0]
-        range_end = range_start + 26
+        if char_range[1] == '0':
+            range_end = range_start + 10
+        else:
+            range_end = range_start + 26
         offset = char_range[1]
         text = _standardize_text_range(text, range_start, range_end, offset)
 