From 66fcf918a84e60e6e2191a76155d52e45eb6ddfb Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 9 Jul 2022 12:24:11 +0100 Subject: [PATCH] More fancy unicode character ranges --- utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/utils.py b/utils.py index 73c0cf4b4..9d0603b66 100644 --- a/utils.py +++ b/utils.py @@ -67,6 +67,9 @@ def standardize_text(text: str) -> str: [120094, 'a'], [120146, 'a'], [120198, 'a'], + [120302, 'a'], + [120354, 'a'], + [120406, 'a'], [65313, 'A'], [119912, 'A'], [119964, 'A'], @@ -74,7 +77,10 @@ def standardize_text(text: str) -> str: [120068, 'A'], [120120, 'A'], [120172, 'A'], - [120224, 'A'] + [120224, 'A'], + [120328, 'A'], + [120380, 'A'], + [120432, 'A'] ) for char_range in char_ranges: range_start = char_range[0]