def get_dogwhistles(content: str, dogwhistles: {}) -> {}:
    """Returns a dict containing any detected dogwhistle words

    content is the text to be searched. It is passed through
    _get_simplified_content and then split on single spaces to
    obtain the words which are compared against each pattern.

    dogwhistles maps each pattern to its category. A pattern beginning
    with 'x-' (in any case), '*', '~' or '-' has that marker removed
    and then matches any word ending with the remaining text. Any other
    pattern must be equal to a word. Comparisons are case sensitive.

    The returned dict is keyed by the pattern with its marker removed
    and each value has the form {"count": int, "category": category}.
    Patterns which are not found do not appear in the result.
    """
    result = {}
    words = _get_simplified_content(content).split(' ')
    for whistle, category in dogwhistles.items():
        ending = False
        if whistle.lower().startswith('x-'):
            whistle = whistle[2:]
            ending = True
        elif whistle.startswith(('*', '~', '-')):
            whistle = whistle[1:]
            ending = True

        if not whistle:
            # An empty pattern, such as a bare '*' or 'x-', would
            # otherwise match every word because all strings end with ''
            continue

        if ending:
            count = sum(1 for wrd in words if wrd.endswith(whistle))
        else:
            count = words.count(whistle)
        if not count:
            continue

        if whistle in result:
            # Different patterns can reduce to the same key,
            # eg. 'x-foo' and 'foo', so accumulate the count and
            # keep the category of the first one encountered
            result[whistle]['count'] += count
        else:
            result[whistle] = {
                "count": count,
                "category": category
            }
    return result
def _test_dogwhistles():
    """Checks that get_dogwhistles detects both exact word patterns
    and word ending patterns, and reports their count and category
    """
    print('dogwhistles')
    whistles = {
        "X-hamstered": "hamsterism",
        "gerbil": "rodent",
    }

    # Text without any of the patterns gives an empty result
    no_match = get_dogwhistles(
        'This text does not contain any dogwhistles', whistles)
    assert not no_match

    # A pattern without a marker is matched as a whole word
    exact_match = get_dogwhistles('A gerbil named joe', whistles)
    assert exact_match

    # The 'X-' marker matches any word with the given ending
    detected = get_dogwhistles(
        'This content is unhamstered and yhamstered.', whistles)
    assert detected
    assert detected.get('hamstered')
    entry = detected['hamstered']
    assert entry['count'] == 2
    assert entry['category'] == "hamsterism"