diff --git a/content.py b/content.py index 3ade69850..24e7afeda 100644 --- a/content.py +++ b/content.py @@ -1027,6 +1027,8 @@ def detect_dogwhistles(content: str, dogwhistles: {}) -> {}: words = _get_simplified_content(content).split(' ') for whistle, category in dogwhistles.items(): ending = False + starting = False + if whistle.lower().startswith('x-'): whistle = whistle[2:] ending = True @@ -1047,15 +1049,35 @@ def detect_dogwhistles(content: str, dogwhistles: {}) -> {}: else: result[whistle]['count'] += 1 else: - for wrd in words: - if wrd == whistle: - if not result.get(whistle): - result[whistle] = { - "count": 1, - "category": category - } - else: - result[whistle]['count'] += 1 + if whistle.lower().endswith('-x'): + whistle = whistle[:len(whistle)-2] + starting = True + elif (whistle.endswith('*') or + whistle.endswith('~') or + whistle.endswith('-')): + whistle = whistle[:len(whistle)-1] + starting = True + + if starting: + for wrd in words: + if wrd.startswith(whistle): + if not result.get(whistle): + result[whistle] = { + "count": 1, + "category": category + } + else: + result[whistle]['count'] += 1 + else: + for wrd in words: + if wrd == whistle: + if not result.get(whistle): + result[whistle] = { + "count": 1, + "category": category + } + else: + result[whistle]['count'] += 1 return result @@ -1071,7 +1093,7 @@ def load_dogwhistles(filename: str) -> {}: except OSError: print('EX: unable to load dogwhistles from ' + filename) return {} - separators = ('->', ',', ';') + separators = ('->', ',', ';', '|') dogwhistles = {} for line in dogwhistle_lines: line = line.remove_eol(line).strip() diff --git a/tests.py b/tests.py index db51f142c..a84b4aee8 100644 --- a/tests.py +++ b/tests.py @@ -7332,11 +7332,14 @@ def _test_dogwhistles(): dogwhistles = { "X-hamstered": "hamsterism", "gerbil": "rodent", + "*snake": "slither" } content = 'This text does not contain any dogwhistles' assert not detect_dogwhistles(content, dogwhistles) content = 'A gerbil named joe' assert detect_dogwhistles(content, dogwhistles) + content = 'A rattlesnake.' + assert detect_dogwhistles(content, dogwhistles) content = 'This content is unhamstered and yhamstered.' result = detect_dogwhistles(content, dogwhistles) assert result diff --git a/webapp_post.py b/webapp_post.py index 1d793dcb1..bbb2b21e8 100644 --- a/webapp_post.py +++ b/webapp_post.py @@ -62,6 +62,7 @@ from utils import get_domain_from_actor from utils import acct_dir from utils import local_actor_url from utils import is_unlisted_post +from content import detect_dogwhistles from content import create_edits_html from content import bold_reading_string from content import limit_repeated_words @@ -1555,6 +1556,30 @@ def _substitute_onion_domains(base_dir: str, content: str) -> str: return content +def _add_dogwhistle_warnings(summary: str, content: str, + dogwhistles: {}, translate: {}) -> {}: + """Adds dogwhistle warnings for the given content + """ + if not dogwhistles: + return summary + detected = detect_dogwhistles(content, dogwhistles) + if not detected: + return summary + + for whistle, item in detected.items(): + if not item.get('category'): + continue + if 'dogwhistle' not in whistle: + whistle_str = whistle + ' dogwhistle' + else: + whistle_str = whistle + if summary: + summary += ', ' + whistle_str + else: + summary = whistle_str + return summary + + def individual_post_as_html(signing_priv_key_pem: str, allow_downloads: bool, recent_posts_cache: {}, max_recent_posts: int, @@ -2143,6 +2168,10 @@ def individual_post_as_html(signing_priv_key_pem: str, if content_str: summary_str = get_summary_from_post(post_json_object, system_language, languages_understood) + # add dogwhistle warnings to summary + summary_str = _add_dogwhistle_warnings(summary_str, content_str, + dogwhistles, translate) + content_all_str = str(summary_str) + ' ' + content_str # does an emoji indicate a no boost preference? # if so then don't show the repeat/announce icon