Add dogwhistle warnings

merge-requests/30/head
Bob Mottram 2022-07-05 17:21:48 +01:00
parent 542f0c6605
commit 8101835c05
3 changed files with 64 additions and 10 deletions

View File

@ -1027,6 +1027,8 @@ def detect_dogwhistles(content: str, dogwhistles: {}) -> {}:
words = _get_simplified_content(content).split(' ') words = _get_simplified_content(content).split(' ')
for whistle, category in dogwhistles.items(): for whistle, category in dogwhistles.items():
ending = False ending = False
starting = False
if whistle.lower().startswith('x-'): if whistle.lower().startswith('x-'):
whistle = whistle[2:] whistle = whistle[2:]
ending = True ending = True
@ -1047,15 +1049,35 @@ def detect_dogwhistles(content: str, dogwhistles: {}) -> {}:
else: else:
result[whistle]['count'] += 1 result[whistle]['count'] += 1
else: else:
for wrd in words: if whistle.lower().endswith('-x'):
if wrd == whistle: whistle = whistle[:len(whistle)-2]
if not result.get(whistle): starting = True
result[whistle] = { elif (whistle.endswith('*') or
"count": 1, whistle.endswith('~') or
"category": category whistle.endswith('-')):
} whistle = whistle[:len(whistle)-1]
else: starting = True
result[whistle]['count'] += 1
if starting:
for wrd in words:
if wrd.startswith(whistle):
if not result.get(whistle):
result[whistle] = {
"count": 1,
"category": category
}
else:
result[whistle]['count'] += 1
else:
for wrd in words:
if wrd == whistle:
if not result.get(whistle):
result[whistle] = {
"count": 1,
"category": category
}
else:
result[whistle]['count'] += 1
return result return result
@ -1071,7 +1093,7 @@ def load_dogwhistles(filename: str) -> {}:
except OSError: except OSError:
print('EX: unable to load dogwhistles from ' + filename) print('EX: unable to load dogwhistles from ' + filename)
return {} return {}
separators = ('->', ',', ';') separators = ('->', ',', ';', '|')
dogwhistles = {} dogwhistles = {}
for line in dogwhistle_lines: for line in dogwhistle_lines:
line = line.remove_eol(line).strip() line = line.remove_eol(line).strip()

View File

@ -7332,11 +7332,14 @@ def _test_dogwhistles():
dogwhistles = { dogwhistles = {
"X-hamstered": "hamsterism", "X-hamstered": "hamsterism",
"gerbil": "rodent", "gerbil": "rodent",
"*snake": "slither"
} }
content = 'This text does not contain any dogwhistles' content = 'This text does not contain any dogwhistles'
assert not detect_dogwhistles(content, dogwhistles) assert not detect_dogwhistles(content, dogwhistles)
content = 'A gerbil named joe' content = 'A gerbil named joe'
assert detect_dogwhistles(content, dogwhistles) assert detect_dogwhistles(content, dogwhistles)
content = 'A rattlesnake.'
assert detect_dogwhistles(content, dogwhistles)
content = 'This content is unhamstered and yhamstered.' content = 'This content is unhamstered and yhamstered.'
result = detect_dogwhistles(content, dogwhistles) result = detect_dogwhistles(content, dogwhistles)
assert result assert result

View File

@ -62,6 +62,7 @@ from utils import get_domain_from_actor
from utils import acct_dir from utils import acct_dir
from utils import local_actor_url from utils import local_actor_url
from utils import is_unlisted_post from utils import is_unlisted_post
from content import detect_dogwhistles
from content import create_edits_html from content import create_edits_html
from content import bold_reading_string from content import bold_reading_string
from content import limit_repeated_words from content import limit_repeated_words
@ -1555,6 +1556,30 @@ def _substitute_onion_domains(base_dir: str, content: str) -> str:
return content return content
def _add_dogwhistle_warnings(summary: str, content: str,
dogwhistles: {}, translate: {}) -> {}:
"""Adds dogwhistle warnings for the given content
"""
if not dogwhistles:
return summary
detected = detect_dogwhistles(content, dogwhistles)
if not detected:
return summary
for whistle, item in detected.items():
if not item.get('category'):
continue
if 'dogwhistle' not in whistle:
whistle_str = whistle + ' dogwhistle'
else:
whistle_str = whistle
if summary:
summary += ', ' + whistle_str
else:
summary = whistle_str
return summary
def individual_post_as_html(signing_priv_key_pem: str, def individual_post_as_html(signing_priv_key_pem: str,
allow_downloads: bool, allow_downloads: bool,
recent_posts_cache: {}, max_recent_posts: int, recent_posts_cache: {}, max_recent_posts: int,
@ -2143,6 +2168,10 @@ def individual_post_as_html(signing_priv_key_pem: str,
if content_str: if content_str:
summary_str = get_summary_from_post(post_json_object, system_language, summary_str = get_summary_from_post(post_json_object, system_language,
languages_understood) languages_understood)
# add dogwhistle warnings to summary
summary_str = _add_dogwhistle_warnings(summary_str, content_str,
dogwhistles, translate)
content_all_str = str(summary_str) + ' ' + content_str content_all_str = str(summary_str) + ' ' + content_str
# does an emoji indicate a no boost preference? # does an emoji indicate a no boost preference?
# if so then don't show the repeat/announce icon # if so then don't show the repeat/announce icon