mirror of https://gitlab.com/bashrc2/epicyon
Add dogwhistle warnings
parent
542f0c6605
commit
8101835c05
42
content.py
42
content.py
|
@ -1027,6 +1027,8 @@ def detect_dogwhistles(content: str, dogwhistles: {}) -> {}:
|
|||
words = _get_simplified_content(content).split(' ')
|
||||
for whistle, category in dogwhistles.items():
|
||||
ending = False
|
||||
starting = False
|
||||
|
||||
if whistle.lower().startswith('x-'):
|
||||
whistle = whistle[2:]
|
||||
ending = True
|
||||
|
@ -1047,15 +1049,35 @@ def detect_dogwhistles(content: str, dogwhistles: {}) -> {}:
|
|||
else:
|
||||
result[whistle]['count'] += 1
|
||||
else:
|
||||
for wrd in words:
|
||||
if wrd == whistle:
|
||||
if not result.get(whistle):
|
||||
result[whistle] = {
|
||||
"count": 1,
|
||||
"category": category
|
||||
}
|
||||
else:
|
||||
result[whistle]['count'] += 1
|
||||
if whistle.lower().endswith('-x'):
|
||||
whistle = whistle[:len(whistle)-2]
|
||||
starting = True
|
||||
elif (whistle.endswith('*') or
|
||||
whistle.endswith('~') or
|
||||
whistle.endswith('-')):
|
||||
whistle = whistle[:len(whistle)-1]
|
||||
starting = True
|
||||
|
||||
if starting:
|
||||
for wrd in words:
|
||||
if wrd.startswith(whistle):
|
||||
if not result.get(whistle):
|
||||
result[whistle] = {
|
||||
"count": 1,
|
||||
"category": category
|
||||
}
|
||||
else:
|
||||
result[whistle]['count'] += 1
|
||||
else:
|
||||
for wrd in words:
|
||||
if wrd == whistle:
|
||||
if not result.get(whistle):
|
||||
result[whistle] = {
|
||||
"count": 1,
|
||||
"category": category
|
||||
}
|
||||
else:
|
||||
result[whistle]['count'] += 1
|
||||
return result
|
||||
|
||||
|
||||
|
@ -1071,7 +1093,7 @@ def load_dogwhistles(filename: str) -> {}:
|
|||
except OSError:
|
||||
print('EX: unable to load dogwhistles from ' + filename)
|
||||
return {}
|
||||
separators = ('->', ',', ';')
|
||||
separators = ('->', ',', ';', '|')
|
||||
dogwhistles = {}
|
||||
for line in dogwhistle_lines:
|
||||
line = line.remove_eol(line).strip()
|
||||
|
|
3
tests.py
3
tests.py
|
@ -7332,11 +7332,14 @@ def _test_dogwhistles():
|
|||
dogwhistles = {
|
||||
"X-hamstered": "hamsterism",
|
||||
"gerbil": "rodent",
|
||||
"*snake": "slither"
|
||||
}
|
||||
content = 'This text does not contain any dogwhistles'
|
||||
assert not detect_dogwhistles(content, dogwhistles)
|
||||
content = 'A gerbil named joe'
|
||||
assert detect_dogwhistles(content, dogwhistles)
|
||||
content = 'A rattlesnake.'
|
||||
assert detect_dogwhistles(content, dogwhistles)
|
||||
content = 'This content is unhamstered and yhamstered.'
|
||||
result = detect_dogwhistles(content, dogwhistles)
|
||||
assert result
|
||||
|
|
|
@ -62,6 +62,7 @@ from utils import get_domain_from_actor
|
|||
from utils import acct_dir
|
||||
from utils import local_actor_url
|
||||
from utils import is_unlisted_post
|
||||
from content import detect_dogwhistles
|
||||
from content import create_edits_html
|
||||
from content import bold_reading_string
|
||||
from content import limit_repeated_words
|
||||
|
@ -1555,6 +1556,30 @@ def _substitute_onion_domains(base_dir: str, content: str) -> str:
|
|||
return content
|
||||
|
||||
|
||||
def _add_dogwhistle_warnings(summary: str, content: str,
|
||||
dogwhistles: {}, translate: {}) -> {}:
|
||||
"""Adds dogwhistle warnings for the given content
|
||||
"""
|
||||
if not dogwhistles:
|
||||
return summary
|
||||
detected = detect_dogwhistles(content, dogwhistles)
|
||||
if not detected:
|
||||
return summary
|
||||
|
||||
for whistle, item in detected.items():
|
||||
if not item.get('category'):
|
||||
continue
|
||||
if 'dogwhistle' not in whistle:
|
||||
whistle_str = whistle + ' dogwhistle'
|
||||
else:
|
||||
whistle_str = whistle
|
||||
if summary:
|
||||
summary += ', ' + whistle_str
|
||||
else:
|
||||
summary = whistle_str
|
||||
return summary
|
||||
|
||||
|
||||
def individual_post_as_html(signing_priv_key_pem: str,
|
||||
allow_downloads: bool,
|
||||
recent_posts_cache: {}, max_recent_posts: int,
|
||||
|
@ -2143,6 +2168,10 @@ def individual_post_as_html(signing_priv_key_pem: str,
|
|||
if content_str:
|
||||
summary_str = get_summary_from_post(post_json_object, system_language,
|
||||
languages_understood)
|
||||
# add dogwhistle warnings to summary
|
||||
summary_str = _add_dogwhistle_warnings(summary_str, content_str,
|
||||
dogwhistles, translate)
|
||||
|
||||
content_all_str = str(summary_str) + ' ' + content_str
|
||||
# does an emoji indicate a no boost preference?
|
||||
# if so then don't show the repeat/announce icon
|
||||
|
|
Loading…
Reference in New Issue