Load dogwhistles from file

main
Bob Mottram 2022-07-05 13:30:21 +01:00
parent cb0bf43ce1
commit d2a39c8b50
4 changed files with 47 additions and 5 deletions

View File

@ -1020,7 +1020,7 @@ def _get_simplified_content(content: str) -> str:
return content_simplified
def get_dogwhistles(content: str, dogwhistles: {}) -> {}:
def detect_dogwhistles(content: str, dogwhistles: {}) -> {}:
"""Returns a dict containing any detected dogwhistle words
"""
result = {}
@ -1059,6 +1059,39 @@ def get_dogwhistles(content: str, dogwhistles: {}) -> {}:
return result
def load_dogwhistles(filename: str) -> {}:
    """Loads a list of dogwhistles from file

    Each non-empty line which does not begin with '#' defines one
    dogwhistle, optionally followed by a separator ('->', ',' or ';')
    and a category. For example:
        X-pilled, chud

    Returns a dict mapping each dogwhistle to its category, or to None
    if the line has no category. An empty dict is returned if the file
    does not exist or cannot be read.
    """
    if not os.path.isfile(filename):
        return {}
    dogwhistle_lines = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dogwhistles:
            dogwhistle_lines = fp_dogwhistles.readlines()
    except OSError:
        print('EX: unable to load dogwhistles from ' + filename)
        return {}

    # separators are tried in this order, and the first one which
    # appears anywhere within the line is used
    separators = ('->', ',', ';')
    dogwhistles = {}
    for line in dogwhistle_lines:
        # strip() also removes the trailing end of line characters.
        # str has no remove_eol method, so calling it on the line
        # would raise AttributeError
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        whistle = None
        category = None
        for sep in separators:
            if sep in line:
                whistle, _, category = line.partition(sep)
                # remove whitespace surrounding the separator, so that
                # "X-pilled, chud" gives the category "chud" not " chud"
                whistle = whistle.strip()
                category = category.strip()
                break
        if not whistle:
            # no separator was found, or there was nothing before it,
            # so the whole line is used as the dogwhistle
            whistle = line
        dogwhistles[whistle] = category
    return dogwhistles
def add_html_tags(base_dir: str, http_prefix: str,
nickname: str, domain: str, content: str,
recipients: [], hashtags: {},

View File

@ -326,6 +326,7 @@ from utils import has_group_type
from manualapprove import manual_deny_follow_request_thread
from manualapprove import manual_approve_follow_request_thread
from announce import create_announce
from content import load_dogwhistles
from content import valid_url_lengths
from content import contains_invalid_local_links
from content import get_price_from_string
@ -20967,6 +20968,12 @@ def run_daemon(preferred_podcast_formats: [],
# scan the theme directory for any svg files containing scripts
assert not scan_themes_for_scripts(base_dir)
# load a list of dogwhistle words
dogwhistles_filename = base_dir + '/accounts/dogwhistles.txt'
if not os.path.isfile(dogwhistles_filename):
dogwhistles_filename = base_dir + '/default_dogwhistles.txt'
httpd.dogwhistles = load_dogwhistles(dogwhistles_filename)
# list of preferred podcast formats
# eg ['audio/opus', 'audio/mp3']
httpd.preferred_podcast_formats = preferred_podcast_formats

View File

@ -0,0 +1,2 @@
X-pilled, chud
chad, chud

View File

@ -132,7 +132,7 @@ from inbox import valid_inbox
from inbox import valid_inbox_filenames
from inbox import cache_svg_images
from categories import guess_hashtag_category
from content import get_dogwhistles
from content import detect_dogwhistles
from content import remove_script
from content import create_edits_html
from content import content_diff
@ -7334,11 +7334,11 @@ def _test_dogwhistles():
"gerbil": "rodent",
}
content = 'This text does not contain any dogwhistles'
assert not get_dogwhistles(content, dogwhistles)
assert not detect_dogwhistles(content, dogwhistles)
content = 'A gerbil named joe'
assert get_dogwhistles(content, dogwhistles)
assert detect_dogwhistles(content, dogwhistles)
content = 'This content is unhamstered and yhamstered.'
result = get_dogwhistles(content, dogwhistles)
result = detect_dogwhistles(content, dogwhistles)
assert result
assert result.get('hamstered')
assert result['hamstered']['count'] == 2