mirror of https://gitlab.com/bashrc2/epicyon
Load dogwhistles from file
parent
cb0bf43ce1
commit
d2a39c8b50
35
content.py
35
content.py
|
@ -1020,7 +1020,7 @@ def _get_simplified_content(content: str) -> str:
|
||||||
return content_simplified
|
return content_simplified
|
||||||
|
|
||||||
|
|
||||||
def get_dogwhistles(content: str, dogwhistles: {}) -> {}:
|
def detect_dogwhistles(content: str, dogwhistles: {}) -> {}:
|
||||||
"""Returns a dict containing any detected dogwhistle words
|
"""Returns a dict containing any detected dogwhistle words
|
||||||
"""
|
"""
|
||||||
result = {}
|
result = {}
|
||||||
|
@ -1059,6 +1059,39 @@ def get_dogwhistles(content: str, dogwhistles: {}) -> {}:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def load_dogwhistles(filename: str) -> {}:
    """Loads a list of dogwhistles from file

    Each non-blank line which does not begin with '#' describes one
    dogwhistle. A line may optionally assign a category to the word
    using one of the separators '->', ',' or ';', for example:

        X-pilled, chud

    Returns a dict mapping each dogwhistle to its category, or to None
    if the line has no category. An empty dict is returned if the file
    does not exist or cannot be read.
    """
    if not os.path.isfile(filename):
        return {}
    dogwhistle_lines = []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_dogwhistles:
            dogwhistle_lines = fp_dogwhistles.readlines()
    except OSError:
        print('EX: unable to load dogwhistles from ' + filename)
        return {}

    # separators are tried in this order and only the first match is used
    separators = ('->', ',', ';')
    dogwhistles = {}
    for line in dogwhistle_lines:
        # strip() removes the trailing line ending as well as any
        # surrounding whitespace. str has no remove_eol method, so
        # calling line.remove_eol() would raise AttributeError
        line = line.strip()
        if not line:
            continue
        if line.startswith('#'):
            continue
        whistle = None
        category = None
        for sep in separators:
            if sep in line:
                fields = line.split(sep, 1)
                # remove whitespace around the separator so that
                # "word, category" does not give " category"
                whistle = fields[0].strip()
                category = fields[1].strip()
                break
        if not whistle:
            # no separator, or nothing before it: use the whole line
            whistle = line
        dogwhistles[whistle] = category
    return dogwhistles
|
||||||
|
|
||||||
|
|
||||||
def add_html_tags(base_dir: str, http_prefix: str,
|
def add_html_tags(base_dir: str, http_prefix: str,
|
||||||
nickname: str, domain: str, content: str,
|
nickname: str, domain: str, content: str,
|
||||||
recipients: [], hashtags: {},
|
recipients: [], hashtags: {},
|
||||||
|
|
|
@ -326,6 +326,7 @@ from utils import has_group_type
|
||||||
from manualapprove import manual_deny_follow_request_thread
|
from manualapprove import manual_deny_follow_request_thread
|
||||||
from manualapprove import manual_approve_follow_request_thread
|
from manualapprove import manual_approve_follow_request_thread
|
||||||
from announce import create_announce
|
from announce import create_announce
|
||||||
|
from content import load_dogwhistles
|
||||||
from content import valid_url_lengths
|
from content import valid_url_lengths
|
||||||
from content import contains_invalid_local_links
|
from content import contains_invalid_local_links
|
||||||
from content import get_price_from_string
|
from content import get_price_from_string
|
||||||
|
@ -20967,6 +20968,12 @@ def run_daemon(preferred_podcast_formats: [],
|
||||||
# scan the theme directory for any svg files containing scripts
|
# scan the theme directory for any svg files containing scripts
|
||||||
assert not scan_themes_for_scripts(base_dir)
|
assert not scan_themes_for_scripts(base_dir)
|
||||||
|
|
||||||
|
# load a list of dogwhistle words
|
||||||
|
dogwhistles_filename = base_dir + '/accounts/dogwhistles.txt'
|
||||||
|
if not os.path.isfile(dogwhistles_filename):
|
||||||
|
dogwhistles_filename = base_dir + '/default_dogwhistles.txt'
|
||||||
|
httpd.dogwhistles = load_dogwhistles(dogwhistles_filename)
|
||||||
|
|
||||||
# list of preferred podcast formats
|
# list of preferred podcast formats
|
||||||
# eg ['audio/opus', 'audio/mp3']
|
# eg ['audio/opus', 'audio/mp3']
|
||||||
httpd.preferred_podcast_formats = preferred_podcast_formats
|
httpd.preferred_podcast_formats = preferred_podcast_formats
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
X-pilled, chud
|
||||||
|
chad, chud
|
8
tests.py
8
tests.py
|
@ -132,7 +132,7 @@ from inbox import valid_inbox
|
||||||
from inbox import valid_inbox_filenames
|
from inbox import valid_inbox_filenames
|
||||||
from inbox import cache_svg_images
|
from inbox import cache_svg_images
|
||||||
from categories import guess_hashtag_category
|
from categories import guess_hashtag_category
|
||||||
from content import get_dogwhistles
|
from content import detect_dogwhistles
|
||||||
from content import remove_script
|
from content import remove_script
|
||||||
from content import create_edits_html
|
from content import create_edits_html
|
||||||
from content import content_diff
|
from content import content_diff
|
||||||
|
@ -7334,11 +7334,11 @@ def _test_dogwhistles():
|
||||||
"gerbil": "rodent",
|
"gerbil": "rodent",
|
||||||
}
|
}
|
||||||
content = 'This text does not contain any dogwhistles'
|
content = 'This text does not contain any dogwhistles'
|
||||||
assert not get_dogwhistles(content, dogwhistles)
|
assert not detect_dogwhistles(content, dogwhistles)
|
||||||
content = 'A gerbil named joe'
|
content = 'A gerbil named joe'
|
||||||
assert get_dogwhistles(content, dogwhistles)
|
assert detect_dogwhistles(content, dogwhistles)
|
||||||
content = 'This content is unhamstered and yhamstered.'
|
content = 'This content is unhamstered and yhamstered.'
|
||||||
result = get_dogwhistles(content, dogwhistles)
|
result = detect_dogwhistles(content, dogwhistles)
|
||||||
assert result
|
assert result
|
||||||
assert result.get('hamstered')
|
assert result.get('hamstered')
|
||||||
assert result['hamstered']['count'] == 2
|
assert result['hamstered']['count'] == 2
|
||||||
|
|
Loading…
Reference in New Issue