cwlists in a separate module

main
Bob Mottram 2023-03-20 14:50:19 +00:00
parent 06dbd6e51c
commit b12932e075
7 changed files with 128 additions and 115 deletions

View File

@ -1377,109 +1377,6 @@ def broch_modeLapses(base_dir: str, lapseDays: int) -> bool:
return False
def load_cw_lists(base_dir: str, verbose: bool) -> {}:
    """Load lists used for content warnings

    Walks the 'cwlists' directory under base_dir, including any
    subdirectories, and loads every file ending in '.json'.
    A list is only kept if it has a 'name' and at least one of
    'words' or 'domains'.

    base_dir: directory which contains the 'cwlists' directory
    verbose: if True then print the name of each list loaded

    Returns a dict mapping list name to the loaded list json, or an
    empty dict if the 'cwlists' directory does not exist
    """
    cw_dir = base_dir + '/cwlists'
    if not os.path.isdir(cw_dir):
        return {}
    result = {}
    # NOTE: here we do want to allow recursive walk through
    # possible subdirectories
    for dirpath, _, files in os.walk(cw_dir):
        for fname in files:
            if not fname.endswith('.json'):
                continue
            # join with the directory currently being walked, so that
            # files within subdirectories resolve to their real path
            list_filename = os.path.join(dirpath, fname)
            print('list_filename: ' + list_filename)
            list_json = load_json(list_filename, 0, 1)
            if not list_json:
                continue
            if not list_json.get('name'):
                continue
            # a list with nothing to match against is of no use
            if not list_json.get('words') and not list_json.get('domains'):
                continue
            name = list_json['name']
            if verbose:
                print('List: ' + name)
            result[name] = list_json
    return result
def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
                      lists_enabled: str, system_language: str) -> None:
    """Adds content warnings by matching the post content
    against domains or keywords

    post_json_object: the post, which is updated in place. If any
        warning applies (or a summary already exists) then the
        'summary' of its object is set and 'sensitive' becomes True
    cw_lists: dict of list name to list json, where each list may
        have 'warning', 'domains' and 'words' fields
    translate: dict used to look up a translated warning text
    lists_enabled: string containing the names of the enabled lists.
        Names are tested with "in", so this is a substring match
    system_language: language used to select from the contentMap

    Posts whose 'object' is missing or is not a dict (for example
    a url string) are left unchanged.
    """
    if not lists_enabled:
        return
    post_obj = post_json_object.get('object')
    if not isinstance(post_obj, dict):
        # nothing to match against, eg. the object is only a url
        return
    if 'content' not in post_obj and 'contentMap' not in post_obj:
        return

    cw_text = post_obj.get('summary') or ''

    # prefer the content for the system language,
    # falling back to the plain content field
    content = None
    content_map = post_obj.get('contentMap')
    if isinstance(content_map, dict):
        content = content_map.get(system_language)
    if not content:
        content = post_obj.get('content')
    if not content:
        return

    for name, item in cw_lists.items():
        if name not in lists_enabled:
            continue
        warning = item.get('warning')
        if not warning:
            continue
        # is there a translated version of the warning?
        warning = translate.get(warning) or warning
        # is the warning already in the CW?
        if warning in cw_text:
            continue

        # match domains within the content, then words within the
        # content. Words are also tried in title case
        domains = item.get('domains') or []
        words = item.get('words') or []
        matched = any(domain in content for domain in domains)
        if not matched:
            matched = any(word_str in content or
                          word_str.title() in content
                          for word_str in words)
        if not matched:
            continue

        # new warnings go in front of any existing CW text
        if cw_text:
            cw_text = warning + ' / ' + cw_text
        else:
            cw_text = warning

    if cw_text:
        post_obj['summary'] = cw_text
        post_obj['sensitive'] = True
def get_cw_list_variable(list_name: str) -> str:
    """Returns the variable associated with a CW list

    The variable is 'list' followed by the list name with any
    spaces and single quotes removed
    """
    # one pass which deletes both unwanted characters
    strip_table = str.maketrans('', '', " '")
    return 'list' + list_name.translate(strip_table)
def import_blocking_file(base_dir: str, nickname: str, domain: str,
lines: []) -> bool:
"""Imports blocked domains for a given account

110
cwlists.py 100644
View File

@ -0,0 +1,110 @@
# Module metadata for cwlists.py: authorship, license, version,
# maintainer contact, status and the module group it belongs to
__filename__ = "cwlists.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.4.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"
import os
from utils import load_json
from utils import get_content_from_post
def load_cw_lists(base_dir: str, verbose: bool) -> {}:
    """Load lists used for content warnings

    Walks the 'cwlists' directory under base_dir, including any
    subdirectories, and loads every file ending in '.json'.
    A list is only kept if it has a 'name' and at least one of
    'words' or 'domains'.

    base_dir: directory which contains the 'cwlists' directory
    verbose: if True then print the name of each list loaded

    Returns a dict mapping list name to the loaded list json, or an
    empty dict if the 'cwlists' directory does not exist
    """
    cw_dir = base_dir + '/cwlists'
    if not os.path.isdir(cw_dir):
        return {}
    result = {}
    # NOTE: here we do want to allow recursive walk through
    # possible subdirectories
    for dirpath, _, files in os.walk(cw_dir):
        for fname in files:
            if not fname.endswith('.json'):
                continue
            # join with the directory currently being walked, so that
            # files within subdirectories resolve to their real path
            list_filename = os.path.join(dirpath, fname)
            print('list_filename: ' + list_filename)
            list_json = load_json(list_filename, 0, 1)
            if not list_json:
                continue
            if not list_json.get('name'):
                continue
            # a list with nothing to match against is of no use
            if not list_json.get('words') and not list_json.get('domains'):
                continue
            name = list_json['name']
            if verbose:
                print('List: ' + name)
            result[name] = list_json
    return result
def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
                      lists_enabled: str, system_language: str,
                      languages_understood: []) -> None:
    """Adds content warnings by matching the post content
    against domains or keywords

    post_json_object: the post, which is updated in place. If any
        warning applies (or a summary already exists) then the
        'summary' of its object is set and 'sensitive' becomes True
    cw_lists: dict of list name to list json, where each list may
        have 'warning', 'domains' and 'words' fields
    translate: dict used to look up a translated warning text
    lists_enabled: string containing the names of the enabled lists.
        Names are tested with "in", so this is a substring match
    system_language: passed to get_content_from_post
    languages_understood: passed to get_content_from_post

    Posts whose 'object' is missing or is not a dict (for example
    a url string) are left unchanged.
    """
    if not lists_enabled:
        return
    post_obj = post_json_object.get('object')
    if not isinstance(post_obj, dict):
        # nothing to match against, eg. the object is only a url
        return
    if 'content' not in post_obj and 'contentMap' not in post_obj:
        return

    cw_text = post_obj.get('summary') or ''

    content = get_content_from_post(post_json_object, system_language,
                                    languages_understood, "content")
    if not content:
        return

    for name, item in cw_lists.items():
        if name not in lists_enabled:
            continue
        warning = item.get('warning')
        if not warning:
            continue
        # is there a translated version of the warning?
        warning = translate.get(warning) or warning
        # is the warning already in the CW?
        if warning in cw_text:
            continue

        # match domains within the content, then words within the
        # content. Words are also tried in title case
        domains = item.get('domains') or []
        words = item.get('words') or []
        matched = any(domain in content for domain in domains)
        if not matched:
            matched = any(word_str in content or
                          word_str.title() in content
                          for word_str in words)
        if not matched:
            continue

        # new warnings go in front of any existing CW text
        if cw_text:
            cw_text = warning + ' / ' + cw_text
        else:
            cw_text = warning

    if cw_text:
        post_obj['summary'] = cw_text
        post_obj['sensitive'] = True
def get_cw_list_variable(list_name: str) -> str:
    """Returns the variable associated with a CW list

    The variable is 'list' followed by the list name with any
    spaces and single quotes removed
    """
    # one pass which deletes both unwanted characters
    strip_table = str.maketrans('', '', " '")
    return 'list' + list_name.translate(strip_table)

View File

@ -149,11 +149,11 @@ from media import attach_media
from media import path_is_video
from media import path_is_transcript
from media import path_is_audio
from cwlists import get_cw_list_variable
from cwlists import load_cw_lists
from blocking import import_blocking_file
from blocking import export_blocking_file
from blocking import add_account_blocks
from blocking import get_cw_list_variable
from blocking import load_cw_lists
from blocking import update_blocked_cache
from blocking import mute_post
from blocking import unmute_post

View File

@ -195,8 +195,8 @@ from shares import update_shared_item_federation_token
from shares import merge_shared_item_tokens
from shares import send_share_via_server
from shares import get_shared_items_catalog_via_server
from blocking import load_cw_lists
from blocking import add_cw_from_lists
from cwlists import add_cw_from_lists
from cwlists import load_cw_lists
from happening import dav_month_via_server
from happening import dav_day_via_server
from webapp_theme_designer import color_contrast
@ -2622,7 +2622,10 @@ def test_group_follow(base_dir: str) -> None:
if os.path.isfile(os.path.join(queue_path, name))]) == 0
os.chdir(base_dir)
shutil.rmtree(base_dir + '/.tests', ignore_errors=False, onerror=None)
try:
shutil.rmtree(base_dir + '/.tests', ignore_errors=False, onerror=None)
except OSError:
print('Unable to remove directory ' + base_dir + '/.tests')
print('Testing following of a group is complete')
@ -6952,6 +6955,7 @@ def _test_add_cw_lists(base_dir: str) -> None:
print('test_add_CW_from_lists')
translate = {}
system_language = "en"
languages_understood = ["en"]
cw_lists = load_cw_lists(base_dir, True)
assert cw_lists
@ -6963,7 +6967,7 @@ def _test_add_cw_lists(base_dir: str) -> None:
}
}
add_cw_from_lists(post_json_object, cw_lists, translate, 'Murdoch press',
system_language)
system_language, languages_understood)
assert post_json_object['object']['sensitive'] is False
assert post_json_object['object']['summary'] is None
@ -6977,7 +6981,7 @@ def _test_add_cw_lists(base_dir: str) -> None:
}
}
add_cw_from_lists(post_json_object, cw_lists, translate, 'Murdoch press',
system_language)
system_language, languages_understood)
assert post_json_object['object']['sensitive'] is True
assert post_json_object['object']['summary'] == "Murdoch Press"
@ -6989,7 +6993,7 @@ def _test_add_cw_lists(base_dir: str) -> None:
}
}
add_cw_from_lists(post_json_object, cw_lists, translate, 'Murdoch press',
system_language)
system_language, languages_understood)
assert post_json_object['object']['sensitive'] is True
assert post_json_object['object']['summary'] == \
"Murdoch Press / Existing CW"

View File

@ -201,7 +201,9 @@ def get_content_from_post(post_json_object: {}, system_language: str,
this_post_json = post_json_object
if has_object_dict(post_json_object):
this_post_json = post_json_object['object']
if not this_post_json.get(content_type):
map_dict = content_type + 'Map'
if not this_post_json.get(content_type) and \
not this_post_json.get(map_dict):
return ''
content = ''
map_dict = content_type + 'Map'

View File

@ -101,8 +101,8 @@ from devices import e2e_edecrypt_message_from_device
from webfinger import webfinger_handle
from speaker import update_speaker
from languages import auto_translate_post
from cwlists import add_cw_from_lists
from blocking import is_blocked
from blocking import add_cw_from_lists
from reaction import html_emoji_reactions
from maps import html_open_street_map
from maps import set_map_preferences_coords
@ -2494,7 +2494,7 @@ def individual_post_as_html(signing_priv_key_pem: str,
# add any content warning from the cwlists directory
add_cw_from_lists(post_json_object, cw_lists, translate, lists_enabled,
system_language)
system_language, languages_understood)
post_is_sensitive = False
if post_json_object['object'].get('sensitive'):

View File

@ -84,8 +84,8 @@ from blog import get_blog_address
from webapp_post import individual_post_as_html
from webapp_timeline import html_individual_share
from webapp_timeline import page_number_buttons
from cwlists import get_cw_list_variable
from blocking import get_account_blocks
from blocking import get_cw_list_variable
from blocking import is_blocked
from content import bold_reading_string
from roles import is_devops