__filename__ = "cwlists.py" __author__ = "Bob Mottram" __license__ = "AGPL3+" __version__ = "1.5.0" __maintainer__ = "Bob Mottram" __email__ = "bob@libreserver.org" __status__ = "Production" __module_group__ = "Core" import os from utils import load_json from utils import get_content_from_post def load_cw_lists(base_dir: str, verbose: bool) -> {}: """Load lists used for content warnings """ if not os.path.isdir(base_dir + '/cwlists'): return {} result = {} # NOTE: here we do want to allow recursive walk through # possible subdirectories for _, _, files in os.walk(base_dir + '/cwlists'): for fname in files: if not fname.endswith('.json'): continue list_filename = os.path.join(base_dir + '/cwlists', fname) print('list_filename: ' + list_filename) list_json = load_json(list_filename, 0, 1) if not list_json: continue if not list_json.get('name'): continue if not list_json.get('words') and \ not list_json.get('hashtags') and \ not list_json.get('domains'): continue name = list_json['name'] if verbose: print('List: ' + name) result[name] = list_json return result def _add_cw_match_tags(item: {}, post_tags: {}, cw_text: str, warning: str) -> (bool, str): """Updates content warning text using hashtags from within the post content """ matched = False for tag in item['hashtags']: tag = tag.strip() if not tag: continue if not tag.startswith('#'): tag = '#' + tag tag = tag.lower() for tag_dict in post_tags: if not isinstance(tag_dict, dict): continue if not tag_dict.get('Hashtag'): continue if not tag_dict.get('name'): continue if tag_dict['name'].lower() == tag: if cw_text: cw_text = warning + ' / ' + cw_text else: cw_text = warning matched = True break if matched: break return matched, cw_text def _add_cw_match_domains(item: {}, content: str, cw_text: str, warning: str) -> (bool, str): """Updates content warning text using domains from within the post content """ matched = False for domain in item['domains']: if '.' in domain: first_section = domain.split('.')[0] if len(first_section) < 4: if '.' + domain in content or \ '/' + domain in content: if cw_text: cw_text = warning + ' / ' + cw_text else: cw_text = warning matched = True break continue if domain in content: if cw_text: cw_text = warning + ' / ' + cw_text else: cw_text = warning matched = True break return matched, cw_text def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {}, lists_enabled: str, system_language: str, languages_understood: []) -> None: """Adds content warnings by matching the post content against domains or keywords """ if not lists_enabled: return if 'content' not in post_json_object['object']: if 'contentMap' not in post_json_object['object']: return cw_text = '' if post_json_object['object'].get('summary'): cw_text = post_json_object['object']['summary'] content = get_content_from_post(post_json_object, system_language, languages_understood, "content") if not content: return post_tags = [] if post_json_object['object'].get('tag'): if isinstance(post_json_object['object']['tag'], list): post_tags = post_json_object['object']['tag'] for name, item in cw_lists.items(): if name not in lists_enabled: continue if not item.get('warning'): continue warning = item['warning'] # is there a translated version of the warning? if translate.get(warning): warning = translate[warning] # is the warning already in the CW? if warning in cw_text: continue matched = False # match hashtags within the post if post_tags and item.get('hashtags'): matched, cw_text = \ _add_cw_match_tags(item, post_tags, cw_text, warning) if matched: continue # match domains within the content if item.get('domains'): matched, cw_text = \ _add_cw_match_domains(item, content, cw_text, warning) if matched: continue # match words within the content if item.get('words'): for word_str in item['words']: if word_str in content or word_str.title() in content: if cw_text: cw_text = warning + ' / ' + cw_text else: cw_text = warning break if cw_text: post_json_object['object']['summary'] = cw_text post_json_object['object']['sensitive'] = True def get_cw_list_variable(list_name: str) -> str: """Returns the variable associated with a CW list """ return 'list' + list_name.replace(' ', '').replace("'", '')