mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			
		
			
				
	
	
		
			183 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			183 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Python
		
	
	
| __filename__ = "cwlists.py"
 | |
| __author__ = "Bob Mottram"
 | |
| __license__ = "AGPL3+"
 | |
| __version__ = "1.6.0"
 | |
| __maintainer__ = "Bob Mottram"
 | |
| __email__ = "bob@libreserver.org"
 | |
| __status__ = "Production"
 | |
| __module_group__ = "Core"
 | |
| 
 | |
| import os
 | |
| from utils import load_json
 | |
| from utils import get_content_from_post
 | |
| 
 | |
| 
 | |
| def load_cw_lists(base_dir: str, verbose: bool) -> {}:
 | |
|     """Load lists used for content warnings
 | |
|     """
 | |
|     if not os.path.isdir(base_dir + '/cwlists'):
 | |
|         return {}
 | |
|     result = {}
 | |
|     # NOTE: here we do want to allow recursive walk through
 | |
|     # possible subdirectories
 | |
|     for _, _, files in os.walk(base_dir + '/cwlists'):
 | |
|         for fname in files:
 | |
|             if not fname.endswith('.json'):
 | |
|                 continue
 | |
|             list_filename = os.path.join(base_dir + '/cwlists', fname)
 | |
|             print('list_filename: ' + list_filename)
 | |
|             list_json = load_json(list_filename)
 | |
|             if not list_json:
 | |
|                 continue
 | |
|             if not list_json.get('name'):
 | |
|                 continue
 | |
|             if not list_json.get('words') and \
 | |
|                not list_json.get('hashtags') and \
 | |
|                not list_json.get('domains'):
 | |
|                 continue
 | |
|             name = list_json['name']
 | |
|             if verbose:
 | |
|                 print('List: ' + name)
 | |
|             result[name] = list_json
 | |
|     return result
 | |
| 
 | |
| 
 | |
| def _add_cw_match_tags(item: {}, post_tags: {}, cw_text: str,
 | |
|                        warning: str) -> (bool, str):
 | |
|     """Updates content warning text using hashtags from within
 | |
|     the post content
 | |
|     """
 | |
|     matched = False
 | |
|     for tag in item['hashtags']:
 | |
|         tag = tag.strip()
 | |
|         if not tag:
 | |
|             continue
 | |
|         if not tag.startswith('#'):
 | |
|             tag = '#' + tag
 | |
|         tag = tag.lower()
 | |
|         for tag_dict in post_tags:
 | |
|             if not isinstance(tag_dict, dict):
 | |
|                 continue
 | |
|             if not tag_dict.get('Hashtag'):
 | |
|                 continue
 | |
|             if not tag_dict.get('name'):
 | |
|                 continue
 | |
|             if tag_dict['name'].lower() == tag:
 | |
|                 if cw_text:
 | |
|                     cw_text = warning + ' / ' + cw_text
 | |
|                 else:
 | |
|                     cw_text = warning
 | |
|                 matched = True
 | |
|                 break
 | |
|         if matched:
 | |
|             break
 | |
|     return matched, cw_text
 | |
| 
 | |
| 
 | |
| def _add_cw_match_domains(item: {}, content: str, cw_text: str,
 | |
|                           warning: str) -> (bool, str):
 | |
|     """Updates content warning text using domains from within
 | |
|     the post content
 | |
|     """
 | |
|     matched = False
 | |
|     for domain in item['domains']:
 | |
|         if '.' in domain:
 | |
|             first_section = domain.split('.')[0]
 | |
|             if len(first_section) < 4:
 | |
|                 if '.' + domain in content or \
 | |
|                    '/' + domain in content:
 | |
|                     if cw_text:
 | |
|                         cw_text = warning + ' / ' + cw_text
 | |
|                     else:
 | |
|                         cw_text = warning
 | |
|                     matched = True
 | |
|                     break
 | |
|                 continue
 | |
| 
 | |
|         if domain in content:
 | |
|             if cw_text:
 | |
|                 cw_text = warning + ' / ' + cw_text
 | |
|             else:
 | |
|                 cw_text = warning
 | |
|             matched = True
 | |
|             break
 | |
|     return matched, cw_text
 | |
| 
 | |
| 
 | |
| def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
 | |
|                       lists_enabled: str, system_language: str,
 | |
|                       languages_understood: []) -> None:
 | |
|     """Adds content warnings by matching the post content
 | |
|     against domains or keywords
 | |
|     """
 | |
|     if not lists_enabled:
 | |
|         return
 | |
|     if 'content' not in post_json_object['object']:
 | |
|         if 'contentMap' not in post_json_object['object']:
 | |
|             return
 | |
|     cw_text = ''
 | |
|     if post_json_object['object'].get('summary'):
 | |
|         cw_text = post_json_object['object']['summary']
 | |
| 
 | |
|     content = get_content_from_post(post_json_object, system_language,
 | |
|                                     languages_understood, "content")
 | |
|     if not content:
 | |
|         return
 | |
| 
 | |
|     post_tags: list[dict] = []
 | |
|     if post_json_object['object'].get('tag'):
 | |
|         if isinstance(post_json_object['object']['tag'], list):
 | |
|             post_tags = post_json_object['object']['tag']
 | |
| 
 | |
|     for name, item in cw_lists.items():
 | |
|         if name not in lists_enabled:
 | |
|             continue
 | |
|         if not item.get('warning'):
 | |
|             continue
 | |
|         warning = item['warning']
 | |
| 
 | |
|         # is there a translated version of the warning?
 | |
|         if translate.get(warning):
 | |
|             warning = translate[warning]
 | |
| 
 | |
|         # is the warning already in the CW?
 | |
|         if warning in cw_text:
 | |
|             continue
 | |
| 
 | |
|         matched = False
 | |
| 
 | |
|         # match hashtags within the post
 | |
|         if post_tags and item.get('hashtags'):
 | |
|             matched, cw_text = \
 | |
|                 _add_cw_match_tags(item, post_tags, cw_text, warning)
 | |
| 
 | |
|         if matched:
 | |
|             continue
 | |
| 
 | |
|         # match domains within the content
 | |
|         if item.get('domains'):
 | |
|             matched, cw_text = \
 | |
|                 _add_cw_match_domains(item, content, cw_text, warning)
 | |
| 
 | |
|         if matched:
 | |
|             continue
 | |
| 
 | |
|         # match words within the content
 | |
|         if item.get('words'):
 | |
|             for word_str in item['words']:
 | |
|                 if word_str in content or word_str.title() in content:
 | |
|                     if cw_text:
 | |
|                         cw_text = warning + ' / ' + cw_text
 | |
|                     else:
 | |
|                         cw_text = warning
 | |
|                     break
 | |
|     if cw_text:
 | |
|         post_json_object['object']['summary'] = cw_text
 | |
|         post_json_object['object']['sensitive'] = True
 | |
| 
 | |
| 
 | |
| def get_cw_list_variable(list_name: str) -> str:
 | |
|     """Returns the variable associated with a CW list
 | |
|     """
 | |
|     return 'list' + list_name.replace(' ', '').replace("'", '')
 |