2020-04-03 10:11:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__filename__ = "filters.py"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								__author__ = "Bob Mottram"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								__license__ = "AGPL3+"
							 | 
						
					
						
							
								
									
										
										
										
											2024-12-22 23:37:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__version__ = "1.6.0"
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-03 10:11:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__maintainer__ = "Bob Mottram"
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-10 16:14:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__email__ = "bob@libreserver.org"
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-03 10:11:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__status__ = "Production"
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-26 11:16:41 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__module_group__ = "Moderation"
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-14 20:50:27 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import os
							 | 
						
					
						
							
								
									
										
										
										
											2025-05-04 09:52:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import fnmatch
							 | 
						
					
						
							
								
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import data_dir
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-26 12:02:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import acct_dir
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-10 11:43:33 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import text_in_file
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-21 11:58:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import remove_eol
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-09 10:54:05 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import standardize_text
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import remove_inverted_text
							 | 
						
					
						
							
								
									
										
										
										
											2022-10-05 17:55:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from utils import remove_square_capitals
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-09 10:37:33 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def add_filter(base_dir: str, nickname: str, domain: str, words: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-21 20:00:57 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Adds a filter for particular words within the content of a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    incoming posts
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-14 20:50:27 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    filters_filename = acct_dir(base_dir, nickname, domain) + '/filters.txt'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if os.path.isfile(filters_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-10 13:01:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if text_in_file(words, filters_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-14 20:50:27 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return False
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        with open(filters_filename, 'a+',
							 | 
						
					
						
							
								
									
										
										
										
											2024-07-14 13:01:46 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                  encoding='utf-8') as fp_filters:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            fp_filters.write(words + '\n')
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except OSError:
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to append filters ' + filters_filename)
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-02 11:47:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-14 20:50:27 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-03 10:11:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def add_global_filter(base_dir: str, words: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 11:29:55 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Adds a global filter for particular words within
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    the content of a incoming posts
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 13:10:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not words:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if len(words) < 2:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    filters_filename = data_dir(base_dir) + '/filters.txt'
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if os.path.isfile(filters_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-10 13:01:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if text_in_file(words, filters_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 11:29:55 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return False
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2024-07-14 13:01:46 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        with open(filters_filename, 'a+', encoding='utf-8') as fp_filters:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            fp_filters.write(words + '\n')
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except OSError:
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to append filters ' + filters_filename)
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-02 11:47:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 11:29:55 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def remove_filter(base_dir: str, nickname: str, domain: str,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                  words: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-14 20:50:27 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Removes a word filter
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    filters_filename = acct_dir(base_dir, nickname, domain) + '/filters.txt'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not os.path.isfile(filters_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-07 08:56:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-10 11:43:33 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not text_in_file(words, filters_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-07 08:56:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    new_filters_filename = filters_filename + '.new'
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        with open(filters_filename, 'r', encoding='utf-8') as fp_filt:
							 | 
						
					
						
							
								
									
										
										
										
											2024-07-16 12:20:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            with open(new_filters_filename, 'w+', encoding='utf-8') as fp_new:
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                for line in fp_filt:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-21 11:58:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    line = remove_eol(line)
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    if line != words:
							 | 
						
					
						
							
								
									
										
										
										
											2024-07-16 12:20:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                        fp_new.write(line + '\n')
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except OSError as ex:
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to remove filter ' +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              filters_filename + ' ' + str(ex))
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-02 11:47:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if os.path.isfile(new_filters_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        os.rename(new_filters_filename, filters_filename)
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-07 08:56:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return True
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-14 20:50:27 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return False
							 | 
						
					
						
							
								
									
										
										
										
											2020-02-05 14:57:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-03 10:11:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def remove_global_filter(base_dir: str, words: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 11:29:55 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Removes a global word filter
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    filters_filename = data_dir(base_dir) + '/filters.txt'
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not os.path.isfile(filters_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-07 08:56:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-10 11:43:33 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not text_in_file(words, filters_filename):
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-07 08:56:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    new_filters_filename = filters_filename + '.new'
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        with open(filters_filename, 'r', encoding='utf-8') as fp_filt:
							 | 
						
					
						
							
								
									
										
										
										
											2024-07-16 12:20:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            with open(new_filters_filename, 'w+', encoding='utf-8') as fp_new:
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                for line in fp_filt:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-21 11:58:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    line = remove_eol(line)
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    if line != words:
							 | 
						
					
						
							
								
									
										
										
										
											2024-07-16 12:20:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                        fp_new.write(line + '\n')
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except OSError as ex:
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        print('EX: unable to remove global filter ' +
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								              filters_filename + ' ' + str(ex))
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-02 11:47:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if os.path.isfile(new_filters_filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        os.rename(new_filters_filename, filters_filename)
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-07 08:56:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return True
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 11:29:55 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def _is_twitter_post(content: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2020-02-05 14:57:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Returns true if the given post content is a retweet or twitter crosspost
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-02 13:54:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    features = (
							 | 
						
					
						
							
								
									
										
										
										
											2024-08-03 10:05:55 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        '/x.com', '/twitter.', '/nitter.',
							 | 
						
					
						
							
								
									
										
										
										
											2024-08-03 15:00:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        '@twitter.', '@nitter.', '@x.com',
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-17 15:56:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        '>RT <', '_tw<', '_tw@', 'tweet', 'Tweet', '🐦🔗'
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-02 13:54:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for feat in features:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if feat in content:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return True
							 | 
						
					
						
							
								
									
										
										
										
											2020-02-05 14:57:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-03 10:11:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2025-05-04 09:52:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def filtered_match(filter_str: str, content: str) -> bool:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Does the given filter match the content?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if '*' not in filter_str:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if filter_str in content:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    elif fnmatch.fnmatchcase(content, filter_str):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def _is_filtered_base(filename: str, content: str,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                      system_language: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 13:21:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Uses the given file containing filtered words to check
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    the given content
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 13:23:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if not os.path.isfile(filename):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    content = remove_inverted_text(content, system_language)
							 | 
						
					
						
							
								
									
										
										
										
											2022-10-05 17:55:24 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    content = remove_square_capitals(content, system_language)
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-09 10:37:33 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # convert any fancy characters to ordinary ones
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    content = standardize_text(content)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-09 14:46:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        with open(filename, 'r', encoding='utf-8') as fp_filt:
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            for line in fp_filt:
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-21 11:58:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                filter_str = remove_eol(line)
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if not filter_str:
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    continue
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if len(filter_str) < 2:
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    continue
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if '+' not in filter_str:
							 | 
						
					
						
							
								
									
										
										
										
											2025-05-04 09:52:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    if filtered_match(filter_str, content):
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                        return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                else:
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    filter_words = filter_str.replace('"', '').split('+')
							 | 
						
					
						
							
								
									
										
										
										
											2025-05-04 09:52:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    for filter_wrd in filter_words:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        if not filtered_match(filter_wrd, content):
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-26 14:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                            return False
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 13:21:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    return True
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-25 15:28:52 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except OSError as ex:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        print('EX: _is_filtered_base ' + filename + ' ' + str(ex))
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 13:21:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def is_filtered_globally(base_dir: str, content: str,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         system_language: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-28 21:28:41 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Is the given content globally filtered?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2024-05-12 12:35:26 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    global_filters_filename = data_dir(base_dir) + '/filters.txt'
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if _is_filtered_base(global_filters_filename, content,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         system_language):
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-28 21:28:41 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def is_filtered_bio(base_dir: str,
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    nickname: str, domain: str, bio: str,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    system_language: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-14 13:27:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Should the given actor bio be filtered out?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if is_filtered_globally(base_dir, bio, system_language):
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-14 13:27:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not nickname or not domain:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    account_filters_filename = \
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-26 12:02:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        acct_dir(base_dir, nickname, domain) + '/filters_bio.txt'
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _is_filtered_base(account_filters_filename, bio, system_language)
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-14 13:27:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def is_filtered(base_dir: str, nickname: str, domain: str,
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                content: str, system_language: str) -> bool:
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-14 20:50:27 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """Should the given content be filtered out?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    This is a simple type of filter which just matches words, not a regex
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    You can add individual words or use word1+word2 to indicate that two
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    words must be present although not necessarily adjacent
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if is_filtered_globally(base_dir, content, system_language):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 13:21:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return True
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-19 11:41:40 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if not nickname or not domain:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-02-05 14:57:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # optionally remove retweets
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    remove_twitter = acct_dir(base_dir, nickname, domain) + '/.removeTwitter'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if os.path.isfile(remove_twitter):
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-29 21:55:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if _is_twitter_post(content):
							 | 
						
					
						
							
								
									
										
										
										
											2020-02-05 14:57:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-02 12:53:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    account_filters_filename = \
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-26 12:02:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        acct_dir(base_dir, nickname, domain) + '/filters.txt'
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 17:26:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _is_filtered_base(account_filters_filename, content,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                             system_language)
							 | 
						
					
						
							
								
									
										
										
										
											2023-02-28 17:38:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def is_question_filtered(base_dir: str, nickname: str, domain: str,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         system_language: str, question_json: {}) -> bool:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """is the given question filtered based on its options?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if question_json.get('oneOf'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        question_options = question_json['oneOf']
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        question_options = question_json['object']['oneOf']
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for option in question_options:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if option.get('name'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if is_filtered(base_dir, nickname, domain, option['name'],
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                           system_language):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return False
							 |