2020-04-03 10:11:54 +00:00
|
|
|
__filename__ = "filters.py"
|
|
|
|
__author__ = "Bob Mottram"
|
|
|
|
__license__ = "AGPL3+"
|
2024-01-21 19:01:20 +00:00
|
|
|
__version__ = "1.5.0"
|
2020-04-03 10:11:54 +00:00
|
|
|
__maintainer__ = "Bob Mottram"
|
2021-09-10 16:14:50 +00:00
|
|
|
__email__ = "bob@libreserver.org"
|
2020-04-03 10:11:54 +00:00
|
|
|
__status__ = "Production"
|
2021-06-26 11:16:41 +00:00
|
|
|
__module_group__ = "Moderation"
|
2019-07-14 20:50:27 +00:00
|
|
|
|
|
|
|
import os
|
2024-05-12 12:35:26 +00:00
|
|
|
from utils import data_dir
|
2021-12-26 12:02:29 +00:00
|
|
|
from utils import acct_dir
|
2022-06-10 11:43:33 +00:00
|
|
|
from utils import text_in_file
|
2022-06-21 11:58:50 +00:00
|
|
|
from utils import remove_eol
|
2022-07-09 10:54:05 +00:00
|
|
|
from utils import standardize_text
|
2022-09-25 17:26:11 +00:00
|
|
|
from utils import remove_inverted_text
|
2022-10-05 17:55:24 +00:00
|
|
|
from utils import remove_square_capitals
|
2022-07-09 10:37:33 +00:00
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def add_filter(base_dir: str, nickname: str, domain: str, words: str) -> bool:
    """Adds a filter for particular words within the content of
    incoming posts for the given account.

    The filter text is appended as a new line to the filters.txt file
    within the account directory.

    Returns True if the filter was appended. Returns False if the filter
    text is empty or shorter than two characters, if text_in_file reports
    that it is already within the filters file, or if the file could not
    be written.
    """
    # Filters shorter than two characters are skipped when content is
    # checked, so refuse them here in the same way as add_global_filter
    if not words:
        return False
    if len(words) < 2:
        return False

    filters_filename = acct_dir(base_dir, nickname, domain) + '/filters.txt'
    if os.path.isfile(filters_filename):
        # avoid adding the same filter twice
        if text_in_file(words, filters_filename):
            return False
    try:
        with open(filters_filename, 'a+',
                  encoding='utf-8') as fp_filters:
            fp_filters.write(words + '\n')
    except OSError:
        print('EX: unable to append filters ' + filters_filename)
        return False
    return True
|
|
|
|
|
2020-04-03 10:11:54 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def add_global_filter(base_dir: str, words: str) -> bool:
    """Appends a filter to the global filters file, which is
    filters.txt within the data directory.

    Returns True if the filter was appended. Returns False if the filter
    text is missing or shorter than two characters, if text_in_file
    reports that it is already within the file, or if writing failed.
    """
    if not words or len(words) < 2:
        return False

    global_filters_path = data_dir(base_dir) + '/filters.txt'
    already_listed = \
        os.path.isfile(global_filters_path) and \
        text_in_file(words, global_filters_path)
    if already_listed:
        return False

    try:
        with open(global_filters_path, 'a+', encoding='utf-8') as fp_glob:
            fp_glob.write(words + '\n')
    except OSError:
        print('EX: unable to append filters ' + global_filters_path)
        return False
    return True
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def remove_filter(base_dir: str, nickname: str, domain: str,
                  words: str) -> bool:
    """Removes a word filter from the given account.

    Every line of the account's filters.txt which differs from the given
    words is copied into a temporary '.new' file, which then replaces
    the original file.

    Returns True if the filters file was rewritten. Returns False if the
    filters file does not exist, if text_in_file does not find the words
    within it, or if the file could not be rewritten.
    """
    filters_filename = acct_dir(base_dir, nickname, domain) + '/filters.txt'
    if not os.path.isfile(filters_filename):
        return False
    if not text_in_file(words, filters_filename):
        return False

    new_filters_filename = filters_filename + '.new'
    try:
        with open(filters_filename, 'r', encoding='utf-8') as fp_filt:
            with open(new_filters_filename, 'w+', encoding='utf-8') as fp_new:
                for line in fp_filt:
                    line = remove_eol(line)
                    if line != words:
                        fp_new.write(line + '\n')
    except OSError as ex:
        print('EX: unable to remove filter ' +
              filters_filename + ' ' + str(ex))
        # don't leave a partially written temporary file behind
        if os.path.isfile(new_filters_filename):
            try:
                os.remove(new_filters_filename)
            except OSError:
                print('EX: unable to delete ' + new_filters_filename)
        return False

    if not os.path.isfile(new_filters_filename):
        return False
    try:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows if the destination exists
        os.replace(new_filters_filename, filters_filename)
    except OSError as ex:
        print('EX: unable to remove filter ' +
              filters_filename + ' ' + str(ex))
        return False
    return True
|
2020-02-05 14:57:10 +00:00
|
|
|
|
2020-04-03 10:11:54 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def remove_global_filter(base_dir: str, words: str) -> bool:
    """Removes a global word filter.

    Every line of filters.txt within the data directory which differs
    from the given words is copied into a temporary '.new' file, which
    then replaces the original file.

    Returns True if the filters file was rewritten. Returns False if the
    filters file does not exist, if text_in_file does not find the words
    within it, or if the file could not be rewritten.
    """
    filters_filename = data_dir(base_dir) + '/filters.txt'
    if not os.path.isfile(filters_filename):
        return False
    if not text_in_file(words, filters_filename):
        return False

    new_filters_filename = filters_filename + '.new'
    try:
        with open(filters_filename, 'r', encoding='utf-8') as fp_filt:
            with open(new_filters_filename, 'w+', encoding='utf-8') as fp_new:
                for line in fp_filt:
                    line = remove_eol(line)
                    if line != words:
                        fp_new.write(line + '\n')
    except OSError as ex:
        print('EX: unable to remove global filter ' +
              filters_filename + ' ' + str(ex))
        # don't leave a partially written temporary file behind
        if os.path.isfile(new_filters_filename):
            try:
                os.remove(new_filters_filename)
            except OSError:
                print('EX: unable to delete ' + new_filters_filename)
        return False

    if not os.path.isfile(new_filters_filename):
        return False
    try:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows if the destination exists
        os.replace(new_filters_filename, filters_filename)
    except OSError as ex:
        print('EX: unable to remove global filter ' +
              filters_filename + ' ' + str(ex))
        return False
    return True
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def _is_twitter_post(content: str) -> bool:
    """Detects whether the post content looks like a retweet or a
    twitter crosspost, by searching it for known marker substrings
    """
    twitter_markers = (
        '/x.com', '/twitter.', '/nitter.',
        '@twitter.', '@nitter.', '@x.com',
        '>RT <', '_tw<', '_tw@', 'tweet', 'Tweet', '🐦🔗'
    )
    # a single marker anywhere within the content is sufficient
    return any(marker in content for marker in twitter_markers)
|
|
|
|
|
2020-04-03 10:11:54 +00:00
|
|
|
|
2022-09-25 17:26:11 +00:00
|
|
|
def _is_filtered_base(filename: str, content: str,
                      system_language: str) -> bool:
    """Uses the given file containing filtered words to check
    the given content.

    Each line of the file is one filter. A line without '+' matches if
    it occurs anywhere within the content. A line of the form word1+word2
    matches only if every one of its words occurs within the content.
    Lines shorter than two characters are ignored.

    Before matching, the content is passed through remove_inverted_text,
    remove_square_capitals and standardize_text.

    Returns True if any filter matches, otherwise False, including when
    the file does not exist or cannot be opened.
    """
    if not os.path.isfile(filename):
        return False

    content = remove_inverted_text(content, system_language)
    content = remove_square_capitals(content, system_language)

    # convert any fancy characters to ordinary ones
    content = standardize_text(content)

    try:
        with open(filename, 'r', encoding='utf-8') as fp_filt:
            for line in fp_filt:
                filter_str = remove_eol(line)
                if not filter_str:
                    continue
                if len(filter_str) < 2:
                    continue
                if '+' not in filter_str:
                    if filter_str in content:
                        return True
                    continue

                # Drop empty words, otherwise a line made only of '+'
                # characters would match every content
                filter_words = [
                    word for word in
                    filter_str.replace('"', '').split('+') if word
                ]
                if not filter_words:
                    continue
                # A compound filter which does not match must not end the
                # scan, because later lines of the file may still match
                if all(word in content for word in filter_words):
                    return True
    except OSError as ex:
        print('EX: _is_filtered_base ' + filename + ' ' + str(ex))
    return False
|
|
|
|
|
|
|
|
|
2022-09-25 17:26:11 +00:00
|
|
|
def is_filtered_globally(base_dir: str, content: str,
                         system_language: str) -> bool:
    """Returns true if the given content matches the global filters,
    which are read from filters.txt within the data directory
    """
    filters_path = data_dir(base_dir) + '/filters.txt'
    matched = _is_filtered_base(filters_path, content, system_language)
    return bool(matched)
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def is_filtered_bio(base_dir: str,
                    nickname: str, domain: str, bio: str,
                    system_language: str) -> bool:
    """Returns true if the given actor bio matches the global filters
    or the filters_bio.txt filters of the given account
    """
    # global filters apply even when no account is specified
    if is_filtered_globally(base_dir, bio, system_language):
        return True

    # the account filters can't be located without nickname and domain
    if not (nickname and domain):
        return False

    bio_filters_path = \
        acct_dir(base_dir, nickname, domain) + '/filters_bio.txt'
    return _is_filtered_base(bio_filters_path, bio, system_language)
|
2021-12-14 13:27:00 +00:00
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def is_filtered(base_dir: str, nickname: str, domain: str,
                content: str, system_language: str) -> bool:
    """Returns true if the given content should be filtered out.
    Filtering is plain word matching rather than regex matching.
    A filter is either a single word, or word1+word2 to require that
    both words are present, although not necessarily next to each other
    """
    # global filters apply even when no account is specified
    if is_filtered_globally(base_dir, content, system_language):
        return True

    # the account settings can't be located without nickname and domain
    if not (nickname and domain):
        return False

    # retweets are removed only if the account has the marker file
    twitter_flag_path = \
        acct_dir(base_dir, nickname, domain) + '/.removeTwitter'
    if os.path.isfile(twitter_flag_path) and _is_twitter_post(content):
        return True

    # finally apply the filters of the account itself
    filters_path = acct_dir(base_dir, nickname, domain) + '/filters.txt'
    return _is_filtered_base(filters_path, content, system_language)
|
2023-02-28 17:38:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
def is_question_filtered(base_dir: str, nickname: str, domain: str,
                         system_language: str, question_json: {}) -> bool:
    """Is the given question filtered based on its options?

    The options are taken from the 'oneOf' list of the question or,
    failing that, of its 'object'. If neither has one then the 'anyOf'
    list used by multiple choice questions is looked for in the same way.

    Returns True if the name of any option is filtered for the given
    account. Returns False otherwise, including when no options are found.
    """
    if not isinstance(question_json, dict):
        return False

    # the options may be on the question itself or on its object
    candidates = [question_json]
    post_object = question_json.get('object')
    if isinstance(post_object, dict):
        candidates.append(post_object)

    # oneOf is searched first on both levels, which keeps the original
    # order of preference
    question_options = []
    for field_name in ('oneOf', 'anyOf'):
        for candidate in candidates:
            field_value = candidate.get(field_name)
            if field_value and isinstance(field_value, list):
                question_options = field_value
                break
        if question_options:
            break

    for option in question_options:
        if not isinstance(option, dict):
            continue
        option_name = option.get('name')
        if not option_name or not isinstance(option_name, str):
            continue
        if is_filtered(base_dir, nickname, domain, option_name,
                       system_language):
            return True
    return False
|