epicyon/filters.py

185 lines
6.1 KiB
Python
Raw Normal View History

2020-04-03 10:11:54 +00:00
__filename__ = "filters.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2021-01-26 10:07:42 +00:00
__version__ = "1.2.0"
2020-04-03 10:11:54 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2020-04-03 10:11:54 +00:00
__status__ = "Production"
2021-06-26 11:16:41 +00:00
__module_group__ = "Moderation"
2019-07-14 20:50:27 +00:00
import os
2021-12-26 12:02:29 +00:00
from utils import acct_dir
2019-07-14 20:50:27 +00:00
2020-04-03 10:11:54 +00:00
2021-12-29 21:55:09 +00:00
def add_filter(base_dir: str, nickname: str, domain: str, words: str) -> bool:
    """Adds a filter for particular words within the content of
    incoming posts, for the given account.

    The filter is appended as a new line to the account's filters.txt.
    Returns True if the filter was written, or False if it is empty or
    shorter than two characters, is already present as a line of the
    file, or the file could not be read or written.
    """
    # Filters shorter than two characters are ignored when matching,
    # so there is no point in storing them
    if not words or len(words) < 2:
        return False

    filters_filename = acct_dir(base_dir, nickname, domain) + '/filters.txt'
    if os.path.isfile(filters_filename):
        try:
            with open(filters_filename, 'r') as fp_filt:
                # Compare whole lines: a substring test would refuse to
                # add "cat" when "category" is already in the file
                already_exists = \
                    any(line.rstrip('\n') == words for line in fp_filt)
        except OSError as ex:
            print('EX: unable to read filters ' +
                  filters_filename + ' ' + str(ex))
            return False
        if already_exists:
            return False

    try:
        with open(filters_filename, 'a+') as filters_file:
            filters_file.write(words + '\n')
    except OSError:
        print('EX: unable to append filters ' + filters_filename)
        # nothing was stored, so don't report success
        return False
    return True
2020-04-03 10:11:54 +00:00
2021-12-29 21:55:09 +00:00
def add_global_filter(base_dir: str, words: str) -> bool:
    """Adds a global filter for particular words within
    the content of incoming posts.

    The filter is appended as a new line to accounts/filters.txt
    beneath base_dir. Returns True if the filter was written, or False
    if it is empty or shorter than two characters, is already present
    as a line of the file, or the file could not be read or written.
    """
    if not words or len(words) < 2:
        return False

    filters_filename = base_dir + '/accounts/filters.txt'
    if os.path.isfile(filters_filename):
        try:
            with open(filters_filename, 'r') as fp_filt:
                # Compare whole lines: a substring test would refuse to
                # add "cat" when "category" is already in the file
                already_exists = \
                    any(line.rstrip('\n') == words for line in fp_filt)
        except OSError as ex:
            print('EX: unable to read filters ' +
                  filters_filename + ' ' + str(ex))
            return False
        if already_exists:
            return False

    try:
        with open(filters_filename, 'a+') as filters_file:
            filters_file.write(words + '\n')
    except OSError:
        print('EX: unable to append filters ' + filters_filename)
        # nothing was stored, so don't report success
        return False
    return True
2021-12-29 21:55:09 +00:00
def remove_filter(base_dir: str, nickname: str, domain: str,
                  words: str) -> bool:
    """Removes a word filter from the given account.

    Every line of the account's filters.txt which exactly equals words
    is dropped. Returns True if a filter was removed, or False if the
    file does not exist, contains no such line, or could not be
    rewritten.
    """
    filters_filename = acct_dir(base_dir, nickname, domain) + '/filters.txt'
    if not os.path.isfile(filters_filename):
        return False

    new_filters_filename = filters_filename + '.new'
    try:
        with open(filters_filename, 'r') as fp_filt:
            filter_lines = [line.replace('\n', '') for line in fp_filt]
        # Whole-line comparison: a substring match would report success
        # without actually removing anything
        if words not in filter_lines:
            return False
        with open(new_filters_filename, 'w+') as fpnew:
            for line in filter_lines:
                if line != words:
                    fpnew.write(line + '\n')
        # Only swap the new file in once it has been fully written, so
        # that a failed write can't truncate the existing filters
        os.replace(new_filters_filename, filters_filename)
    except OSError as ex:
        print('EX: unable to remove filter ' +
              filters_filename + ' ' + str(ex))
        # discard any partially written replacement file
        if os.path.isfile(new_filters_filename):
            try:
                os.remove(new_filters_filename)
            except OSError as ex2:
                print('EX: unable to delete ' +
                      new_filters_filename + ' ' + str(ex2))
        return False
    return True
2020-02-05 14:57:10 +00:00
2020-04-03 10:11:54 +00:00
2021-12-29 21:55:09 +00:00
def remove_global_filter(base_dir: str, words: str) -> bool:
    """Removes a global word filter.

    Every line of accounts/filters.txt beneath base_dir which exactly
    equals words is dropped. Returns True if a filter was removed, or
    False if the file does not exist, contains no such line, or could
    not be rewritten.
    """
    filters_filename = base_dir + '/accounts/filters.txt'
    if not os.path.isfile(filters_filename):
        return False

    new_filters_filename = filters_filename + '.new'
    try:
        with open(filters_filename, 'r') as fp_filt:
            filter_lines = [line.replace('\n', '') for line in fp_filt]
        # Whole-line comparison: a substring match would report success
        # without actually removing anything
        if words not in filter_lines:
            return False
        with open(new_filters_filename, 'w+') as fpnew:
            for line in filter_lines:
                if line != words:
                    fpnew.write(line + '\n')
        # Only swap the new file in once it has been fully written, so
        # that a failed write can't truncate the existing filters
        os.replace(new_filters_filename, filters_filename)
    except OSError as ex:
        print('EX: unable to remove global filter ' +
              filters_filename + ' ' + str(ex))
        # discard any partially written replacement file
        if os.path.isfile(new_filters_filename):
            try:
                os.remove(new_filters_filename)
            except OSError as ex2:
                print('EX: unable to delete ' +
                      new_filters_filename + ' ' + str(ex2))
        return False
    return True
2021-12-29 21:55:09 +00:00
def _is_twitter_post(content: str) -> bool:
    """Returns true if the given post content looks like a retweet
    or a crosspost from twitter
    """
    # substrings whose presence marks the content as twitter derived
    twitter_markers = ('/twitter.', '@twitter.', '>RT <')
    return any(marker in content for marker in twitter_markers)
2020-04-03 10:11:54 +00:00
2021-12-29 21:55:09 +00:00
def _is_filtered_base(filename: str, content: str) -> bool:
    """Uses the given file containing filtered words to check
    the given content.

    Each line of the file is one filter. A line without '+' matches if
    it appears anywhere within the content. A line of the form
    word1+word2 matches only if every word appears within the content
    (double quotes are removed from such lines before splitting).
    Lines shorter than two characters are ignored.
    Returns True if any filter matches, or False if none match, the
    file does not exist or it could not be read.
    """
    if not os.path.isfile(filename):
        return False

    try:
        with open(filename, 'r') as fp_filt:
            for line in fp_filt:
                filter_str = line.replace('\n', '').replace('\r', '')
                if len(filter_str) < 2:
                    continue
                if '+' not in filter_str:
                    if filter_str in content:
                        return True
                    continue
                # Drop empty words, which would otherwise match any
                # content (eg. a line consisting only of '+')
                filter_words = [
                    word
                    for word in filter_str.replace('"', '').split('+')
                    if word
                ]
                # A non-matching combination must not end the scan,
                # because later lines in the file may still match
                if filter_words and \
                   all(word in content for word in filter_words):
                    return True
    except OSError as ex:
        print('EX: _is_filtered_base ' + filename + ' ' + str(ex))
    return False
2021-12-29 21:55:09 +00:00
def is_filtered_globally(base_dir: str, content: str) -> bool:
    """Returns true if the given content matches one of the global
    filters stored in accounts/filters.txt
    """
    return _is_filtered_base(base_dir + '/accounts/filters.txt', content)
2021-12-29 21:55:09 +00:00
def is_filtered_bio(base_dir: str,
                    nickname: str, domain: str, bio: str) -> bool:
    """Returns true if the given actor bio should be filtered out,
    either by the global filters or by the account's filters_bio.txt
    """
    # the global filters are checked before the per-account ones
    if is_filtered_globally(base_dir, bio):
        return True
    # without both nickname and domain there is no account to check
    if not (nickname and domain):
        return False
    bio_filters_filename = \
        acct_dir(base_dir, nickname, domain) + '/filters_bio.txt'
    return _is_filtered_base(bio_filters_filename, bio)
2021-12-29 21:55:09 +00:00
def is_filtered(base_dir: str, nickname: str, domain: str,
                content: str) -> bool:
    """Returns true if the given content should be filtered out.
    Filters are plain word matches rather than regular expressions.
    A filter is either a single word, or word1+word2 meaning that
    both words must be present, although not necessarily adjacent
    """
    # the global filters are checked before the per-account ones
    if is_filtered_globally(base_dir, content):
        return True

    # without both nickname and domain there is no account to check
    if not (nickname and domain):
        return False

    # accounts opt in to removing retweets via a .removeTwitter file
    twitter_flag_filename = \
        acct_dir(base_dir, nickname, domain) + '/.removeTwitter'
    if os.path.isfile(twitter_flag_filename) and _is_twitter_post(content):
        return True

    filters_filename = \
        acct_dir(base_dir, nickname, domain) + '/filters.txt'
    return _is_filtered_base(filters_filename, content)