epicyon/filters.py


__filename__ = "filters.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.3.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Moderation"

import os
from utils import acct_dir
from utils import text_in_file
from utils import remove_eol


def _standardize_text_range(text: str,
                            range_start: int, range_end: int,
                            offset: str) -> str:
    """Convert any fancy characters within the given range into ordinary ones
    """
    offset = ord(offset)
    ctr = 0
    text = list(text)
    while ctr < len(text):
        val = ord(text[ctr])
        if val in range(range_start, range_end):
            text[ctr] = chr(val - range_start + offset)
        ctr += 1
    return "".join(text)


def standardize_text(text: str) -> str:
    """Converts fancy unicode text to ordinary letters
    """
    # starting codepoints of the lowercase a-z blocks for several of the
    # unicode "mathematical" letter styles (script, fraktur, bold fraktur,
    # bold script, double-struck and italic)
    fancy_ranges = (
        119990, 120094, 120198, 120042, 119990, 120146, 119886
    )

    for range_start in fancy_ranges:
        # convert the lowercase block for this style
        range_end = range_start + 26
        text = _standardize_text_range(text, range_start, range_end, 'a')

        # the following 26 codepoints are the uppercase letters of the
        # next style, so convert those as well
        range_start = range_end
        range_end = range_start + 26
        text = _standardize_text_range(text, range_start, range_end, 'A')

    # fullwidth lowercase and uppercase latin letters
    text = _standardize_text_range(text, 65345, 65345 + 26, 'a')
    text = _standardize_text_range(text, 65313, 65313 + 26, 'A')
    # mathematical script capital letters
    text = _standardize_text_range(text, 119964, 119964 + 26, 'A')
    return text
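
# Illustrative example for standardize_text above (not part of the module):
#   fancy = chr(0x1D4AF) + chr(0x1D452) + chr(0x1D4C8) + chr(0x1D4C9)
#   standardize_text(fancy)
# would be expected to return 'Test', since mathematical script and italic
# letters fall within the ranges handled above.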


def add_filter(base_dir: str, nickname: str, domain: str, words: str) -> bool:
    """Adds a filter for particular words within the content of incoming posts
    """
    filters_filename = acct_dir(base_dir, nickname, domain) + '/filters.txt'
    if os.path.isfile(filters_filename):
        if text_in_file(words, filters_filename):
            return False
    try:
        with open(filters_filename, 'a+',
                  encoding='utf-8') as filters_file:
            filters_file.write(words + '\n')
    except OSError:
        print('EX: unable to append filters ' + filters_filename)
    return True
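
# Illustrative usage of add_filter above (hypothetical paths and account):
#   add_filter('/var/www/epicyon', 'alice', 'example.com', 'spamword')
# appends 'spamword' to that account's filters.txt, or returns False if the
# filter is already present.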


def add_global_filter(base_dir: str, words: str) -> bool:
    """Adds a global filter for particular words within
    the content of incoming posts
    """
    if not words:
        return False
    if len(words) < 2:
        return False
    filters_filename = base_dir + '/accounts/filters.txt'
    if os.path.isfile(filters_filename):
        if text_in_file(words, filters_filename):
            return False
    try:
        with open(filters_filename, 'a+',
                  encoding='utf-8') as filters_file:
            filters_file.write(words + '\n')
    except OSError:
        print('EX: unable to append filters ' + filters_filename)
    return True
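
# Illustrative usage of add_global_filter above (hypothetical base_dir):
#   add_global_filter('/var/www/epicyon', 'word1+word2')
# stores a combined filter in accounts/filters.txt which later only matches
# content containing both words.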


def remove_filter(base_dir: str, nickname: str, domain: str,
                  words: str) -> bool:
    """Removes a word filter
    """
    filters_filename = acct_dir(base_dir, nickname, domain) + '/filters.txt'
    if not os.path.isfile(filters_filename):
        return False
    if not text_in_file(words, filters_filename):
        return False
    new_filters_filename = filters_filename + '.new'
    try:
        with open(filters_filename, 'r', encoding='utf-8') as fp_filt:
            with open(new_filters_filename, 'w+', encoding='utf-8') as fpnew:
                for line in fp_filt:
                    line = remove_eol(line)
                    if line != words:
                        fpnew.write(line + '\n')
    except OSError as ex:
        print('EX: unable to remove filter ' +
              filters_filename + ' ' + str(ex))
    if os.path.isfile(new_filters_filename):
        os.rename(new_filters_filename, filters_filename)
        return True
    return False
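
# Illustrative usage of remove_filter above (hypothetical paths and account):
#   remove_filter('/var/www/epicyon', 'alice', 'example.com', 'spamword')
# rewrites the account's filters.txt without the 'spamword' line.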


def remove_global_filter(base_dir: str, words: str) -> bool:
    """Removes a global word filter
    """
    filters_filename = base_dir + '/accounts/filters.txt'
    if not os.path.isfile(filters_filename):
        return False
    if not text_in_file(words, filters_filename):
        return False
    new_filters_filename = filters_filename + '.new'
    try:
        with open(filters_filename, 'r', encoding='utf-8') as fp_filt:
            with open(new_filters_filename, 'w+', encoding='utf-8') as fpnew:
                for line in fp_filt:
                    line = remove_eol(line)
                    if line != words:
                        fpnew.write(line + '\n')
    except OSError as ex:
        print('EX: unable to remove global filter ' +
              filters_filename + ' ' + str(ex))
    if os.path.isfile(new_filters_filename):
        os.rename(new_filters_filename, filters_filename)
        return True
    return False


def _is_twitter_post(content: str) -> bool:
    """Returns true if the given post content is a retweet
    or twitter crosspost
    """
    features = (
        '/twitter.', '/nitter.', '@twitter.', '@nitter.',
        '>RT <', '_tw<', '_tw@', 'tweet', 'Tweet'
    )
    for feat in features:
        if feat in content:
            return True
    return False
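
# Illustrative example for _is_twitter_post above (hypothetical content):
# a post containing 'via https://nitter.example.net/somebody' includes the
# '/nitter.' marker and would therefore be treated as a crosspost.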


def _is_filtered_base(filename: str, content: str) -> bool:
    """Uses the given file containing filtered words to check
    the given content
    """
    if not os.path.isfile(filename):
        return False

    # convert any fancy characters to ordinary ones
    content = standardize_text(content)

    try:
        with open(filename, 'r', encoding='utf-8') as fp_filt:
            for line in fp_filt:
                filter_str = remove_eol(line)
                if not filter_str:
                    continue
                if len(filter_str) < 2:
                    continue
                if '+' not in filter_str:
                    if filter_str in content:
                        return True
                else:
                    # word1+word2 requires every word to be present,
                    # although not necessarily adjacent
                    filter_words = filter_str.replace('"', '').split('+')
                    all_found = True
                    for word in filter_words:
                        if word not in content:
                            # this line doesn't match, but keep checking
                            # any remaining filter lines
                            all_found = False
                            break
                    if all_found:
                        return True
    except OSError as ex:
        print('EX: _is_filtered_base ' + filename + ' ' + str(ex))
    return False
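
# Filter file semantics used by _is_filtered_base above: each line is an
# independent filter; a plain line such as 'spamword' matches any content
# containing it, while 'word1+word2' only matches when every listed word
# appears somewhere in the content, not necessarily adjacent.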


def is_filtered_globally(base_dir: str, content: str) -> bool:
    """Is the given content globally filtered?
    """
    global_filters_filename = base_dir + '/accounts/filters.txt'
    if _is_filtered_base(global_filters_filename, content):
        return True
    return False


def is_filtered_bio(base_dir: str,
                    nickname: str, domain: str, bio: str) -> bool:
    """Should the given actor bio be filtered out?
    """
    if is_filtered_globally(base_dir, bio):
        return True
    if not nickname or not domain:
        return False
    account_filters_filename = \
        acct_dir(base_dir, nickname, domain) + '/filters_bio.txt'
    return _is_filtered_base(account_filters_filename, bio)


def is_filtered(base_dir: str, nickname: str, domain: str,
                content: str) -> bool:
    """Should the given content be filtered out?
    This is a simple type of filter which just matches words, not a regex.
    You can add individual words or use word1+word2 to indicate that two
    words must be present, although not necessarily adjacent
    """
    if is_filtered_globally(base_dir, content):
        return True
    if not nickname or not domain:
        return False

    # optionally remove retweets
    remove_twitter = acct_dir(base_dir, nickname, domain) + '/.removeTwitter'
    if os.path.isfile(remove_twitter):
        if _is_twitter_post(content):
            return True

    account_filters_filename = \
        acct_dir(base_dir, nickname, domain) + '/filters.txt'
    return _is_filtered_base(account_filters_filename, content)
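
# Illustrative usage of is_filtered above (hypothetical directory, account
# and content):
#   if is_filtered('/var/www/epicyon', 'alice', 'example.com', content):
#       # the post should be hidden or dropped by the caller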