Post filtering includes summary

main
Bob Mottram 2022-06-02 18:47:56 +01:00
parent be5360dc79
commit 1708a928ef
3 changed files with 17 additions and 5 deletions

View File

@@ -102,7 +102,7 @@ def _is_twitter_post(content: str) -> bool:
"""
features = (
'/twitter.', '/nitter.', '@twitter.', '@nitter.',
'>RT <', '_tw<', '_tw@'
'>RT <', '_tw<', '_tw@', 'tweet', 'Tweet'
)
for feat in features:
if feat in content:

View File

@@ -18,6 +18,7 @@ from languages import understood_post_language
from like import update_likes_collection
from reaction import update_reaction_collection
from reaction import valid_emoji_content
from utils import get_summary_from_post
from utils import delete_cached_html
from utils import get_account_timezone
from utils import domain_permitted
@@ -636,7 +637,11 @@ def save_post_to_inbox_queue(base_dir: str, http_prefix: str,
content_str = \
get_base_content_from_post(post_json_object, system_language)
if content_str:
if is_filtered(base_dir, nickname, domain, content_str):
summary_str = \
get_summary_from_post(post_json_object,
system_language, [])
if is_filtered(base_dir, nickname, domain,
summary_str + ' ' + content_str):
if debug:
print('WARN: post was filtered out due to content')
return None
@@ -2683,7 +2688,10 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
return False
# check for filtered content
if is_filtered(base_dir, nickname, domain, content_str):
content_all = content_str
if summary:
content_all = summary + ' ' + content_str
if is_filtered(base_dir, nickname, domain, content_all):
print('REJECT: content filtered')
return False
if message_json['object'].get('inReplyTo'):

View File

@@ -5002,8 +5002,12 @@ def download_announce(session, base_dir: str, http_prefix: str,
base_dir, nickname, domain, post_id,
recent_posts_cache)
return None
if is_filtered(base_dir, nickname, domain, content_str):
summary_str = \
get_summary_from_post(announced_json, system_language, [])
content_all = content_str
if summary_str:
content_all = summary_str + ' ' + content_str
if is_filtered(base_dir, nickname, domain, content_all):
print('WARN: announced post has been filtered ' +
str(announced_json))
_reject_announce(announce_filename,