Post filtering now also checks the post summary

main
Bob Mottram 2022-06-02 18:47:56 +01:00
parent be5360dc79
commit 1708a928ef
3 changed files with 17 additions and 5 deletions

View File

@ -102,7 +102,7 @@ def _is_twitter_post(content: str) -> bool:
""" """
features = ( features = (
'/twitter.', '/nitter.', '@twitter.', '@nitter.', '/twitter.', '/nitter.', '@twitter.', '@nitter.',
'>RT <', '_tw<', '_tw@' '>RT <', '_tw<', '_tw@', 'tweet', 'Tweet'
) )
for feat in features: for feat in features:
if feat in content: if feat in content:

View File

@ -18,6 +18,7 @@ from languages import understood_post_language
from like import update_likes_collection from like import update_likes_collection
from reaction import update_reaction_collection from reaction import update_reaction_collection
from reaction import valid_emoji_content from reaction import valid_emoji_content
from utils import get_summary_from_post
from utils import delete_cached_html from utils import delete_cached_html
from utils import get_account_timezone from utils import get_account_timezone
from utils import domain_permitted from utils import domain_permitted
@ -636,7 +637,11 @@ def save_post_to_inbox_queue(base_dir: str, http_prefix: str,
content_str = \ content_str = \
get_base_content_from_post(post_json_object, system_language) get_base_content_from_post(post_json_object, system_language)
if content_str: if content_str:
if is_filtered(base_dir, nickname, domain, content_str): summary_str = \
get_summary_from_post(post_json_object,
system_language, [])
if is_filtered(base_dir, nickname, domain,
summary_str + ' ' + content_str):
if debug: if debug:
print('WARN: post was filtered out due to content') print('WARN: post was filtered out due to content')
return None return None
@ -2683,7 +2688,10 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
return False return False
# check for filtered content # check for filtered content
if is_filtered(base_dir, nickname, domain, content_str): content_all = content_str
if summary:
content_all = summary + ' ' + content_str
if is_filtered(base_dir, nickname, domain, content_all):
print('REJECT: content filtered') print('REJECT: content filtered')
return False return False
if message_json['object'].get('inReplyTo'): if message_json['object'].get('inReplyTo'):

View File

@ -5002,8 +5002,12 @@ def download_announce(session, base_dir: str, http_prefix: str,
base_dir, nickname, domain, post_id, base_dir, nickname, domain, post_id,
recent_posts_cache) recent_posts_cache)
return None return None
summary_str = \
if is_filtered(base_dir, nickname, domain, content_str): get_summary_from_post(announced_json, system_language, [])
content_all = content_str
if summary_str:
content_all = summary_str + ' ' + content_str
if is_filtered(base_dir, nickname, domain, content_all):
print('WARN: announced post has been filtered ' + print('WARN: announced post has been filtered ' +
str(announced_json)) str(announced_json))
_reject_announce(announce_filename, _reject_announce(announce_filename,