diff --git a/inbox.py b/inbox.py index f2f4b4761..1ed30a05f 100644 --- a/inbox.py +++ b/inbox.py @@ -18,6 +18,7 @@ from languages import understood_post_language from like import update_likes_collection from reaction import update_reaction_collection from reaction import valid_emoji_content +from utils import get_media_descriptions_from_post from utils import get_summary_from_post from utils import delete_cached_html from utils import get_account_timezone @@ -640,8 +641,11 @@ def save_post_to_inbox_queue(base_dir: str, http_prefix: str, summary_str = \ get_summary_from_post(post_json_object, system_language, []) - if is_filtered(base_dir, nickname, domain, - summary_str + ' ' + content_str): + media_descriptions = \ + get_media_descriptions_from_post(post_json_object) + content_all = \ + summary_str + ' ' + content_str + ' ' + media_descriptions + if is_filtered(base_dir, nickname, domain, content_all): if debug: print('WARN: post was filtered out due to content') return None @@ -2688,9 +2692,10 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str, return False # check for filtered content + media_descriptions = get_media_descriptions_from_post(message_json) content_all = content_str if summary: - content_all = summary + ' ' + content_str + content_all = summary + ' ' + content_str + ' ' + media_descriptions if is_filtered(base_dir, nickname, domain, content_all): print('REJECT: content filtered') return False diff --git a/posts.py b/posts.py index 64cb9183f..733a34c8b 100644 --- a/posts.py +++ b/posts.py @@ -32,6 +32,7 @@ from webfinger import webfinger_handle from httpsig import create_signed_header from siteactive import site_is_active from languages import understood_post_language +from utils import get_media_descriptions_from_post from utils import valid_hash_tag from utils import get_audio_extensions from utils import get_summary_from_post @@ -5004,9 +5005,12 @@ def download_announce(session, base_dir: str, http_prefix: str, return None 
def get_media_descriptions_from_post(post_json_object: {}) -> str:
    """Returns all attached media descriptions as a single text.
    This is used for filtering

    The attachments are read from post_json_object['object'] when
    has_object_dict() reports that one exists, otherwise from
    post_json_object itself. For each attachment having a 'name' the
    name is collected, followed by its 'url' if there is one.
    Attachments without a name are skipped entirely, including
    their url.

    Returns the collected texts separated by single spaces with any
    surrounding whitespace removed, or an empty string if there is
    nothing to collect.
    """
    this_post_json = post_json_object
    if has_object_dict(post_json_object):
        this_post_json = post_json_object['object']
    attachments = this_post_json.get('attachment')
    if not attachments:
        return ''
    # A single attachment object may appear in place of a list.
    # Iterating over a dict would yield its keys rather than attachments
    if isinstance(attachments, dict):
        attachments = [attachments]
    elif not isinstance(attachments, list):
        return ''
    texts = []
    for attach in attachments:
        # this is called on posts arriving at the inbox and on downloaded
        # announces, so a malformed entry should not raise an exception
        if not isinstance(attach, dict):
            continue
        name = attach.get('name')
        if not name or not isinstance(name, str):
            continue
        texts.append(name)
        urls = attach.get('url')
        if not urls:
            continue
        # url can be a string, a Link object with an href,
        # or a list containing either of those
        if not isinstance(urls, list):
            urls = [urls]
        for url in urls:
            if isinstance(url, dict):
                url = url.get('href')
            if url and isinstance(url, str):
                texts.append(url)
    # join then strip gives the same text as appending 'item '
    # for each item and stripping the result
    return ' '.join(texts).strip()