mirror of https://gitlab.com/bashrc2/epicyon
Filtering includes image descriptions and URLs
parent
1708a928ef
commit
ec31deb739
11
inbox.py
11
inbox.py
|
@ -18,6 +18,7 @@ from languages import understood_post_language
|
|||
from like import update_likes_collection
|
||||
from reaction import update_reaction_collection
|
||||
from reaction import valid_emoji_content
|
||||
from utils import get_media_descriptions_from_post
|
||||
from utils import get_summary_from_post
|
||||
from utils import delete_cached_html
|
||||
from utils import get_account_timezone
|
||||
|
@ -640,8 +641,11 @@ def save_post_to_inbox_queue(base_dir: str, http_prefix: str,
|
|||
summary_str = \
|
||||
get_summary_from_post(post_json_object,
|
||||
system_language, [])
|
||||
if is_filtered(base_dir, nickname, domain,
|
||||
summary_str + ' ' + content_str):
|
||||
media_descriptions = \
|
||||
get_media_descriptions_from_post(post_json_object)
|
||||
content_all = \
|
||||
summary_str + ' ' + content_str + ' ' + media_descriptions
|
||||
if is_filtered(base_dir, nickname, domain, content_all):
|
||||
if debug:
|
||||
print('WARN: post was filtered out due to content')
|
||||
return None
|
||||
|
@ -2688,9 +2692,10 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
|
|||
return False
|
||||
|
||||
# check for filtered content
|
||||
media_descriptions = get_media_descriptions_from_post(message_json)
|
||||
content_all = content_str
|
||||
if summary:
|
||||
content_all = summary + ' ' + content_str
|
||||
content_all = summary + ' ' + content_str + ' ' + media_descriptions
|
||||
if is_filtered(base_dir, nickname, domain, content_all):
|
||||
print('REJECT: content filtered')
|
||||
return False
|
||||
|
|
6
posts.py
6
posts.py
|
@ -32,6 +32,7 @@ from webfinger import webfinger_handle
|
|||
from httpsig import create_signed_header
|
||||
from siteactive import site_is_active
|
||||
from languages import understood_post_language
|
||||
from utils import get_media_descriptions_from_post
|
||||
from utils import valid_hash_tag
|
||||
from utils import get_audio_extensions
|
||||
from utils import get_summary_from_post
|
||||
|
@ -5004,9 +5005,12 @@ def download_announce(session, base_dir: str, http_prefix: str,
|
|||
return None
|
||||
summary_str = \
|
||||
get_summary_from_post(announced_json, system_language, [])
|
||||
media_descriptions = \
|
||||
get_media_descriptions_from_post(announced_json)
|
||||
content_all = content_str
|
||||
if summary_str:
|
||||
content_all = summary_str + ' ' + content_str
|
||||
content_all = \
|
||||
summary_str + ' ' + content_str + ' ' + media_descriptions
|
||||
if is_filtered(base_dir, nickname, domain, content_all):
|
||||
print('WARN: announced post has been filtered ' +
|
||||
str(announced_json))
|
||||
|
|
19
utils.py
19
utils.py
|
@ -138,6 +138,25 @@ def get_content_from_post(post_json_object: {}, system_language: str,
|
|||
return content
|
||||
|
||||
|
||||
def get_media_descriptions_from_post(post_json_object: {}) -> str:
    """Returns all attached media descriptions as a single text.
    This is used for filtering

    If the post has an object dict then its attachments are used,
    otherwise the attachments of the post itself.
    For each attachment which has a name, the name followed by its url
    (if any) is added to the result, separated by single spaces.
    An empty string is returned if there is nothing to describe.
    Malformed entries (non-dict attachments, non-string names or urls)
    are ignored rather than raising an exception.
    """
    this_post_json = post_json_object
    if has_object_dict(post_json_object):
        this_post_json = post_json_object['object']
    attachments = this_post_json.get('attachment')
    if not attachments:
        return ''
    # a single attachment may be supplied as a bare object
    # rather than as a list containing one object
    if isinstance(attachments, dict):
        attachments = [attachments]
    elif not isinstance(attachments, (list, tuple)):
        return ''
    descriptions = []
    for attach in attachments:
        # this is called on posts arriving in the inbox, so the
        # structure of each attachment can't be taken on trust
        if not isinstance(attach, dict):
            continue
        name = attach.get('name')
        # NOTE(review): an attachment without a name is skipped
        # entirely, so its url is not filtered - confirm this is intended
        if not name or not isinstance(name, str):
            continue
        descriptions.append(name)
        url = attach.get('url')
        if url and isinstance(url, str):
            descriptions.append(url)
    return ' '.join(descriptions).strip()
|
||||
|
||||
|
||||
def get_summary_from_post(post_json_object: {}, system_language: str,
|
||||
languages_understood: []) -> str:
|
||||
"""Returns the summary from the post in the given language
|
||||
|
|
Loading…
Reference in New Issue