mirror of https://gitlab.com/bashrc2/epicyon
Filtering includes image descriptions and URLs
parent
1708a928ef
commit
ec31deb739
11
inbox.py
11
inbox.py
|
@ -18,6 +18,7 @@ from languages import understood_post_language
|
||||||
from like import update_likes_collection
|
from like import update_likes_collection
|
||||||
from reaction import update_reaction_collection
|
from reaction import update_reaction_collection
|
||||||
from reaction import valid_emoji_content
|
from reaction import valid_emoji_content
|
||||||
|
from utils import get_media_descriptions_from_post
|
||||||
from utils import get_summary_from_post
|
from utils import get_summary_from_post
|
||||||
from utils import delete_cached_html
|
from utils import delete_cached_html
|
||||||
from utils import get_account_timezone
|
from utils import get_account_timezone
|
||||||
|
@ -640,8 +641,11 @@ def save_post_to_inbox_queue(base_dir: str, http_prefix: str,
|
||||||
summary_str = \
|
summary_str = \
|
||||||
get_summary_from_post(post_json_object,
|
get_summary_from_post(post_json_object,
|
||||||
system_language, [])
|
system_language, [])
|
||||||
if is_filtered(base_dir, nickname, domain,
|
media_descriptions = \
|
||||||
summary_str + ' ' + content_str):
|
get_media_descriptions_from_post(post_json_object)
|
||||||
|
content_all = \
|
||||||
|
summary_str + ' ' + content_str + ' ' + media_descriptions
|
||||||
|
if is_filtered(base_dir, nickname, domain, content_all):
|
||||||
if debug:
|
if debug:
|
||||||
print('WARN: post was filtered out due to content')
|
print('WARN: post was filtered out due to content')
|
||||||
return None
|
return None
|
||||||
|
@ -2688,9 +2692,10 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# check for filtered content
|
# check for filtered content
|
||||||
|
media_descriptions = get_media_descriptions_from_post(message_json)
|
||||||
content_all = content_str
|
content_all = content_str
|
||||||
if summary:
|
if summary:
|
||||||
content_all = summary + ' ' + content_str
|
content_all = summary + ' ' + content_str + ' ' + media_descriptions
|
||||||
if is_filtered(base_dir, nickname, domain, content_all):
|
if is_filtered(base_dir, nickname, domain, content_all):
|
||||||
print('REJECT: content filtered')
|
print('REJECT: content filtered')
|
||||||
return False
|
return False
|
||||||
|
|
6
posts.py
6
posts.py
|
@ -32,6 +32,7 @@ from webfinger import webfinger_handle
|
||||||
from httpsig import create_signed_header
|
from httpsig import create_signed_header
|
||||||
from siteactive import site_is_active
|
from siteactive import site_is_active
|
||||||
from languages import understood_post_language
|
from languages import understood_post_language
|
||||||
|
from utils import get_media_descriptions_from_post
|
||||||
from utils import valid_hash_tag
|
from utils import valid_hash_tag
|
||||||
from utils import get_audio_extensions
|
from utils import get_audio_extensions
|
||||||
from utils import get_summary_from_post
|
from utils import get_summary_from_post
|
||||||
|
@ -5004,9 +5005,12 @@ def download_announce(session, base_dir: str, http_prefix: str,
|
||||||
return None
|
return None
|
||||||
summary_str = \
|
summary_str = \
|
||||||
get_summary_from_post(announced_json, system_language, [])
|
get_summary_from_post(announced_json, system_language, [])
|
||||||
|
media_descriptions = \
|
||||||
|
get_media_descriptions_from_post(announced_json)
|
||||||
content_all = content_str
|
content_all = content_str
|
||||||
if summary_str:
|
if summary_str:
|
||||||
content_all = summary_str + ' ' + content_str
|
content_all = \
|
||||||
|
summary_str + ' ' + content_str + ' ' + media_descriptions
|
||||||
if is_filtered(base_dir, nickname, domain, content_all):
|
if is_filtered(base_dir, nickname, domain, content_all):
|
||||||
print('WARN: announced post has been filtered ' +
|
print('WARN: announced post has been filtered ' +
|
||||||
str(announced_json))
|
str(announced_json))
|
||||||
|
|
19
utils.py
19
utils.py
|
@ -138,6 +138,25 @@ def get_content_from_post(post_json_object: {}, system_language: str,
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def get_media_descriptions_from_post(post_json_object: {}) -> str:
    """Returns all attached media descriptions and urls as a single text.
    This is used for filtering

    If the post wraps an object dict then the attachments of that
    object are used, otherwise the attachments of the post itself.
    For each attachment the 'name' (description) and the 'url' are
    collected when they are non-empty strings, and the result is
    joined with single spaces. An empty string is returned when there
    is nothing to collect.
    """
    this_post_json = post_json_object
    if has_object_dict(post_json_object):
        this_post_json = post_json_object['object']

    attachments = this_post_json.get('attachment')
    if not attachments:
        return ''
    # a single attachment may arrive as an object rather than a list
    if isinstance(attachments, dict):
        attachments = [attachments]
    if not isinstance(attachments, list):
        return ''

    texts = []
    for attach in attachments:
        # posts come from remote servers, so don't trust their shape
        if not isinstance(attach, dict):
            continue
        # The url is collected even when there is no description,
        # otherwise omitting the description would be a way to get
        # a filtered url past the filter
        for field_name in ('name', 'url'):
            field_value = attach.get(field_name)
            if field_value and isinstance(field_value, str):
                texts.append(field_value)
    return ' '.join(texts).strip()
|
||||||
|
|
||||||
|
|
||||||
def get_summary_from_post(post_json_object: {}, system_language: str,
|
def get_summary_from_post(post_json_object: {}, system_language: str,
|
||||||
languages_understood: []) -> str:
|
languages_understood: []) -> str:
|
||||||
"""Returns the summary from the post in the given language
|
"""Returns the summary from the post in the given language
|
||||||
|
|
Loading…
Reference in New Issue