mirror of https://gitlab.com/bashrc2/epicyon
Merge branch 'main' of gitlab.com:bashrc2/epicyon
commit
8f9962ec71
11
filters.py
11
filters.py
|
@ -100,10 +100,13 @@ def remove_global_filter(base_dir: str, words: str) -> bool:
|
|||
def _is_twitter_post(content: str) -> bool:
|
||||
"""Returns true if the given post content is a retweet or twitter crosspost
|
||||
"""
|
||||
if '/twitter.' in content or '@twitter.' in content:
|
||||
return True
|
||||
if '>RT <' in content:
|
||||
return True
|
||||
features = (
|
||||
'/twitter.', '/nitter.', '@twitter.', '@nitter.',
|
||||
'>RT <', '_tw<', '_tw@', 'tweet', 'Tweet'
|
||||
)
|
||||
for feat in features:
|
||||
if feat in content:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
|
|
17
inbox.py
17
inbox.py
|
@ -18,6 +18,8 @@ from languages import understood_post_language
|
|||
from like import update_likes_collection
|
||||
from reaction import update_reaction_collection
|
||||
from reaction import valid_emoji_content
|
||||
from utils import get_media_descriptions_from_post
|
||||
from utils import get_summary_from_post
|
||||
from utils import delete_cached_html
|
||||
from utils import get_account_timezone
|
||||
from utils import domain_permitted
|
||||
|
@ -636,7 +638,14 @@ def save_post_to_inbox_queue(base_dir: str, http_prefix: str,
|
|||
content_str = \
|
||||
get_base_content_from_post(post_json_object, system_language)
|
||||
if content_str:
|
||||
if is_filtered(base_dir, nickname, domain, content_str):
|
||||
summary_str = \
|
||||
get_summary_from_post(post_json_object,
|
||||
system_language, [])
|
||||
media_descriptions = \
|
||||
get_media_descriptions_from_post(post_json_object)
|
||||
content_all = \
|
||||
summary_str + ' ' + content_str + ' ' + media_descriptions
|
||||
if is_filtered(base_dir, nickname, domain, content_all):
|
||||
if debug:
|
||||
print('WARN: post was filtered out due to content')
|
||||
return None
|
||||
|
@ -2683,7 +2692,11 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
|
|||
return False
|
||||
|
||||
# check for filtered content
|
||||
if is_filtered(base_dir, nickname, domain, content_str):
|
||||
media_descriptions = get_media_descriptions_from_post(message_json)
|
||||
content_all = content_str
|
||||
if summary:
|
||||
content_all = summary + ' ' + content_str + ' ' + media_descriptions
|
||||
if is_filtered(base_dir, nickname, domain, content_all):
|
||||
print('REJECT: content filtered')
|
||||
return False
|
||||
if message_json['object'].get('inReplyTo'):
|
||||
|
|
12
posts.py
12
posts.py
|
@ -32,6 +32,7 @@ from webfinger import webfinger_handle
|
|||
from httpsig import create_signed_header
|
||||
from siteactive import site_is_active
|
||||
from languages import understood_post_language
|
||||
from utils import get_media_descriptions_from_post
|
||||
from utils import valid_hash_tag
|
||||
from utils import get_audio_extensions
|
||||
from utils import get_summary_from_post
|
||||
|
@ -5002,8 +5003,15 @@ def download_announce(session, base_dir: str, http_prefix: str,
|
|||
base_dir, nickname, domain, post_id,
|
||||
recent_posts_cache)
|
||||
return None
|
||||
|
||||
if is_filtered(base_dir, nickname, domain, content_str):
|
||||
summary_str = \
|
||||
get_summary_from_post(announced_json, system_language, [])
|
||||
media_descriptions = \
|
||||
get_media_descriptions_from_post(announced_json)
|
||||
content_all = content_str
|
||||
if summary_str:
|
||||
content_all = \
|
||||
summary_str + ' ' + content_str + ' ' + media_descriptions
|
||||
if is_filtered(base_dir, nickname, domain, content_all):
|
||||
print('WARN: announced post has been filtered ' +
|
||||
str(announced_json))
|
||||
_reject_announce(announce_filename,
|
||||
|
|
19
utils.py
19
utils.py
|
@ -138,6 +138,25 @@ def get_content_from_post(post_json_object: {}, system_language: str,
|
|||
return content
|
||||
|
||||
|
||||
def get_media_descriptions_from_post(post_json_object: {}) -> str:
    """Returns all attached media descriptions as a single text.
    This is used for filtering

    post_json_object: the post, or an activity wrapping the post. When
    has_object_dict reports an object dict then the attachments are
    read from post_json_object['object'], otherwise from the post itself.

    For each attachment having a 'name' the name is included, followed by
    its 'url' if there is one. Attachments without a name are skipped
    entirely. Items are separated by single spaces.
    Returns an empty string if there is nothing to describe.
    """
    this_post_json = post_json_object
    if has_object_dict(post_json_object):
        this_post_json = post_json_object['object']

    attachments = this_post_json.get('attachment')
    if not attachments:
        return ''
    # a single attachment may arrive as a bare object rather than a list
    if isinstance(attachments, dict):
        attachments = [attachments]
    if not isinstance(attachments, list):
        return ''

    descriptions = []
    for attach in attachments:
        # posts arrive from remote servers, so don't trust their shape
        if not isinstance(attach, dict):
            continue
        name = attach.get('name')
        if not name or not isinstance(name, str):
            continue
        descriptions.append(name)
        url = attach.get('url')
        # url can be a Link object or a list, which is not
        # usable as filter text, so only plain strings are included
        if url and isinstance(url, str):
            descriptions.append(url)
    return ' '.join(descriptions).strip()
|
||||
|
||||
|
||||
def get_summary_from_post(post_json_object: {}, system_language: str,
|
||||
languages_understood: []) -> str:
|
||||
"""Returns the summary from the post in the given language
|
||||
|
|
Loading…
Reference in New Issue