mirror of https://gitlab.com/bashrc2/epicyon
Define function first
parent
171808f7ee
commit
1acf4987ac
322
inbox.py
322
inbox.py
|
@ -1282,6 +1282,167 @@ def _receive_update_to_question(recent_posts_cache: {}, message_json: {},
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _valid_post_content(base_dir: str, nickname: str, domain: str,
                        message_json: {}, max_mentions: int, max_emoji: int,
                        allow_local_network_access: bool, debug: bool,
                        system_language: str,
                        http_prefix: str, domain_full: str,
                        person_cache: {},
                        max_hashtags: int) -> bool:
    """Is the content of a received post valid?

    Check for bad html
    Check for hellthreads
    Check that the language is understood
    Check if it's a git patch
    Check number of tags and mentions is reasonable

    Returns True when the post is acceptable and False when it should be
    rejected. A message without an object dict, or whose object has no
    'content' field, is returned as valid without any further checks.
    A post recognised as a git patch is accepted straight away, without
    the markup, mention, emoji, hashtag, language or filter checks.

    Side effects: fractional seconds are stripped from the 'published' and
    'updated' timestamps of message_json['object'] in place, and a 'tag'
    field which is not a list is replaced by an empty list.
    """
    if not has_object_dict(message_json):
        return True
    post_obj = message_json['object']
    if 'content' not in post_obj:
        return True

    # the id is optional, so it is only used within log messages
    post_id = post_obj.get('id')

    published = post_obj.get('published')
    # this field arrives from a remote server, so make sure that it is
    # a string before applying any string operations to it
    if not published or not isinstance(published, str):
        return False
    if 'T' not in published:
        return False
    if 'Z' not in published:
        print('REJECT inbox post does not use Zulu time format. ' +
              published)
        return False
    if '.' in published:
        # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
        published = published.split('.')[0] + 'Z'
        post_obj['published'] = published
    if not valid_post_date(published, 90, debug):
        return False

    # if the post has been edited then check its edit date
    published_update = post_obj.get('updated')
    if published_update:
        if not isinstance(published_update, str):
            return False
        if 'T' not in published_update:
            return False
        if 'Z' not in published_update:
            return False
        if '.' in published_update:
            # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
            published_update = published_update.split('.')[0] + 'Z'
            post_obj['updated'] = published_update
        if not valid_post_date(published_update, 90, debug):
            return False

    # the summary field carries the content warning
    summary = None
    if post_obj.get('summary'):
        summary = post_obj['summary']
        if not isinstance(summary, str):
            print('WARN: content warning is not a string')
            return False
        if summary != valid_content_warning(summary):
            print('WARN: invalid content warning ' + summary)
            return False
        if dangerous_markup(summary, allow_local_network_access):
            if post_id:
                print('REJECT ARBITRARY HTML: ' + post_id)
            print('REJECT ARBITRARY HTML: bad string in summary - ' +
                  summary)
            return False

    # check for patches before dangerous_markup, which excludes code
    if is_git_patch(base_dir, nickname, domain,
                    post_obj['type'],
                    summary,
                    post_obj['content']):
        return True

    if is_question(message_json):
        if is_question_filtered(base_dir, nickname, domain,
                                system_language, message_json):
            print('REJECT: incoming question options filter')
            return False
        if dangerous_question(message_json, allow_local_network_access):
            print('REJECT: incoming question markup filter')
            return False

    content_str = get_base_content_from_post(message_json, system_language)
    if dangerous_markup(content_str, allow_local_network_access):
        if post_id:
            print('REJECT ARBITRARY HTML: ' + post_id)
        print('REJECT ARBITRARY HTML: bad string in post - ' +
              content_str)
        return False

    # check (rough) number of mentions
    mentions_est = _estimate_number_of_mentions(content_str)
    if mentions_est > max_mentions:
        if post_id:
            print('REJECT HELLTHREAD: ' + post_id)
        print('REJECT HELLTHREAD: Too many mentions in post - ' +
              content_str)
        return False
    if _estimate_number_of_emoji(content_str) > max_emoji:
        if post_id:
            print('REJECT EMOJI OVERLOAD: ' + post_id)
        print('REJECT EMOJI OVERLOAD: Too many emoji in post - ' +
              content_str)
        return False
    if _estimate_number_of_hashtags(content_str) > max_hashtags:
        if post_id:
            print('REJECT HASHTAG OVERLOAD: ' + post_id)
        print('REJECT HASHTAG OVERLOAD: Too many hashtags in post - ' +
              content_str)
        return False
    # check number of tags
    if post_obj.get('tag'):
        if not isinstance(post_obj['tag'], list):
            post_obj['tag'] = []
        elif len(post_obj['tag']) > int(max_mentions * 2):
            if post_id:
                print('REJECT: ' + post_id)
            # the tags are a list, which must be converted to a string
            # before concatenation, otherwise a TypeError is raised
            # rather than the post being rejected
            print('REJECT: Too many tags in post - ' +
                  str(post_obj['tag']))
            return False
    # check that the post is in a language suitable for this account
    if not understood_post_language(base_dir, nickname,
                                    message_json, system_language,
                                    http_prefix, domain_full,
                                    person_cache):
        return False

    # check for urls which are too long
    if not valid_url_lengths(content_str, 2048):
        print('REJECT: url within content too long')
        return False

    # check for filtered content
    # NOTE(review): media descriptions only take part in the filter check
    # when a summary is present - confirm that this is intended
    media_descriptions = get_media_descriptions_from_post(message_json)
    content_all = content_str
    if summary:
        content_all = summary + ' ' + content_str + ' ' + media_descriptions
    if is_filtered(base_dir, nickname, domain, content_all,
                   system_language):
        print('REJECT: content filtered')
        return False
    # replies are rejected only if the replied-to post can be located
    # for this account and it does not allow comments
    if post_obj.get('inReplyTo'):
        if isinstance(post_obj['inReplyTo'], str):
            original_post_id = post_obj['inReplyTo']
            post_post_filename = locate_post(base_dir, nickname, domain,
                                             original_post_id)
            if post_post_filename:
                if not _post_allow_comments(post_post_filename):
                    print('REJECT: reply to post which does not ' +
                          'allow comments: ' + original_post_id)
                    return False
    if invalid_ciphertext(post_obj['content']):
        # the id may be absent, so don't index it directly, which would
        # raise a KeyError while trying to log the rejection
        print('REJECT: malformed ciphertext in content ' +
              str(post_id) + ' ' +
              str(post_obj['content']))
        return False
    if debug:
        print('ACCEPT: post content is valid')
    return True
|
||||||
|
|
||||||
|
|
||||||
def receive_edit_to_post(recent_posts_cache: {}, message_json: {},
|
def receive_edit_to_post(recent_posts_cache: {}, message_json: {},
|
||||||
base_dir: str,
|
base_dir: str,
|
||||||
nickname: str, domain: str,
|
nickname: str, domain: str,
|
||||||
|
@ -3066,167 +3227,6 @@ def _estimate_number_of_hashtags(content: str) -> int:
|
||||||
return content.count('>#<')
|
return content.count('>#<')
|
||||||
|
|
||||||
|
|
||||||
def _valid_post_content(base_dir: str, nickname: str, domain: str,
                        message_json: {}, max_mentions: int, max_emoji: int,
                        allow_local_network_access: bool, debug: bool,
                        system_language: str,
                        http_prefix: str, domain_full: str,
                        person_cache: {},
                        max_hashtags: int) -> bool:
    """Is the content of a received post valid?

    Check for bad html
    Check for hellthreads
    Check that the language is understood
    Check if it's a git patch
    Check number of tags and mentions is reasonable

    Returns True when the post is acceptable and False when it should be
    rejected. A message without an object dict, or whose object has no
    'content' field, is returned as valid without any further checks.
    A post recognised as a git patch is accepted straight away, without
    the markup, mention, emoji, hashtag, language or filter checks.

    Side effects: fractional seconds are stripped from the 'published' and
    'updated' timestamps of message_json['object'] in place, and a 'tag'
    field which is not a list is replaced by an empty list.
    """
    if not has_object_dict(message_json):
        return True
    post_obj = message_json['object']
    if 'content' not in post_obj:
        return True

    # the id is optional, so it is only used within log messages
    post_id = post_obj.get('id')

    published = post_obj.get('published')
    # this field arrives from a remote server, so make sure that it is
    # a string before applying any string operations to it
    if not published or not isinstance(published, str):
        return False
    if 'T' not in published:
        return False
    if 'Z' not in published:
        print('REJECT inbox post does not use Zulu time format. ' +
              published)
        return False
    if '.' in published:
        # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
        published = published.split('.')[0] + 'Z'
        post_obj['published'] = published
    if not valid_post_date(published, 90, debug):
        return False

    # if the post has been edited then check its edit date
    published_update = post_obj.get('updated')
    if published_update:
        if not isinstance(published_update, str):
            return False
        if 'T' not in published_update:
            return False
        if 'Z' not in published_update:
            return False
        if '.' in published_update:
            # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
            published_update = published_update.split('.')[0] + 'Z'
            post_obj['updated'] = published_update
        if not valid_post_date(published_update, 90, debug):
            return False

    # the summary field carries the content warning
    summary = None
    if post_obj.get('summary'):
        summary = post_obj['summary']
        if not isinstance(summary, str):
            print('WARN: content warning is not a string')
            return False
        if summary != valid_content_warning(summary):
            print('WARN: invalid content warning ' + summary)
            return False
        if dangerous_markup(summary, allow_local_network_access):
            if post_id:
                print('REJECT ARBITRARY HTML: ' + post_id)
            print('REJECT ARBITRARY HTML: bad string in summary - ' +
                  summary)
            return False

    # check for patches before dangerous_markup, which excludes code
    if is_git_patch(base_dir, nickname, domain,
                    post_obj['type'],
                    summary,
                    post_obj['content']):
        return True

    if is_question(message_json):
        if is_question_filtered(base_dir, nickname, domain,
                                system_language, message_json):
            print('REJECT: incoming question options filter')
            return False
        if dangerous_question(message_json, allow_local_network_access):
            print('REJECT: incoming question markup filter')
            return False

    content_str = get_base_content_from_post(message_json, system_language)
    if dangerous_markup(content_str, allow_local_network_access):
        if post_id:
            print('REJECT ARBITRARY HTML: ' + post_id)
        print('REJECT ARBITRARY HTML: bad string in post - ' +
              content_str)
        return False

    # check (rough) number of mentions
    mentions_est = _estimate_number_of_mentions(content_str)
    if mentions_est > max_mentions:
        if post_id:
            print('REJECT HELLTHREAD: ' + post_id)
        print('REJECT HELLTHREAD: Too many mentions in post - ' +
              content_str)
        return False
    if _estimate_number_of_emoji(content_str) > max_emoji:
        if post_id:
            print('REJECT EMOJI OVERLOAD: ' + post_id)
        print('REJECT EMOJI OVERLOAD: Too many emoji in post - ' +
              content_str)
        return False
    if _estimate_number_of_hashtags(content_str) > max_hashtags:
        if post_id:
            print('REJECT HASHTAG OVERLOAD: ' + post_id)
        print('REJECT HASHTAG OVERLOAD: Too many hashtags in post - ' +
              content_str)
        return False
    # check number of tags
    if post_obj.get('tag'):
        if not isinstance(post_obj['tag'], list):
            post_obj['tag'] = []
        elif len(post_obj['tag']) > int(max_mentions * 2):
            if post_id:
                print('REJECT: ' + post_id)
            # the tags are a list, which must be converted to a string
            # before concatenation, otherwise a TypeError is raised
            # rather than the post being rejected
            print('REJECT: Too many tags in post - ' +
                  str(post_obj['tag']))
            return False
    # check that the post is in a language suitable for this account
    if not understood_post_language(base_dir, nickname,
                                    message_json, system_language,
                                    http_prefix, domain_full,
                                    person_cache):
        return False

    # check for urls which are too long
    if not valid_url_lengths(content_str, 2048):
        print('REJECT: url within content too long')
        return False

    # check for filtered content
    # NOTE(review): media descriptions only take part in the filter check
    # when a summary is present - confirm that this is intended
    media_descriptions = get_media_descriptions_from_post(message_json)
    content_all = content_str
    if summary:
        content_all = summary + ' ' + content_str + ' ' + media_descriptions
    if is_filtered(base_dir, nickname, domain, content_all,
                   system_language):
        print('REJECT: content filtered')
        return False
    # replies are rejected only if the replied-to post can be located
    # for this account and it does not allow comments
    if post_obj.get('inReplyTo'):
        if isinstance(post_obj['inReplyTo'], str):
            original_post_id = post_obj['inReplyTo']
            post_post_filename = locate_post(base_dir, nickname, domain,
                                             original_post_id)
            if post_post_filename:
                if not _post_allow_comments(post_post_filename):
                    print('REJECT: reply to post which does not ' +
                          'allow comments: ' + original_post_id)
                    return False
    if invalid_ciphertext(post_obj['content']):
        # the id may be absent, so don't index it directly, which would
        # raise a KeyError while trying to log the rejection
        print('REJECT: malformed ciphertext in content ' +
              str(post_id) + ' ' +
              str(post_obj['content']))
        return False
    if debug:
        print('ACCEPT: post content is valid')
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def _obtain_avatar_for_reply_post(session, base_dir: str, http_prefix: str,
|
def _obtain_avatar_for_reply_post(session, base_dir: str, http_prefix: str,
|
||||||
domain: str, onion_domain: str,
|
domain: str, onion_domain: str,
|
||||||
i2p_domain: str,
|
i2p_domain: str,
|
||||||
|
|
Loading…
Reference in New Issue