More reject information during inbox validation

main
Bob Mottram 2024-02-18 12:06:16 +00:00
parent 90298b859b
commit 062f998713
2 changed files with 72 additions and 33 deletions

View File

@ -1417,43 +1417,70 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
return True return True
if not message_json['object'].get('published'): if not message_json['object'].get('published'):
if message_json['object'].get('id'):
print('REJECT inbox post does not have a published date. ' +
str(message_json['object']['id']))
return False return False
published = message_json['object']['published'] published = message_json['object']['published']
if 'T' not in published: if 'T' not in published:
if message_json['object'].get('id'):
print('REJECT inbox post does not use expected time format. ' +
published + ' ' + str(message_json['object']['id']))
return False return False
if 'Z' not in published: if 'Z' not in published:
print('REJECT inbox post does not use Zulu time format. ' + if message_json['object'].get('id'):
published) print('REJECT inbox post does not use Zulu time format. ' +
published + ' ' + str(message_json['object']['id']))
return False return False
if '.' in published: if '.' in published:
# converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
published = published.split('.')[0] + 'Z' published = published.split('.')[0] + 'Z'
message_json['object']['published'] = published message_json['object']['published'] = published
if not valid_post_date(published, 90, debug): if not valid_post_date(published, 90, debug):
if message_json['object'].get('id'):
print('REJECT: invalid post published date ' +
str(published) + ' ' +
str(message_json['object']['id']))
return False return False
# if the post has been edited then check its edit date # if the post has been edited then check its edit date
if message_json['object'].get('updated'): if message_json['object'].get('updated'):
published_update = message_json['object']['updated'] published_update = message_json['object']['updated']
if 'T' not in published_update: if 'T' not in published_update:
if message_json['object'].get('id'):
print('REJECT: invalid post update date format ' +
str(published_update) + ' ' +
str(message_json['object']['id']))
return False return False
if 'Z' not in published_update: if 'Z' not in published_update:
if message_json['object'].get('id'):
print('REJECT: post update date not in Zulu time ' +
str(published_update) + ' ' +
str(message_json['object']['id']))
return False return False
if '.' in published_update: if '.' in published_update:
# converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
published_update = published_update.split('.')[0] + 'Z' published_update = published_update.split('.')[0] + 'Z'
message_json['object']['updated'] = published_update message_json['object']['updated'] = published_update
if not valid_post_date(published_update, 90, debug): if not valid_post_date(published_update, 90, debug):
if message_json['object'].get('id'):
print('REJECT: invalid post update date ' +
str(published_update) + ' ' +
str(message_json['object']['id']))
return False return False
summary = None summary = None
if message_json['object'].get('summary'): if message_json['object'].get('summary'):
summary = message_json['object']['summary'] summary = message_json['object']['summary']
if not isinstance(summary, str): if not isinstance(summary, str):
print('WARN: content warning is not a string') if message_json['object'].get('id'):
print('REJECT: content warning is not a string ' +
str(summary) + ' ' + str(message_json['object']['id']))
return False return False
if summary != valid_content_warning(summary): if summary != valid_content_warning(summary):
print('WARN: invalid content warning ' + summary) if message_json['object'].get('id'):
print('REJECT: invalid content warning ' + summary + ' ' +
str(message_json['object']['id']))
return False return False
if dangerous_markup(summary, allow_local_network_access, []): if dangerous_markup(summary, allow_local_network_access, []):
if message_json['object'].get('id'): if message_json['object'].get('id'):
@ -1483,7 +1510,7 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
if dangerous_markup(content_str, allow_local_network_access, ['pre']): if dangerous_markup(content_str, allow_local_network_access, ['pre']):
if message_json['object'].get('id'): if message_json['object'].get('id'):
print('REJECT ARBITRARY HTML 2: ' + print('REJECT ARBITRARY HTML 2: ' +
message_json['object']['id']) str(message_json['object']['id']))
if debug: if debug:
print('REJECT ARBITRARY HTML: bad string in post - ' + print('REJECT ARBITRARY HTML: bad string in post - ' +
content_str) content_str)
@ -1493,21 +1520,23 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
mentions_est = _estimate_number_of_mentions(content_str) mentions_est = _estimate_number_of_mentions(content_str)
if mentions_est > max_mentions: if mentions_est > max_mentions:
if message_json['object'].get('id'): if message_json['object'].get('id'):
print('REJECT HELLTHREAD: ' + message_json['object']['id']) print('REJECT HELLTHREAD: ' + str(message_json['object']['id']))
if debug: if debug:
print('REJECT HELLTHREAD: Too many mentions in post - ' + print('REJECT HELLTHREAD: Too many mentions in post - ' +
content_str) content_str)
return False return False
if _estimate_number_of_emoji(content_str) > max_emoji: if _estimate_number_of_emoji(content_str) > max_emoji:
if message_json['object'].get('id'): if message_json['object'].get('id'):
print('REJECT EMOJI OVERLOAD: ' + message_json['object']['id']) print('REJECT EMOJI OVERLOAD: ' +
str(message_json['object']['id']))
if debug: if debug:
print('REJECT EMOJI OVERLOAD: Too many emoji in post - ' + print('REJECT EMOJI OVERLOAD: Too many emoji in post - ' +
content_str) content_str)
return False return False
if _estimate_number_of_hashtags(content_str) > max_hashtags: if _estimate_number_of_hashtags(content_str) > max_hashtags:
if message_json['object'].get('id'): if message_json['object'].get('id'):
print('REJECT HASHTAG OVERLOAD: ' + message_json['object']['id']) print('REJECT HASHTAG OVERLOAD: ' +
str(message_json['object']['id']))
if debug: if debug:
print('REJECT HASHTAG OVERLOAD: Too many hashtags in post - ' + print('REJECT HASHTAG OVERLOAD: Too many hashtags in post - ' +
content_str) content_str)
@ -1528,6 +1557,9 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
message_json, system_language, message_json, system_language,
http_prefix, domain_full, http_prefix, domain_full,
person_cache): person_cache):
if message_json['object'].get('id'):
print('REJECT: content not understood ' +
str(message_json['object']['id']))
return False return False
# check for urls which are too long # check for urls which are too long
@ -1542,8 +1574,9 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
content_all = summary + ' ' + content_str + ' ' + media_descriptions content_all = summary + ' ' + content_str + ' ' + media_descriptions
if is_filtered(base_dir, nickname, domain, content_all, if is_filtered(base_dir, nickname, domain, content_all,
system_language): system_language):
if message_json.get('id'): if message_json['object'].get('id'):
print('REJECT: content filtered ' + str(message_json['id'])) print('REJECT: content filtered ' +
str(message_json['object']['id']))
return False return False
reply_id = get_reply_to(message_json['object']) reply_id = get_reply_to(message_json['object'])
if reply_id: if reply_id:
@ -1557,14 +1590,16 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
'allow comments: ' + original_post_id) 'allow comments: ' + original_post_id)
return False return False
if contains_private_key(message_json['object']['content']): if contains_private_key(message_json['object']['content']):
print('REJECT: someone posted their private key ' + if message_json['object'].get('id'):
message_json['object']['id'] + ' ' + print('REJECT: someone posted their private key ' +
message_json['object']['content']) str(message_json['object']['id']) + ' ' +
message_json['object']['content'])
return False return False
if invalid_ciphertext(message_json['object']['content']): if invalid_ciphertext(message_json['object']['content']):
print('REJECT: malformed ciphertext in content ' + if message_json['object'].get('id'):
message_json['object']['id'] + ' ' + print('REJECT: malformed ciphertext in content ' +
message_json['object']['content']) str(message_json['object']['id']) + ' ' +
message_json['object']['content'])
return False return False
if debug: if debug:
print('ACCEPT: post content is valid') print('ACCEPT: post content is valid')

View File

@ -125,6 +125,24 @@ from session import get_json
MAX_DISPLAY_NAME_LENGTH = 42 MAX_DISPLAY_NAME_LENGTH = 42
def _enforce_max_display_name_length(display_name: str) -> str:
    """Returns the display name, shortened if necessary so that it is
    no longer than MAX_DISPLAY_NAME_LENGTH characters
    """
    limit = MAX_DISPLAY_NAME_LENGTH
    if len(display_name) > limit:
        # an over-long name containing a colon is first cut back to the
        # text before the first colon, but only if that leaves more than
        # two characters to show
        if ':' in display_name:
            prefix = display_name.partition(':')[0].strip()
            if len(prefix) > 2:
                display_name = prefix
        # hard truncate anything which is still too long
        # (a no-op when the prefix already fits)
        display_name = display_name[:limit]
    return display_name
def _html_post_metadata_open_graph(domain: str, post_json_object: {}, def _html_post_metadata_open_graph(domain: str, post_json_object: {},
system_language: str) -> str: system_language: str) -> str:
"""Returns html OpenGraph metadata for a post """Returns html OpenGraph metadata for a post
@ -1411,15 +1429,8 @@ def _get_post_title_announce_html(base_dir: str,
announce_display_name = None announce_display_name = None
if announce_display_name: if announce_display_name:
# enforce maximum length for display name # enforce maximum length for display name
if len(announce_display_name) > MAX_DISPLAY_NAME_LENGTH: announce_display_name = \
if ':' in announce_display_name: _enforce_max_display_name_length(announce_display_name)
announce_display_name_short = \
announce_display_name.split(':')[0].strip()
if len(announce_display_name_short) > 2:
announce_display_name = announce_display_name_short
if len(announce_display_name) > MAX_DISPLAY_NAME_LENGTH:
announce_display_name = \
announce_display_name[:MAX_DISPLAY_NAME_LENGTH]
if not announce_display_name and announce_domain: if not announce_display_name and announce_domain:
announce_display_name = announce_nickname + '@' + announce_domain announce_display_name = announce_nickname + '@' + announce_domain
@ -2424,14 +2435,7 @@ def individual_post_as_html(signing_priv_key_pem: str,
display_name_is_emoji(display_name): display_name_is_emoji(display_name):
display_name = None display_name = None
if display_name: if display_name:
# enforce maximum length for the display name display_name = _enforce_max_display_name_length(display_name)
if len(display_name) > MAX_DISPLAY_NAME_LENGTH:
if ':' in display_name:
display_name_short = display_name.split(':')[0].strip()
if len(display_name_short) > 2:
display_name = display_name_short
if len(display_name) > MAX_DISPLAY_NAME_LENGTH:
display_name = display_name[:MAX_DISPLAY_NAME_LENGTH]
# add emojis # add emojis
if ':' in display_name: if ':' in display_name:
display_name = \ display_name = \