Define function first

2023-03-06 11:26:56 +00:00 · 2023-03-06 11:26:56 +00:00 · 1acf4987ac
parent 171808f7ee
commit 1acf4987ac
1 changed files with 161 additions and 161 deletions
--- a/inbox.py
+++ b/inbox.py
@@ -1282,6 +1282,167 @@ def _receive_update_to_question(recent_posts_cache: {}, message_json: {},
    return True


+def _valid_post_content(base_dir: str, nickname: str, domain: str,
+                        message_json: {}, max_mentions: int, max_emoji: int,
+                        allow_local_network_access: bool, debug: bool,
+                        system_language: str,
+                        http_prefix: str, domain_full: str,
+                        person_cache: {},
+                        max_hashtags: int) -> bool:
+    """Returns True if the content of a received post is acceptable.
+    Posts lacking an object dict or content are accepted unchecked,
+    git patches once their dates and summary have been checked.
+    Rejects bad dates, dangerous markup, excess mentions, emoji,
+    hashtags or tags, languages not understood, overlong urls,
+    filtered text, disallowed replies and malformed ciphertext.
+    """
+    if not has_object_dict(message_json):
+        return True
+    if 'content' not in message_json['object']:
+        return True
+
+    if not message_json['object'].get('published'):
+        return False
+    published = message_json['object']['published']
+    if 'T' not in published:
+        return False
+    if 'Z' not in published:
+        print('REJECT inbox post does not use Zulu time format. ' +
+              published)
+        return False
+    if '.' in published:
+        # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
+        published = published.split('.')[0] + 'Z'
+        message_json['object']['published'] = published
+    if not valid_post_date(published, 90, debug):
+        return False
+
+    # if the post has been edited then check its edit date
+    if message_json['object'].get('updated'):
+        published_update = message_json['object']['updated']
+        if 'T' not in published_update:
+            return False
+        if 'Z' not in published_update:
+            return False
+        if '.' in published_update:
+            # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
+            published_update = published_update.split('.')[0] + 'Z'
+            message_json['object']['updated'] = published_update
+        if not valid_post_date(published_update, 90, debug):
+            return False
+
+    summary = None
+    if message_json['object'].get('summary'):
+        summary = message_json['object']['summary']
+        if not isinstance(summary, str):
+            print('WARN: content warning is not a string')
+            return False
+        if summary != valid_content_warning(summary):
+            print('WARN: invalid content warning ' + summary)
+            return False
+        if dangerous_markup(summary, allow_local_network_access):
+            if message_json['object'].get('id'):
+                print('REJECT ARBITRARY HTML: ' + message_json['object']['id'])
+            print('REJECT ARBITRARY HTML: bad string in summary - ' +
+                  summary)
+            return False
+
+    # check for patches before dangerous_markup, which excludes code
+    if is_git_patch(base_dir, nickname, domain,
+                    message_json['object']['type'],
+                    summary,
+                    message_json['object']['content']):
+        return True
+
+    if is_question(message_json):
+        if is_question_filtered(base_dir, nickname, domain,
+                                system_language, message_json):
+            print('REJECT: incoming question options filter')
+            return False
+        if dangerous_question(message_json, allow_local_network_access):
+            print('REJECT: incoming question markup filter')
+            return False
+
+    content_str = get_base_content_from_post(message_json, system_language)
+    if dangerous_markup(content_str, allow_local_network_access):
+        if message_json['object'].get('id'):
+            print('REJECT ARBITRARY HTML: ' + message_json['object']['id'])
+        print('REJECT ARBITRARY HTML: bad string in post - ' +
+              content_str)
+        return False
+
+    # check (rough) number of mentions
+    mentions_est = _estimate_number_of_mentions(content_str)
+    if mentions_est > max_mentions:
+        if message_json['object'].get('id'):
+            print('REJECT HELLTHREAD: ' + message_json['object']['id'])
+        print('REJECT HELLTHREAD: Too many mentions in post - ' +
+              content_str)
+        return False
+    if _estimate_number_of_emoji(content_str) > max_emoji:
+        if message_json['object'].get('id'):
+            print('REJECT EMOJI OVERLOAD: ' + message_json['object']['id'])
+        print('REJECT EMOJI OVERLOAD: Too many emoji in post - ' +
+              content_str)
+        return False
+    if _estimate_number_of_hashtags(content_str) > max_hashtags:
+        if message_json['object'].get('id'):
+            print('REJECT HASHTAG OVERLOAD: ' + message_json['object']['id'])
+        print('REJECT HASHTAG OVERLOAD: Too many hashtags in post - ' +
+              content_str)
+        return False
+    # check number of tags (the limit is twice the permitted mentions)
+    if message_json['object'].get('tag'):
+        if not isinstance(message_json['object']['tag'], list):
+            message_json['object']['tag'] = []  # discard a non-list tag
+        else:
+            if len(message_json['object']['tag']) > int(max_mentions * 2):
+                if message_json['object'].get('id'):
+                    print('REJECT: ' + message_json['object']['id'])
+                print('REJECT: Too many tags in post - ' +
+                      str(message_json['object']['tag']))
+                return False
+    # check that the post is in a language suitable for this account
+    if not understood_post_language(base_dir, nickname,
+                                    message_json, system_language,
+                                    http_prefix, domain_full,
+                                    person_cache):
+        return False
+
+    # check for urls which are too long
+    if not valid_url_lengths(content_str, 2048):
+        print('REJECT: url within content too long')
+        return False
+
+    # check for filtered content (media descriptions only if summary set)
+    media_descriptions = get_media_descriptions_from_post(message_json)
+    content_all = content_str
+    if summary:
+        content_all = summary + ' ' + content_str + ' ' + media_descriptions
+    if is_filtered(base_dir, nickname, domain, content_all,
+                   system_language):
+        print('REJECT: content filtered')
+        return False
+    if message_json['object'].get('inReplyTo'):
+        if isinstance(message_json['object']['inReplyTo'], str):
+            original_post_id = message_json['object']['inReplyTo']
+            post_post_filename = locate_post(base_dir, nickname, domain,
+                                             original_post_id)
+            if post_post_filename:
+                if not _post_allow_comments(post_post_filename):
+                    print('REJECT: reply to post which does not ' +
+                          'allow comments: ' + original_post_id)
+                    return False
+    if invalid_ciphertext(message_json['object']['content']):
+        print('REJECT: malformed ciphertext in content ' +
+              str(message_json['object'].get('id')) + ' ' +
+              message_json['object']['content'])
+        return False
+    if debug:
+        print('ACCEPT: post content is valid')
+    return True
+
+
 def receive_edit_to_post(recent_posts_cache: {}, message_json: {},
                         base_dir: str,
                         nickname: str, domain: str,
@@ -3066,167 +3227,6 @@ def _estimate_number_of_hashtags(content: str) -> int:
    return content.count('>#<')


-def _valid_post_content(base_dir: str, nickname: str, domain: str,
-                        message_json: {}, max_mentions: int, max_emoji: int,
-                        allow_local_network_access: bool, debug: bool,
-                        system_language: str,
-                        http_prefix: str, domain_full: str,
-                        person_cache: {},
-                        max_hashtags: int) -> bool:
-    """Is the content of a received post valid?
-    Check for bad html
-    Check for hellthreads
-    Check that the language is understood
-    Check if it's a git patch
-    Check number of tags and mentions is reasonable
-    """
-    if not has_object_dict(message_json):
-        return True
-    if 'content' not in message_json['object']:
-        return True
-
-    if not message_json['object'].get('published'):
-        return False
-    published = message_json['object']['published']
-    if 'T' not in published:
-        return False
-    if 'Z' not in published:
-        print('REJECT inbox post does not use Zulu time format. ' +
-              published)
-        return False
-    if '.' in published:
-        # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
-        published = published.split('.')[0] + 'Z'
-        message_json['object']['published'] = published
-    if not valid_post_date(published, 90, debug):
-        return False
-
-    # if the post has been edited then check its edit date
-    if message_json['object'].get('updated'):
-        published_update = message_json['object']['updated']
-        if 'T' not in published_update:
-            return False
-        if 'Z' not in published_update:
-            return False
-        if '.' in published_update:
-            # converts 2022-03-30T17:37:58.734Z into 2022-03-30T17:37:58Z
-            published_update = published_update.split('.')[0] + 'Z'
-            message_json['object']['updated'] = published_update
-        if not valid_post_date(published_update, 90, debug):
-            return False
-
-    summary = None
-    if message_json['object'].get('summary'):
-        summary = message_json['object']['summary']
-        if not isinstance(summary, str):
-            print('WARN: content warning is not a string')
-            return False
-        if summary != valid_content_warning(summary):
-            print('WARN: invalid content warning ' + summary)
-            return False
-        if dangerous_markup(summary, allow_local_network_access):
-            if message_json['object'].get('id'):
-                print('REJECT ARBITRARY HTML: ' + message_json['object']['id'])
-            print('REJECT ARBITRARY HTML: bad string in summary - ' +
-                  summary)
-            return False
-
-    # check for patches before dangeousMarkup, which excludes code
-    if is_git_patch(base_dir, nickname, domain,
-                    message_json['object']['type'],
-                    summary,
-                    message_json['object']['content']):
-        return True
-
-    if is_question(message_json):
-        if is_question_filtered(base_dir, nickname, domain,
-                                system_language, message_json):
-            print('REJECT: incoming question options filter')
-            return False
-        if dangerous_question(message_json, allow_local_network_access):
-            print('REJECT: incoming question markup filter')
-            return False
-
-    content_str = get_base_content_from_post(message_json, system_language)
-    if dangerous_markup(content_str, allow_local_network_access):
-        if message_json['object'].get('id'):
-            print('REJECT ARBITRARY HTML: ' + message_json['object']['id'])
-        print('REJECT ARBITRARY HTML: bad string in post - ' +
-              content_str)
-        return False
-
-    # check (rough) number of mentions
-    mentions_est = _estimate_number_of_mentions(content_str)
-    if mentions_est > max_mentions:
-        if message_json['object'].get('id'):
-            print('REJECT HELLTHREAD: ' + message_json['object']['id'])
-        print('REJECT HELLTHREAD: Too many mentions in post - ' +
-              content_str)
-        return False
-    if _estimate_number_of_emoji(content_str) > max_emoji:
-        if message_json['object'].get('id'):
-            print('REJECT EMOJI OVERLOAD: ' + message_json['object']['id'])
-        print('REJECT EMOJI OVERLOAD: Too many emoji in post - ' +
-              content_str)
-        return False
-    if _estimate_number_of_hashtags(content_str) > max_hashtags:
-        if message_json['object'].get('id'):
-            print('REJECT HASHTAG OVERLOAD: ' + message_json['object']['id'])
-        print('REJECT HASHTAG OVERLOAD: Too many hashtags in post - ' +
-              content_str)
-        return False
-    # check number of tags
-    if message_json['object'].get('tag'):
-        if not isinstance(message_json['object']['tag'], list):
-            message_json['object']['tag'] = []
-        else:
-            if len(message_json['object']['tag']) > int(max_mentions * 2):
-                if message_json['object'].get('id'):
-                    print('REJECT: ' + message_json['object']['id'])
-                print('REJECT: Too many tags in post - ' +
-                      message_json['object']['tag'])
-                return False
-    # check that the post is in a language suitable for this account
-    if not understood_post_language(base_dir, nickname,
-                                    message_json, system_language,
-                                    http_prefix, domain_full,
-                                    person_cache):
-        return False
-
-    # check for urls which are too long
-    if not valid_url_lengths(content_str, 2048):
-        print('REJECT: url within content too long')
-        return False
-
-    # check for filtered content
-    media_descriptions = get_media_descriptions_from_post(message_json)
-    content_all = content_str
-    if summary:
-        content_all = summary + ' ' + content_str + ' ' + media_descriptions
-    if is_filtered(base_dir, nickname, domain, content_all,
-                   system_language):
-        print('REJECT: content filtered')
-        return False
-    if message_json['object'].get('inReplyTo'):
-        if isinstance(message_json['object']['inReplyTo'], str):
-            original_post_id = message_json['object']['inReplyTo']
-            post_post_filename = locate_post(base_dir, nickname, domain,
-                                             original_post_id)
-            if post_post_filename:
-                if not _post_allow_comments(post_post_filename):
-                    print('REJECT: reply to post which does not ' +
-                          'allow comments: ' + original_post_id)
-                    return False
-    if invalid_ciphertext(message_json['object']['content']):
-        print('REJECT: malformed ciphertext in content ' +
-              message_json['object']['id'] + ' ' +
-              message_json['object']['content'])
-        return False
-    if debug:
-        print('ACCEPT: post content is valid')
-    return True
-
-
 def _obtain_avatar_for_reply_post(session, base_dir: str, http_prefix: str,
                                  domain: str, onion_domain: str,
                                  i2p_domain: str,