def valid_url_lengths(content: str, max_url_length: int) -> bool:
    """Returns true if none of the urls within the given content
    are too long

    Only urls which are terminated by a double quote (such as a quoted
    attribute value) are checked. The length which gets measured is the
    text between '://' and that closing quote, so the scheme prefix is
    not counted.

    content: text to be checked
    max_url_length: maximum number of characters permitted after '://'

    Returns False if any checked url is longer than max_url_length,
    otherwise True
    """
    if '://' not in content:
        return True
    # the first section is whatever precedes the first '://',
    # so it can never be the body of a url and is skipped
    for text in content.split('://')[1:]:
        if '"' not in text:
            # no closing quote, so the end of the url is unknown
            continue
        url = text.split('"', 1)[0]
        if '<' in url or '>' in url:
            # markup appears before the quote, so this is not
            # a plain quoted url
            continue
        if len(url) > max_url_length:
            return False
    return True
summary does not contain links + if message_json['object'].get('summary'): + if len(message_json['object']['summary']) > 1024: + print('INBOX: summary is too long ' + + message_json['actor'] + ' ' + + message_json['object']['summary']) + self._400() + self.server.postreq_busy = False + return 3 + if '://' in message_json['object']['summary']: + print('INBOX: summary should not contain links ' + + message_json['actor'] + ' ' + + message_json['object']['summary']) + self._400() + self.server.postreq_busy = False + return 3 # actor should look like a url if debug: diff --git a/inbox.py b/inbox.py index cd816268f..d8e443d76 100644 --- a/inbox.py +++ b/inbox.py @@ -126,6 +126,7 @@ from conversation import update_conversation from webapp_hashtagswarm import html_hash_tag_swarm from person import valid_sending_actor from fitnessFunctions import fitness_performance +from content import valid_url_lengths def _store_last_post_id(base_dir: str, nickname: str, domain: str, @@ -2586,6 +2587,12 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str, http_prefix, domain_full, person_cache): return False + + # check for urls which are too long + if not valid_url_lengths(content_str, 2048): + print('REJECT: url within content too long') + return False + # check for filtered content if is_filtered(base_dir, nickname, domain, content_str): print('REJECT: content filtered')