From 04118c02ba3c521bf8eff4580f6230ca83bd964e Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Wed, 12 Jun 2024 11:24:59 +0100
Subject: [PATCH] Check for invalid local links at a later stage in the inbox queue

---
 content.py             | 27 ++++++++++++++++--
 daemon_post_receive.py | 63 ++++++++++++++++++++++++++++++------------
 daemon_utils.py        |  8 ------
 inbox.py               | 49 ++++++++++++++++++++++++--------
 4 files changed, 106 insertions(+), 41 deletions(-)

diff --git a/content.py b/content.py
index 5577e95b5..7ffe9d031 100644
--- a/content.py
+++ b/content.py
@@ -1906,12 +1906,33 @@ def words_similarity(content1: str, content2: str, min_words: int) -> int:
     return 100 - int(diff * 100 / len(histogram1.items()))
 
 
-def contains_invalid_local_links(content: str) -> bool:
+def contains_invalid_local_links(domain_full: str,
+                                 onion_domain: str, i2p_domain: str,
+                                 content: str) -> bool:
     """Returns true if the given content has invalid links
     """
     for inv_str in INVALID_CONTENT_STRINGS:
-        if '?' + inv_str + '=' in content:
-            return True
+        match_str = '?' + inv_str + '='
+        if match_str not in content:
+            continue
+        # extract the urls and check whether they are for the local domain
+        ctr = 0
+        sections = content.split(match_str)
+        final_section_index = len(sections) - 1
+        for section_str in sections:
+            if ctr == final_section_index:
+                continue
+            if '://' in section_str:
+                url = section_str.split('://')[-1]
+                if domain_full in url:
+                    return True
+                if onion_domain:
+                    if onion_domain in url:
+                        return True
+                if i2p_domain:
+                    if i2p_domain in url:
+                        return True
+            ctr += 1
     return False
 
 
diff --git a/daemon_post_receive.py b/daemon_post_receive.py
index 9180cb8d3..123a08de0 100644
--- a/daemon_post_receive.py
+++ b/daemon_post_receive.py
@@ -107,7 +107,9 @@ def _receive_new_post_process_newpost(self, fields: {},
                                       buy_sites: [],
                                       project_version: str,
                                       proxy_type: str,
-                                      max_replies: int) -> int:
+                                      max_replies: int,
+                                      onion_domain: str,
+                                      i2p_domain: str) -> int:
     """ A new post has been received from the New Post screen
     and is then sent to the outbox
     """
@@ -204,7 +206,8 @@ def _receive_new_post_process_newpost(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited public post ' +
               str(message_json))
     if fields['schedulePost']:
@@ -500,7 +503,9 @@ def _receive_new_post_process_newunlisted(self, fields: {},
                                           buy_sites: [],
                                           project_version: str,
                                           proxy_type: str,
-                                          max_replies: int) -> int:
+                                          max_replies: int,
+                                          onion_domain: str,
+                                          i2p_domain: str) -> int:
     """Unlisted post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -588,7 +593,8 @@ def _receive_new_post_process_newunlisted(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited unlisted post ' +
               str(message_json))
 
@@ -647,7 +653,9 @@ def _receive_new_post_process_newfollowers(self, fields: {},
                                            buy_sites: [],
                                            project_version: str,
                                            proxy_type: str,
-                                           max_replies: int) -> int:
+                                           max_replies: int,
+                                           onion_domain: str,
+                                           i2p_domain: str) -> int:
     """Followers only post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -740,7 +748,8 @@ def _receive_new_post_process_newfollowers(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited followers post ' +
               str(message_json))
 
@@ -800,7 +809,9 @@ def _receive_new_post_process_newdm(self, fields: {},
                                     buy_sites: [],
                                     project_version: str,
                                     proxy_type: str,
-                                    max_replies: int) -> int:
+                                    max_replies: int,
+                                    onion_domain: str,
+                                    i2p_domain: str) -> int:
     """Direct message post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -905,7 +916,8 @@ def _receive_new_post_process_newdm(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited dm post ' +
               str(message_json))
 
@@ -965,7 +977,9 @@ def _receive_new_post_process_newreminder(self, fields: {}, nickname: str,
                                           max_hashtags: int,
                                           buy_sites: [],
                                           project_version: str,
-                                          proxy_type: str) -> int:
+                                          proxy_type: str,
+                                          onion_domain: str,
+                                          i2p_domain: str) -> int:
     """Reminder post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -1063,7 +1077,8 @@ def _receive_new_post_process_newreminder(self, fields: {}, nickname: str,
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited reminder post ' +
               str(message_json))
     if post_to_outbox(self, message_json,
@@ -1265,7 +1280,9 @@ def _receive_new_post_process_newreading(self, fields: {},
                                          buy_sites: [],
                                          project_version: str,
                                          proxy_type: str,
-                                         max_replies: int) -> int:
+                                         max_replies: int,
+                                         onion_domain: str,
+                                         i2p_domain: str) -> int:
     """Reading status post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -1371,7 +1388,8 @@ def _receive_new_post_process_newreading(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited reading status post ' +
               str(message_json))
     if fields['schedulePost']:
@@ -1825,7 +1843,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain,
+            i2p_domain)
     if post_type == 'newblog':
         return _receive_new_post_process_newblog(
             self, fields,
@@ -1899,7 +1919,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain,
+            i2p_domain)
     if post_type == 'newfollowers':
         return _receive_new_post_process_newfollowers(
             self, fields,
@@ -1943,7 +1965,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain, i2p_domain)
     if post_type == 'newdm':
         return _receive_new_post_process_newdm(
             self, fields,
@@ -1988,7 +2011,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain,
+            i2p_domain)
     if post_type == 'newreminder':
         return _receive_new_post_process_newreminder(
             self, fields,
@@ -2032,7 +2057,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             max_hashtags,
             buy_sites,
             project_version,
-            proxy_type)
+            proxy_type,
+            onion_domain, i2p_domain)
     if post_type == 'newreport':
         return _receive_new_post_process_newreport(
             self, fields,
@@ -2104,7 +2130,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain, i2p_domain)
     if post_type in ('newshare', 'newwanted'):
         return _receive_new_post_process_newshare(
             self, fields,
diff --git a/daemon_utils.py b/daemon_utils.py
index bea5d4fd0..6605ee1a2 100644
--- a/daemon_utils.py
+++ b/daemon_utils.py
@@ -24,7 +24,6 @@ from inbox import clear_queue_items
 from blocking import update_blocked_cache
 from blocking import is_blocked_nickname
 from blocking import is_blocked_domain
-from content import contains_invalid_local_links
 from content import valid_url_lengths
 from posts import add_to_field
 from utils import get_instance_url
@@ -508,13 +507,6 @@ def update_inbox_queue(self, nickname: str, message_json: {},
     # save the json for later queue processing
     message_bytes_decoded = message_bytes.decode('utf-8')
 
-    if debug:
-        print('INBOX: checking for invalid links')
-    if contains_invalid_local_links(message_bytes_decoded):
-        print('INBOX: post contains invalid local links ' +
-              str(original_message_json))
-        return 5
-
     self.server.blocked_cache_last_updated = \
         update_blocked_cache(self.server.base_dir,
                              self.server.blocked_cache,
diff --git a/inbox.py b/inbox.py
index 4bbeb6478..fa8878ca2 100644
--- a/inbox.py
+++ b/inbox.py
@@ -161,6 +161,7 @@ from webapp_hashtagswarm import html_hash_tag_swarm
 from person import valid_sending_actor
 from person import get_person_avatar_url
 from fitnessFunctions import fitness_performance
+from content import contains_invalid_local_links
 from content import reject_twitter_summary
 from content import load_dogwhistles
 from content import valid_url_lengths
@@ -1425,7 +1426,8 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
                         system_language: str,
                         http_prefix: str, domain_full: str,
                         person_cache: {},
-                        max_hashtags: int) -> bool:
+                        max_hashtags: int,
+                        onion_domain: str, i2p_domain: str) -> bool:
     """Is the content of a received post valid?
     Check for bad html
     Check for hellthreads
@@ -1538,6 +1540,15 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
                               content_str)
         return False
 
+    if contains_invalid_local_links(domain_full,
+                                    onion_domain, i2p_domain,
+                                    content_str):
+        if message_json['object'].get('id'):
+            print('REJECT: post contains invalid local links ' +
+                  str(message_json['object']['id']) + ' ' +
+                  str(content_str))
+        return False
+
     # check (rough) number of mentions
     mentions_est = _estimate_number_of_mentions(content_str)
     if mentions_est > max_mentions:
@@ -1650,7 +1661,9 @@ def _receive_edit_to_post(recent_posts_cache: {}, message_json: {},
                           min_images_for_accounts: [],
                           max_hashtags: int,
                           buy_sites: {},
-                          auto_cw_cache: {}) -> bool:
+                          auto_cw_cache: {},
+                          onion_domain: str,
+                          i2p_domain: str) -> bool:
     """A post was edited
     """
     if not has_object_dict(message_json):
@@ -1677,7 +1690,7 @@ def _receive_edit_to_post(recent_posts_cache: {}, message_json: {},
                                allow_local_network_access, debug,
                                system_language, http_prefix,
                                domain_full, person_cache,
-                               max_hashtags):
+                               max_hashtags, onion_domain, i2p_domain):
         print('EDITPOST: contains invalid content' + str(message_json))
         return False
 
@@ -1819,7 +1832,9 @@ def update_edited_post(base_dir: str,
                        min_images_for_accounts: [],
                        max_hashtags: int,
                        buy_sites: {},
-                       auto_cw_cache: {}) -> None:
+                       auto_cw_cache: {},
+                       onion_domain: str,
+                       i2p_domain: str) -> None:
     """ When an edited post is created this assigns
     a published and updated date to it, and uses
     the previous id
@@ -1868,7 +1883,8 @@ def update_edited_post(base_dir: str,
                           cw_lists, dogwhistles,
                           min_images_for_accounts,
                           max_hashtags, buy_sites,
-                          auto_cw_cache)
+                          auto_cw_cache,
+                          onion_domain, i2p_domain)
 
     # update the index
     id_str = edited_postid.split('/')[-1]
@@ -2015,7 +2031,9 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str,
                              min_images_for_accounts: [],
                              max_hashtags: int,
                              buy_sites: {},
-                             auto_cw_cache: {}) -> bool:
+                             auto_cw_cache: {},
+                             onion_domain: str,
+                             i2p_domain: str) -> bool:
     """Receives an Update activity within the POST section of
     HTTPServer
     """
@@ -2061,7 +2079,8 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str,
                                  cw_lists, dogwhistles,
                                  min_images_for_accounts,
                                  max_hashtags, buy_sites,
-                                 auto_cw_cache):
+                                 auto_cw_cache,
+                                 onion_domain, i2p_domain):
             print('EDITPOST: received ' + message_json['object']['id'])
             return True
         else:
@@ -4776,7 +4795,9 @@ def _former_representations_to_edits(base_dir: str,
                                      http_prefix: str, domain_full: str,
                                      person_cache: {},
                                      max_hashtags: int,
-                                     port: int) -> bool:
+                                     port: int,
+                                     onion_domain: str,
+                                     i2p_domain: str) -> bool:
     """ Some instances use formerRepresentations
     to store previous edits
     """
@@ -4834,7 +4855,7 @@ def _former_representations_to_edits(base_dir: str,
                                    allow_local_network_access, debug,
                                    system_language, http_prefix,
                                    domain_full, person_cache,
-                                   max_hashtags):
+                                   max_hashtags, onion_domain, i2p_domain):
             continue
 
         post_history_json[published_str] = prev_post_json
@@ -5235,7 +5256,7 @@ def _inbox_after_initial(server, inbox_start_time,
                                allow_local_network_access, debug,
                                system_language, http_prefix,
                                domain_full, person_cache,
-                               max_hashtags):
+                               max_hashtags, onion_domain, i2p_domain):
         fitness_performance(inbox_start_time, server.fitness,
                             'INBOX', '_valid_post_content', debug)
 
@@ -5452,7 +5473,9 @@ def _inbox_after_initial(server, inbox_start_time,
                                             http_prefix,
                                             domain_full,
                                             person_cache,
-                                            max_hashtags, port):
+                                            max_hashtags, port,
+                                            onion_domain,
+                                            i2p_domain):
             # ensure that there is an updated entry
             # for the publication date
             if post_json_object['object'].get('published') and \
@@ -6719,7 +6742,9 @@ def run_inbox_queue(server,
                                     cw_lists, dogwhistles,
                                     server.min_images_for_accounts,
                                     max_hashtags, server.buy_sites,
-                                    server.auto_cw_cache):
+                                    server.auto_cw_cache,
+                                    onion_domain,
+                                    i2p_domain):
                 if debug:
                     print('Queue: Update accepted from ' + key_id)
                 if os.path.isfile(queue_filename):
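
Note: the relocated check can be exercised in isolation. The sketch below mirrors the reworked contains_invalid_local_links() from content.py in this patch; the single INVALID_CONTENT_STRINGS entry and the domain names used in the example calls are illustrative stand-ins, not the project's actual values.

# minimal sketch of the relocated link check; mirrors the logic of the
# patched contains_invalid_local_links() in content.py
# 'example_param' stands in for the real INVALID_CONTENT_STRINGS entries
INVALID_CONTENT_STRINGS = ['example_param']


def contains_invalid_local_links(domain_full: str,
                                 onion_domain: str, i2p_domain: str,
                                 content: str) -> bool:
    """Returns true if the given content has invalid local links"""
    for inv_str in INVALID_CONTENT_STRINGS:
        match_str = '?' + inv_str + '='
        if match_str not in content:
            continue
        # extract the urls and check whether they are for the local domain
        ctr = 0
        sections = content.split(match_str)
        final_section_index = len(sections) - 1
        for section_str in sections:
            if ctr == final_section_index:
                continue
            if '://' in section_str:
                url = section_str.split('://')[-1]
                if domain_full in url:
                    return True
                if onion_domain and onion_domain in url:
                    return True
                if i2p_domain and i2p_domain in url:
                    return True
            ctr += 1
    return False


# a suspect query parameter on a link back to the local domain is rejected
print(contains_invalid_local_links(
    'instance.example', '', '',
    '<a href="https://instance.example/users/bob?example_param=1">x</a>'))
# prints True

# the same parameter on a remote domain is allowed through
print(contains_invalid_local_links(
    'instance.example', '', '',
    '<a href="https://other.example/users/bob?example_param=1">x</a>'))
# prints False

Moving the call from update_inbox_queue() into _valid_post_content() means the rejection happens during inbox queue processing, where domain_full, onion_domain and i2p_domain are available and only the post's content string is inspected, rather than scanning the raw undecoded message bytes on arrival.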