Check for invalid local links at a later stage in the inbox queue

merge-requests/30/head
Bob Mottram 2024-06-12 11:24:59 +01:00
parent 5a71b8c406
commit 04118c02ba
4 changed files with 106 additions and 41 deletions

View File

@@ -1906,12 +1906,33 @@ def words_similarity(content1: str, content2: str, min_words: int) -> int:
     return 100 - int(diff * 100 / len(histogram1.items()))
 
 
-def contains_invalid_local_links(content: str) -> bool:
+def contains_invalid_local_links(domain_full: str,
+                                 onion_domain: str, i2p_domain: str,
+                                 content: str) -> bool:
     """Returns true if the given content has invalid links
     """
     for inv_str in INVALID_CONTENT_STRINGS:
-        if '?' + inv_str + '=' in content:
-            return True
+        match_str = '?' + inv_str + '='
+        if match_str not in content:
+            continue
+        # extract the urls and check whether they are for the local domain
+        ctr = 0
+        sections = content.split(match_str)
+        final_section_index = len(sections) - 1
+        for section_str in sections:
+            if ctr == final_section_index:
+                continue
+            if '://' in section_str:
+                url = section_str.split('://')[-1]
+                if domain_full in url:
+                    return True
+                if onion_domain:
+                    if onion_domain in url:
+                        return True
+                if i2p_domain:
+                    if i2p_domain in url:
+                        return True
+            ctr += 1
     return False
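
For reference, a self-contained sketch of the revised check. The INVALID_CONTENT_STRINGS value below is a hypothetical stand-in (the real list is defined elsewhere in the same module), and the loop is condensed slightly: a link is now only rejected when the URL preceding the disallowed query parameter belongs to the instance's own clearnet, onion or i2p domain, rather than whenever the parameter appears anywhere in the content.

# Self-contained sketch of the revised check. INVALID_CONTENT_STRINGS is
# stubbed with a hypothetical value here; the real list lives in content.py.
INVALID_CONTENT_STRINGS = ['eventid']  # hypothetical stand-in

def contains_invalid_local_links(domain_full: str,
                                 onion_domain: str, i2p_domain: str,
                                 content: str) -> bool:
    """Returns true if the given content has invalid links
    """
    for inv_str in INVALID_CONTENT_STRINGS:
        match_str = '?' + inv_str + '='
        if match_str not in content:
            continue
        # check the url in front of each match; the text after the final
        # match cannot precede one, so it is skipped
        for section_str in content.split(match_str)[:-1]:
            if '://' not in section_str:
                continue
            url = section_str.split('://')[-1]
            if domain_full in url:
                return True
            if onion_domain and onion_domain in url:
                return True
            if i2p_domain and i2p_domain in url:
                return True
    return False

# A link such as https://example.net/?eventid=123 is only rejected when
# example.net is one of this instance's own domains.
print(contains_invalid_local_links('example.net', '', '',
                                   'see https://example.net/?eventid=123'))  # True
print(contains_invalid_local_links('example.net', '', '',
                                   'see https://other.site/?eventid=123'))   # False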

View File

@@ -107,7 +107,9 @@ def _receive_new_post_process_newpost(self, fields: {},
                                       buy_sites: [],
                                       project_version: str,
                                       proxy_type: str,
-                                      max_replies: int) -> int:
+                                      max_replies: int,
+                                      onion_domain: str,
+                                      i2p_domain: str) -> int:
     """ A new post has been received from the New Post screen and
     is then sent to the outbox
     """
@@ -204,7 +206,8 @@ def _receive_new_post_process_newpost(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited public post ' +
               str(message_json))
     if fields['schedulePost']:
@@ -500,7 +503,9 @@ def _receive_new_post_process_newunlisted(self, fields: {},
                                           buy_sites: [],
                                           project_version: str,
                                           proxy_type: str,
-                                          max_replies: int) -> int:
+                                          max_replies: int,
+                                          onion_domain: str,
+                                          i2p_domain: str) -> int:
     """Unlisted post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -588,7 +593,8 @@ def _receive_new_post_process_newunlisted(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited unlisted post ' +
               str(message_json))
@@ -647,7 +653,9 @@ def _receive_new_post_process_newfollowers(self, fields: {},
                                            buy_sites: [],
                                            project_version: str,
                                            proxy_type: str,
-                                           max_replies: int) -> int:
+                                           max_replies: int,
+                                           onion_domain: str,
+                                           i2p_domain: str) -> int:
     """Followers only post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -740,7 +748,8 @@ def _receive_new_post_process_newfollowers(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited followers post ' +
               str(message_json))
@@ -800,7 +809,9 @@ def _receive_new_post_process_newdm(self, fields: {},
                                     buy_sites: [],
                                     project_version: str,
                                     proxy_type: str,
-                                    max_replies: int) -> int:
+                                    max_replies: int,
+                                    onion_domain: str,
+                                    i2p_domain: str) -> int:
     """Direct message post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -905,7 +916,8 @@ def _receive_new_post_process_newdm(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited dm post ' +
               str(message_json))
@@ -965,7 +977,9 @@ def _receive_new_post_process_newreminder(self, fields: {}, nickname: str,
                                           max_hashtags: int,
                                           buy_sites: [],
                                           project_version: str,
-                                          proxy_type: str) -> int:
+                                          proxy_type: str,
+                                          onion_domain: str,
+                                          i2p_domain: str) -> int:
     """Reminder post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -1063,7 +1077,8 @@ def _receive_new_post_process_newreminder(self, fields: {}, nickname: str,
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited reminder post ' +
               str(message_json))
     if post_to_outbox(self, message_json,
@@ -1265,7 +1280,9 @@ def _receive_new_post_process_newreading(self, fields: {},
                                          buy_sites: [],
                                          project_version: str,
                                          proxy_type: str,
-                                         max_replies: int) -> int:
+                                         max_replies: int,
+                                         onion_domain: str,
+                                         i2p_domain: str) -> int:
     """Reading status post has been received from New Post screen
     and is then sent to the outbox
     """
@@ -1371,7 +1388,8 @@ def _receive_new_post_process_newreading(self, fields: {},
                                min_images_for_accounts,
                                max_hashtags,
                                buy_sites,
-                               auto_cw_cache)
+                               auto_cw_cache,
+                               onion_domain, i2p_domain)
         print('DEBUG: sending edited reading status post ' +
               str(message_json))
     if fields['schedulePost']:
@@ -1825,7 +1843,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain,
+            i2p_domain)
     if post_type == 'newblog':
         return _receive_new_post_process_newblog(
             self, fields,
@@ -1899,7 +1919,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain,
+            i2p_domain)
     if post_type == 'newfollowers':
         return _receive_new_post_process_newfollowers(
             self, fields,
@@ -1943,7 +1965,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain, i2p_domain)
     if post_type == 'newdm':
         return _receive_new_post_process_newdm(
             self, fields,
@@ -1988,7 +2011,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain,
+            i2p_domain)
     if post_type == 'newreminder':
         return _receive_new_post_process_newreminder(
             self, fields,
@@ -2032,7 +2057,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             max_hashtags,
             buy_sites,
             project_version,
-            proxy_type)
+            proxy_type,
+            onion_domain, i2p_domain)
     if post_type == 'newreport':
         return _receive_new_post_process_newreport(
             self, fields,
@@ -2104,7 +2130,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
             buy_sites,
             project_version,
             proxy_type,
-            max_replies)
+            max_replies,
+            onion_domain, i2p_domain)
     if post_type in ('newshare', 'newwanted'):
         return _receive_new_post_process_newshare(
             self, fields,

View File

@@ -24,7 +24,6 @@ from inbox import clear_queue_items
 from blocking import update_blocked_cache
 from blocking import is_blocked_nickname
 from blocking import is_blocked_domain
-from content import contains_invalid_local_links
 from content import valid_url_lengths
 from posts import add_to_field
 from utils import get_instance_url
@@ -508,13 +507,6 @@ def update_inbox_queue(self, nickname: str, message_json: {},
         # save the json for later queue processing
         message_bytes_decoded = message_bytes.decode('utf-8')
 
-        if debug:
-            print('INBOX: checking for invalid links')
-        if contains_invalid_local_links(message_bytes_decoded):
-            print('INBOX: post contains invalid local links ' +
-                  str(original_message_json))
-            return 5
-
         self.server.blocked_cache_last_updated = \
             update_blocked_cache(self.server.base_dir,
                                  self.server.blocked_cache,

View File

@@ -161,6 +161,7 @@ from webapp_hashtagswarm import html_hash_tag_swarm
 from person import valid_sending_actor
 from person import get_person_avatar_url
 from fitnessFunctions import fitness_performance
+from content import contains_invalid_local_links
 from content import reject_twitter_summary
 from content import load_dogwhistles
 from content import valid_url_lengths
@@ -1425,7 +1426,8 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
                         system_language: str,
                         http_prefix: str, domain_full: str,
                         person_cache: {},
-                        max_hashtags: int) -> bool:
+                        max_hashtags: int,
+                        onion_domain: str, i2p_domain: str) -> bool:
     """Is the content of a received post valid?
     Check for bad html
     Check for hellthreads
@@ -1538,6 +1540,15 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
                                 content_str)
         return False
 
+    if contains_invalid_local_links(domain_full,
+                                    onion_domain, i2p_domain,
+                                    content_str):
+        if message_json['object'].get('id'):
+            print('REJECT: post contains invalid local links ' +
+                  str(message_json['object']['id']) + ' ' +
+                  str(content_str))
+        return False
+
     # check (rough) number of mentions
     mentions_est = _estimate_number_of_mentions(content_str)
     if mentions_est > max_mentions:
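
A minimal sketch of how the relocated check is applied during validation, reusing the contains_invalid_local_links sketch above. The signature here is heavily simplified; the real _valid_post_content takes many more parameters and performs many other checks before this one.

# Heavily simplified sketch of the rejection path within post validation.
def _valid_post_content(content_str: str, message_json: dict,
                        domain_full: str,
                        onion_domain: str, i2p_domain: str) -> bool:
    if contains_invalid_local_links(domain_full,
                                    onion_domain, i2p_domain,
                                    content_str):
        if message_json['object'].get('id'):
            print('REJECT: post contains invalid local links ' +
                  str(message_json['object']['id']))
        return False
    return True

# Example: a post linking back to this instance with a disallowed query
# parameter is rejected ('eventid' is the hypothetical entry assumed above).
post = {'object': {'id': 'https://other.site/users/alice/statuses/1'}}
print(_valid_post_content('<a href="https://example.net/?eventid=9">x</a>',
                          post, 'example.net', '', ''))  # False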
@@ -1650,7 +1661,9 @@ def _receive_edit_to_post(recent_posts_cache: {}, message_json: {},
                           min_images_for_accounts: [],
                           max_hashtags: int,
                           buy_sites: {},
-                          auto_cw_cache: {}) -> bool:
+                          auto_cw_cache: {},
+                          onion_domain: str,
+                          i2p_domain: str) -> bool:
     """A post was edited
     """
     if not has_object_dict(message_json):
@@ -1677,7 +1690,7 @@ def _receive_edit_to_post(recent_posts_cache: {}, message_json: {},
                                allow_local_network_access, debug,
                                system_language, http_prefix,
                                domain_full, person_cache,
-                               max_hashtags):
+                               max_hashtags, onion_domain, i2p_domain):
         print('EDITPOST: contains invalid content' + str(message_json))
         return False
@@ -1819,7 +1832,9 @@ def update_edited_post(base_dir: str,
                        min_images_for_accounts: [],
                        max_hashtags: int,
                        buy_sites: {},
-                       auto_cw_cache: {}) -> None:
+                       auto_cw_cache: {},
+                       onion_domain: str,
+                       i2p_domain: str) -> None:
     """ When an edited post is created this assigns
     a published and updated date to it, and uses
     the previous id
@@ -1868,7 +1883,8 @@ def update_edited_post(base_dir: str,
                           cw_lists, dogwhistles,
                           min_images_for_accounts,
                           max_hashtags, buy_sites,
-                          auto_cw_cache)
+                          auto_cw_cache,
+                          onion_domain, i2p_domain)
     # update the index
     id_str = edited_postid.split('/')[-1]
@@ -2015,7 +2031,9 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str,
                              min_images_for_accounts: [],
                              max_hashtags: int,
                              buy_sites: {},
-                             auto_cw_cache: {}) -> bool:
+                             auto_cw_cache: {},
+                             onion_domain: str,
+                             i2p_domain: str) -> bool:
     """Receives an Update activity within the POST section of HTTPServer
     """
@@ -2061,7 +2079,8 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str,
                                   cw_lists, dogwhistles,
                                   min_images_for_accounts,
                                   max_hashtags, buy_sites,
-                                  auto_cw_cache):
+                                  auto_cw_cache,
+                                  onion_domain, i2p_domain):
             print('EDITPOST: received ' + message_json['object']['id'])
             return True
         else:
@@ -4776,7 +4795,9 @@ def _former_representations_to_edits(base_dir: str,
                                      http_prefix: str,
                                      domain_full: str, person_cache: {},
                                      max_hashtags: int,
-                                     port: int) -> bool:
+                                     port: int,
+                                     onion_domain: str,
+                                     i2p_domain: str) -> bool:
     """ Some instances use formerRepresentations to store
     previous edits
     """
@@ -4834,7 +4855,7 @@ def _former_representations_to_edits(base_dir: str,
                                    allow_local_network_access, debug,
                                    system_language, http_prefix,
                                    domain_full, person_cache,
-                                   max_hashtags):
+                                   max_hashtags, onion_domain, i2p_domain):
             continue
         post_history_json[published_str] = prev_post_json
@@ -5235,7 +5256,7 @@ def _inbox_after_initial(server, inbox_start_time,
                                allow_local_network_access, debug,
                                system_language, http_prefix,
                                domain_full, person_cache,
-                               max_hashtags):
+                               max_hashtags, onion_domain, i2p_domain):
         fitness_performance(inbox_start_time, server.fitness,
                             'INBOX', '_valid_post_content',
                             debug)
@@ -5452,7 +5473,9 @@ def _inbox_after_initial(server, inbox_start_time,
                                              http_prefix,
                                              domain_full,
                                              person_cache,
-                                             max_hashtags, port):
+                                             max_hashtags, port,
+                                             onion_domain,
+                                             i2p_domain):
             # ensure that there is an updated entry
             # for the publication date
             if post_json_object['object'].get('published') and \
@@ -6719,7 +6742,9 @@ def run_inbox_queue(server,
                                   cw_lists, dogwhistles,
                                   server.min_images_for_accounts,
                                   max_hashtags, server.buy_sites,
-                                  server.auto_cw_cache):
+                                  server.auto_cw_cache,
+                                  onion_domain,
+                                  i2p_domain):
                 if debug:
                     print('Queue: Update accepted from ' + key_id)
                 if os.path.isfile(queue_filename):