Check for invalid local links at a later stage in the inbox queue

main
Bob Mottram 2024-06-12 11:24:59 +01:00
parent 5a71b8c406
commit 04118c02ba
4 changed files with 106 additions and 41 deletions

View File

@ -1906,12 +1906,33 @@ def words_similarity(content1: str, content2: str, min_words: int) -> int:
return 100 - int(diff * 100 / len(histogram1.items()))
def contains_invalid_local_links(domain_full: str,
                                 onion_domain: str, i2p_domain: str,
                                 content: str) -> bool:
    """Returns true if the given content has invalid links

    A link is treated as invalid when one of the argument names within
    INVALID_CONTENT_STRINGS appears in the form ?<name>= and the text
    leading up to it contains a url for one of the domains of this
    instance (domain_full, onion_domain or i2p_domain).
    The same arguments on urls for any other domain are not flagged.
    Domains which are empty or None are not checked.
    """
    # Only non-empty domains can be matched, because an empty string
    # is a substring of every url and would flag everything
    local_domains = [dom for dom in (domain_full, onion_domain, i2p_domain)
                     if dom]
    if not local_domains:
        return False

    for inv_str in INVALID_CONTENT_STRINGS:
        match_str = '?' + inv_str + '='
        if match_str not in content:
            continue
        # Every section except the final one ends immediately before
        # an occurrence of match_str, so the final one is not examined
        for section_str in content.split(match_str)[:-1]:
            if '://' not in section_str:
                continue
            # the text after the last protocol prefix is the url
            # which the invalid argument was attached to
            url = section_str.split('://')[-1]
            if any(dom in url for dom in local_domains):
                return True
    return False

View File

@ -107,7 +107,9 @@ def _receive_new_post_process_newpost(self, fields: {},
buy_sites: [],
project_version: str,
proxy_type: str,
max_replies: int) -> int:
max_replies: int,
onion_domain: str,
i2p_domain: str) -> int:
""" A new post has been received from the New Post screen and
is then sent to the outbox
"""
@ -204,7 +206,8 @@ def _receive_new_post_process_newpost(self, fields: {},
min_images_for_accounts,
max_hashtags,
buy_sites,
auto_cw_cache)
auto_cw_cache,
onion_domain, i2p_domain)
print('DEBUG: sending edited public post ' +
str(message_json))
if fields['schedulePost']:
@ -500,7 +503,9 @@ def _receive_new_post_process_newunlisted(self, fields: {},
buy_sites: [],
project_version: str,
proxy_type: str,
max_replies: int) -> int:
max_replies: int,
onion_domain: str,
i2p_domain: str) -> int:
"""Unlisted post has been received from New Post screen
and is then sent to the outbox
"""
@ -588,7 +593,8 @@ def _receive_new_post_process_newunlisted(self, fields: {},
min_images_for_accounts,
max_hashtags,
buy_sites,
auto_cw_cache)
auto_cw_cache,
onion_domain, i2p_domain)
print('DEBUG: sending edited unlisted post ' +
str(message_json))
@ -647,7 +653,9 @@ def _receive_new_post_process_newfollowers(self, fields: {},
buy_sites: [],
project_version: str,
proxy_type: str,
max_replies: int) -> int:
max_replies: int,
onion_domain: str,
i2p_domain: str) -> int:
"""Followers only post has been received from New Post screen
and is then sent to the outbox
"""
@ -740,7 +748,8 @@ def _receive_new_post_process_newfollowers(self, fields: {},
min_images_for_accounts,
max_hashtags,
buy_sites,
auto_cw_cache)
auto_cw_cache,
onion_domain, i2p_domain)
print('DEBUG: sending edited followers post ' +
str(message_json))
@ -800,7 +809,9 @@ def _receive_new_post_process_newdm(self, fields: {},
buy_sites: [],
project_version: str,
proxy_type: str,
max_replies: int) -> int:
max_replies: int,
onion_domain: str,
i2p_domain: str) -> int:
"""Direct message post has been received from New Post screen
and is then sent to the outbox
"""
@ -905,7 +916,8 @@ def _receive_new_post_process_newdm(self, fields: {},
min_images_for_accounts,
max_hashtags,
buy_sites,
auto_cw_cache)
auto_cw_cache,
onion_domain, i2p_domain)
print('DEBUG: sending edited dm post ' +
str(message_json))
@ -965,7 +977,9 @@ def _receive_new_post_process_newreminder(self, fields: {}, nickname: str,
max_hashtags: int,
buy_sites: [],
project_version: str,
proxy_type: str) -> int:
proxy_type: str,
onion_domain: str,
i2p_domain: str) -> int:
"""Reminder post has been received from New Post screen
and is then sent to the outbox
"""
@ -1063,7 +1077,8 @@ def _receive_new_post_process_newreminder(self, fields: {}, nickname: str,
min_images_for_accounts,
max_hashtags,
buy_sites,
auto_cw_cache)
auto_cw_cache,
onion_domain, i2p_domain)
print('DEBUG: sending edited reminder post ' +
str(message_json))
if post_to_outbox(self, message_json,
@ -1265,7 +1280,9 @@ def _receive_new_post_process_newreading(self, fields: {},
buy_sites: [],
project_version: str,
proxy_type: str,
max_replies: int) -> int:
max_replies: int,
onion_domain: str,
i2p_domain: str) -> int:
"""Reading status post has been received from New Post screen
and is then sent to the outbox
"""
@ -1371,7 +1388,8 @@ def _receive_new_post_process_newreading(self, fields: {},
min_images_for_accounts,
max_hashtags,
buy_sites,
auto_cw_cache)
auto_cw_cache,
onion_domain, i2p_domain)
print('DEBUG: sending edited reading status post ' +
str(message_json))
if fields['schedulePost']:
@ -1825,7 +1843,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
buy_sites,
project_version,
proxy_type,
max_replies)
max_replies,
onion_domain,
i2p_domain)
if post_type == 'newblog':
return _receive_new_post_process_newblog(
self, fields,
@ -1899,7 +1919,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
buy_sites,
project_version,
proxy_type,
max_replies)
max_replies,
onion_domain,
i2p_domain)
if post_type == 'newfollowers':
return _receive_new_post_process_newfollowers(
self, fields,
@ -1943,7 +1965,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
buy_sites,
project_version,
proxy_type,
max_replies)
max_replies,
onion_domain, i2p_domain)
if post_type == 'newdm':
return _receive_new_post_process_newdm(
self, fields,
@ -1988,7 +2011,9 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
buy_sites,
project_version,
proxy_type,
max_replies)
max_replies,
onion_domain,
i2p_domain)
if post_type == 'newreminder':
return _receive_new_post_process_newreminder(
self, fields,
@ -2032,7 +2057,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
max_hashtags,
buy_sites,
project_version,
proxy_type)
proxy_type,
onion_domain, i2p_domain)
if post_type == 'newreport':
return _receive_new_post_process_newreport(
self, fields,
@ -2104,7 +2130,8 @@ def _receive_new_post_process(self, post_type: str, path: str, headers: {},
buy_sites,
project_version,
proxy_type,
max_replies)
max_replies,
onion_domain, i2p_domain)
if post_type in ('newshare', 'newwanted'):
return _receive_new_post_process_newshare(
self, fields,

View File

@ -24,7 +24,6 @@ from inbox import clear_queue_items
from blocking import update_blocked_cache
from blocking import is_blocked_nickname
from blocking import is_blocked_domain
from content import contains_invalid_local_links
from content import valid_url_lengths
from posts import add_to_field
from utils import get_instance_url
@ -508,13 +507,6 @@ def update_inbox_queue(self, nickname: str, message_json: {},
# save the json for later queue processing
message_bytes_decoded = message_bytes.decode('utf-8')
if debug:
print('INBOX: checking for invalid links')
if contains_invalid_local_links(message_bytes_decoded):
print('INBOX: post contains invalid local links ' +
str(original_message_json))
return 5
self.server.blocked_cache_last_updated = \
update_blocked_cache(self.server.base_dir,
self.server.blocked_cache,

View File

@ -161,6 +161,7 @@ from webapp_hashtagswarm import html_hash_tag_swarm
from person import valid_sending_actor
from person import get_person_avatar_url
from fitnessFunctions import fitness_performance
from content import contains_invalid_local_links
from content import reject_twitter_summary
from content import load_dogwhistles
from content import valid_url_lengths
@ -1425,7 +1426,8 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
system_language: str,
http_prefix: str, domain_full: str,
person_cache: {},
max_hashtags: int) -> bool:
max_hashtags: int,
onion_domain: str, i2p_domain: str) -> bool:
"""Is the content of a received post valid?
Check for bad html
Check for hellthreads
@ -1538,6 +1540,15 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
content_str)
return False
if contains_invalid_local_links(domain_full,
onion_domain, i2p_domain,
content_str):
if message_json['object'].get('id'):
print('REJECT: post contains invalid local links ' +
str(message_json['object']['id']) + ' ' +
str(content_str))
return False
# check (rough) number of mentions
mentions_est = _estimate_number_of_mentions(content_str)
if mentions_est > max_mentions:
@ -1650,7 +1661,9 @@ def _receive_edit_to_post(recent_posts_cache: {}, message_json: {},
min_images_for_accounts: [],
max_hashtags: int,
buy_sites: {},
auto_cw_cache: {}) -> bool:
auto_cw_cache: {},
onion_domain: str,
i2p_domain: str) -> bool:
"""A post was edited
"""
if not has_object_dict(message_json):
@ -1677,7 +1690,7 @@ def _receive_edit_to_post(recent_posts_cache: {}, message_json: {},
allow_local_network_access, debug,
system_language, http_prefix,
domain_full, person_cache,
max_hashtags):
max_hashtags, onion_domain, i2p_domain):
print('EDITPOST: contains invalid content' + str(message_json))
return False
@ -1819,7 +1832,9 @@ def update_edited_post(base_dir: str,
min_images_for_accounts: [],
max_hashtags: int,
buy_sites: {},
auto_cw_cache: {}) -> None:
auto_cw_cache: {},
onion_domain: str,
i2p_domain: str) -> None:
""" When an edited post is created this assigns
a published and updated date to it, and uses
the previous id
@ -1868,7 +1883,8 @@ def update_edited_post(base_dir: str,
cw_lists, dogwhistles,
min_images_for_accounts,
max_hashtags, buy_sites,
auto_cw_cache)
auto_cw_cache,
onion_domain, i2p_domain)
# update the index
id_str = edited_postid.split('/')[-1]
@ -2015,7 +2031,9 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str,
min_images_for_accounts: [],
max_hashtags: int,
buy_sites: {},
auto_cw_cache: {}) -> bool:
auto_cw_cache: {},
onion_domain: str,
i2p_domain: str) -> bool:
"""Receives an Update activity within the POST section of HTTPServer
"""
@ -2061,7 +2079,8 @@ def _receive_update_activity(recent_posts_cache: {}, session, base_dir: str,
cw_lists, dogwhistles,
min_images_for_accounts,
max_hashtags, buy_sites,
auto_cw_cache):
auto_cw_cache,
onion_domain, i2p_domain):
print('EDITPOST: received ' + message_json['object']['id'])
return True
else:
@ -4776,7 +4795,9 @@ def _former_representations_to_edits(base_dir: str,
http_prefix: str,
domain_full: str, person_cache: {},
max_hashtags: int,
port: int) -> bool:
port: int,
onion_domain: str,
i2p_domain: str) -> bool:
""" Some instances use formerRepresentations to store
previous edits
"""
@ -4834,7 +4855,7 @@ def _former_representations_to_edits(base_dir: str,
allow_local_network_access, debug,
system_language, http_prefix,
domain_full, person_cache,
max_hashtags):
max_hashtags, onion_domain, i2p_domain):
continue
post_history_json[published_str] = prev_post_json
@ -5235,7 +5256,7 @@ def _inbox_after_initial(server, inbox_start_time,
allow_local_network_access, debug,
system_language, http_prefix,
domain_full, person_cache,
max_hashtags):
max_hashtags, onion_domain, i2p_domain):
fitness_performance(inbox_start_time, server.fitness,
'INBOX', '_valid_post_content',
debug)
@ -5452,7 +5473,9 @@ def _inbox_after_initial(server, inbox_start_time,
http_prefix,
domain_full,
person_cache,
max_hashtags, port):
max_hashtags, port,
onion_domain,
i2p_domain):
# ensure that there is an updated entry
# for the publication date
if post_json_object['object'].get('published') and \
@ -6719,7 +6742,9 @@ def run_inbox_queue(server,
cw_lists, dogwhistles,
server.min_images_for_accounts,
max_hashtags, server.buy_sites,
server.auto_cw_cache):
server.auto_cw_cache,
onion_domain,
i2p_domain):
if debug:
print('Queue: Update accepted from ' + key_id)
if os.path.isfile(queue_filename):