From 973e2e1d138c0433bdeb577f5af8d5c1d0276cb3 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 19:26:54 +0100 Subject: [PATCH 01/17] Profiling for inbox --- daemon.py | 4 ++ inbox.py | 175 +++++++++++++++++++++++++++++++++++++++++++++- webapp_profile.py | 2 + 3 files changed, 179 insertions(+), 2 deletions(-) diff --git a/daemon.py b/daemon.py index 62a12a720..88009d8bd 100644 --- a/daemon.py +++ b/daemon.py @@ -15710,6 +15710,8 @@ class PubServer(BaseHTTPRequestHandler): if html_getreq and not graph.endswith('.json'): if graph == 'post': graph = '_POST' + elif graph == 'inbox': + graph = 'INBOX' elif graph == 'get': graph = '_GET' msg = \ @@ -15728,6 +15730,8 @@ class PubServer(BaseHTTPRequestHandler): graph = graph.replace('.json', '') if graph == 'post': graph = '_POST' + elif graph == 'inbox': + graph = 'INBOX' elif graph == 'get': graph = '_GET' watch_points_json = \ diff --git a/inbox.py b/inbox.py index 24ed210ec..ec155986c 100644 --- a/inbox.py +++ b/inbox.py @@ -125,6 +125,7 @@ from notifyOnPost import notify_when_person_posts from conversation import update_conversation from webapp_hashtagswarm import html_hash_tag_swarm from person import valid_sending_actor +from fitnessFunctions import fitness_performance def _store_last_post_id(base_dir: str, nickname: str, domain: str, @@ -3526,7 +3527,7 @@ def _check_for_git_patches(base_dir: str, nickname: str, domain: str, return 0 -def _inbox_after_initial(server, +def _inbox_after_initial(server, inbox_start_time, recent_posts_cache: {}, max_recent_posts: int, session, session_onion, session_i2p, key_id: str, handle: str, message_json: {}, @@ -3602,6 +3603,9 @@ def _inbox_after_initial(server, bold_reading): if debug: print('DEBUG: Like accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_like', + debug) return False if _receive_undo_like(recent_posts_cache, @@ -3625,6 +3629,9 @@ def _inbox_after_initial(server, bold_reading): if debug: print('DEBUG: Undo like accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_undo_like', + debug) return False if _receive_reaction(recent_posts_cache, @@ -3649,6 +3656,9 @@ def _inbox_after_initial(server, bold_reading): if debug: print('DEBUG: Reaction accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_reaction', + debug) return False if _receive_undo_reaction(recent_posts_cache, @@ -3672,6 +3682,9 @@ def _inbox_after_initial(server, bold_reading): if debug: print('DEBUG: Undo reaction accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_undo_reaction', + debug) return False if _receive_bookmark(recent_posts_cache, @@ -3695,6 +3708,9 @@ def _inbox_after_initial(server, bold_reading): if debug: print('DEBUG: Bookmark accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_bookmark', + debug) return False if _receive_undo_bookmark(recent_posts_cache, @@ -3718,6 +3734,9 @@ def _inbox_after_initial(server, bold_reading): if debug: print('DEBUG: Undo bookmark accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_undo_bookmark', + debug) return False if is_create_inside_announce(message_json): @@ -3745,6 +3764,9 @@ def _inbox_after_initial(server, bold_reading): if debug: print('DEBUG: Announce accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_announce', + debug) if _receive_undo_announce(recent_posts_cache, session, handle, is_group, @@ -3758,6 +3780,9 @@ def _inbox_after_initial(server, debug): if debug: print('DEBUG: Undo announce accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_undo_announce', + debug) return False if _receive_delete(session, handle, is_group, @@ -3772,6 +3797,9 @@ def _inbox_after_initial(server, recent_posts_cache): if debug: print('DEBUG: Delete accepted from ' + actor) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_delete', + debug) return False if debug: @@ -3795,6 +3823,9 @@ def _inbox_after_initial(server, allow_local_network_access, debug, system_language, http_prefix, domain_full, person_cache): + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_valid_post_content', + debug) # is the sending actor valid? if not valid_sending_actor(session, base_dir, nickname, domain, person_cache, post_json_object, @@ -3802,7 +3833,13 @@ def _inbox_after_initial(server, if debug: print('Inbox sending actor is not valid ' + str(post_json_object)) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'not_valid_sending_actor', + debug) return False + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'valid_sending_actor', + debug) if post_json_object.get('object'): json_obj = post_json_object['object'] @@ -3813,19 +3850,31 @@ def _inbox_after_initial(server, if _check_for_git_patches(base_dir, nickname, domain, handle, json_obj) == 2: + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_check_for_git_patches', + debug) return False # replace YouTube links, so they get less tracking data replace_you_tube(post_json_object, yt_replace_domain, system_language) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'replace_you_tube', + debug) # replace twitter link domains, so that you can view twitter posts # without having an account replace_twitter(post_json_object, twitter_replacement_domain, system_language) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'replace_you_twitter', + debug) # list of indexes to be updated update_index_list = ['inbox'] populate_replies(base_dir, http_prefix, domain, post_json_object, max_replies, debug) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'populate_replies', + debug) _receive_question_vote(server, base_dir, nickname, domain, http_prefix, handle, debug, @@ -3845,6 +3894,9 @@ def _inbox_after_initial(server, max_like_count, cw_lists, lists_enabled, bold_reading) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_question_vote', + debug) is_reply_to_muted_post = False @@ -3869,6 +3921,9 @@ def _inbox_after_initial(server, onion_domain, i2p_domain): if debug: print('Invalid DM ' + str(post_json_object)) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_is_valid_dm', + debug) return False # get the actor being replied to @@ -3881,6 +3936,9 @@ def _inbox_after_initial(server, post_json_object, actor, update_index_list, http_prefix, default_reply_interval_hrs) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_create_reply_notification_file', + debug) if is_image_media(session, base_dir, http_prefix, nickname, domain, post_json_object, @@ -3893,6 +3951,9 @@ def _inbox_after_initial(server, bold_reading): # media index will be updated update_index_list.append('tlmedia') + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'is_image_media', + debug) if is_blog_post(post_json_object): # blogs index will be updated update_index_list.append('tlblogs') @@ -3903,9 +3964,15 @@ def _inbox_after_initial(server, onion_domain, i2p_domain, person_cache, post_json_object, debug, signing_priv_key_pem) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_obtain_avatar_for_reply_post', + debug) # save the post to file if save_json(post_json_object, destination_filename): + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'save_json', + debug) if mitm: # write a file to indicate that this post was delivered # via a third party @@ -3920,6 +3987,9 @@ def _inbox_after_initial(server, _low_frequency_post_notification(base_dir, http_prefix, nickname, domain, port, handle, post_is_dm, json_obj) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_low_frequency_post_notification', + debug) # If this is a reply to a muted post then also mute it. # This enables you to ignore a threat that's getting boring @@ -3939,6 +4009,9 @@ def _inbox_after_initial(server, edited_post_filename(base_dir, handle_name, domain, post_json_object, debug, 300, system_language) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'edited_post_filename', + debug) # If this was an edit then update the edits json file and # delete the previous version of the post @@ -3981,6 +4054,10 @@ def _inbox_after_initial(server, for boxname in update_index_list: if not inbox_update_index(boxname, base_dir, handle, destination_filename, debug): + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', 'inbox_update_index', + debug) print('ERROR: unable to update ' + boxname + ' index') else: if boxname == 'inbox': @@ -3990,6 +4067,10 @@ def _inbox_after_initial(server, nickname, domain, domain_full, post_json_object, person_cache, translate, None, theme_name) + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', 'update_speaker', + debug) if not unit_test: if debug: print('Saving inbox post as html to cache') @@ -3999,6 +4080,10 @@ def _inbox_after_initial(server, show_pub_date_only = show_published_date_only timezone = \ get_account_timezone(base_dir, handle_name, domain) + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', 'get_account_timezone', + debug) _inbox_store_post_to_html_cache(recent_posts_cache, max_recent_posts, translate, base_dir, @@ -4022,6 +4107,11 @@ def _inbox_after_initial(server, lists_enabled, timezone, mitm, bold_reading) + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', + '_inbox_store_post_to_html_cache', + debug) if debug: time_diff = \ str(int((time.time() - html_cache_start_time) * @@ -4032,15 +4122,31 @@ def _inbox_after_initial(server, update_conversation(base_dir, handle_name, domain, post_json_object) + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', 'update_conversation', + debug) # store the id of the last post made by this actor _store_last_post_id(base_dir, nickname, domain, post_json_object) + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', '_store_last_post_id', + debug) _inbox_update_calendar(base_dir, handle, post_json_object) + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', '_inbox_update_calendar', + debug) store_hash_tags(base_dir, handle_name, domain, http_prefix, domain_full, post_json_object, translate) + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', 'store_hash_tags', + debug) # send the post out to group members if is_group: @@ -4055,6 +4161,10 @@ def _inbox_after_initial(server, debug, system_language, domain, onion_domain, i2p_domain, signing_priv_key_pem) + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', '_send_to_group_members', + debug) else: if debug: print("Inbox post is not valid " + str(post_json_object)) @@ -4064,6 +4174,10 @@ def _inbox_after_initial(server, if debug: print("Inbox post was not saved " + destination_filename) return False + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', 'end_inbox_after_initial', + debug) return True @@ -4588,7 +4702,11 @@ def run_inbox_queue(server, """Processes received items and moves them to the appropriate directories """ + inbox_start_time = time.time() print('Starting new session when starting inbox queue') + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'start', debug) + curr_session_time = int(time.time()) session_last_update = 0 session = create_session(proxy_type) @@ -4619,6 +4737,8 @@ def run_inbox_queue(server, # if queue processing was interrupted (eg server crash) # then this loads any outstanding items back into the queue _restore_queue_items(base_dir, queue) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_restore_queue_items', debug) # keep track of numbers of incoming posts per day quotas_last_update_daily = int(time.time()) @@ -4643,8 +4763,12 @@ def run_inbox_queue(server, # how long it takes for broch mode to lapse broch_lapse_days = random.randrange(7, 14) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'while_loop_start', debug) while True: time.sleep(1) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'while_loop_itteration', debug) # heartbeat to monitor whether the inbox queue is running heart_beat_ctr += 1 @@ -4652,6 +4776,8 @@ def run_inbox_queue(server, # turn off broch mode after it has timed out if broch_modeLapses(base_dir, broch_lapse_days): broch_lapse_days = random.randrange(7, 14) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'broch_modeLapses', debug) print('>>> Heartbeat Q:' + str(len(queue)) + ' ' + '{:%F %T}'.format(datetime.datetime.now())) heart_beat_ctr = 0 @@ -4662,6 +4788,8 @@ def run_inbox_queue(server, if queue_restore_ctr >= 30: queue_restore_ctr = 0 _restore_queue_items(base_dir, queue) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'restore_queue', debug) continue # oldest item first @@ -4679,6 +4807,8 @@ def run_inbox_queue(server, # Load the queue json queue_json = load_json(queue_filename, 1) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'load_queue_json', debug) if not queue_json: print('Queue: run_inbox_queue failed to load inbox queue item ' + queue_filename) @@ -4722,6 +4852,8 @@ def run_inbox_queue(server, domain_max_posts_per_day, account_max_posts_per_day, debug): continue + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_inbox_quota_exceeded', debug) # recreate the session periodically if not session or curr_time - session_last_update > 21600: @@ -4751,6 +4883,8 @@ def run_inbox_queue(server, else: print('WARN: inbox i2p session not created') continue + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'recreate_session', debug) curr_session = session curr_proxy_type = proxy_type @@ -4769,6 +4903,8 @@ def run_inbox_queue(server, if debug and queue_json.get('actor'): print('Obtaining public key for actor ' + queue_json['actor']) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'start_get_pubkey', debug) # Try a few times to obtain the public key pub_key = None key_id = None @@ -4793,6 +4929,8 @@ def run_inbox_queue(server, project_version, http_prefix, domain, onion_domain, i2p_domain, signing_priv_key_pem) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'get_person_pub_key', debug) if pub_key: if debug: print('DEBUG: public key: ' + str(pub_key)) @@ -4817,6 +4955,8 @@ def run_inbox_queue(server, continue # check the http header signature + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'begin_check_signature', debug) if debug: print('DEBUG: checking http header signature') pprint(queue_json['httpHeaders']) @@ -4833,10 +4973,14 @@ def run_inbox_queue(server, else: if debug: print('DEBUG: http header signature check success') + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'verify_post_headers', debug) # check if a json signature exists on this post has_json_signature, jwebsig_type = \ _check_json_signature(base_dir, queue_json) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_check_json_signature', debug) # strict enforcement of json signatures if not has_json_signature: @@ -4887,6 +5031,9 @@ def run_inbox_queue(server, str(queue_filename)) if len(queue) > 0: queue.pop(0) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'not_verify_signature', + debug) continue else: if http_signature_failed: @@ -4894,6 +5041,9 @@ def run_inbox_queue(server, 'via relay ' + key_id) else: print('jsonld inbox signature check success ' + key_id) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'verify_signature_success', + debug) # set the id to the same as the post filename # This makes the filename and the id consistent @@ -4917,6 +5067,9 @@ def run_inbox_queue(server, str(queue_filename)) if len(queue) > 0: queue.pop(0) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_undo', + debug) continue if debug: @@ -4942,6 +5095,9 @@ def run_inbox_queue(server, queue.pop(0) print('Queue: Follow activity for ' + key_id + ' removed from queue') + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_follow_request', + debug) continue else: if debug: @@ -4963,6 +5119,9 @@ def run_inbox_queue(server, str(queue_filename)) if len(queue) > 0: queue.pop(0) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'receive_accept_reject', + debug) continue if _receive_update_activity(recent_posts_cache, curr_session, @@ -4997,6 +5156,9 @@ def run_inbox_queue(server, str(queue_filename)) if len(queue) > 0: queue.pop(0) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_receive_update_activity', + debug) continue # get recipients list @@ -5017,6 +5179,9 @@ def run_inbox_queue(server, str(queue_filename)) if len(queue) > 0: queue.pop(0) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_inbox_post_recipients', + debug) continue # if there are only a small number of followers then @@ -5050,6 +5215,9 @@ def run_inbox_queue(server, queue_json['destination'].replace(inbox_handle, inbox_handle) if not os.path.isfile(shared_inbox_post_filename): save_json(queue_json['post'], shared_inbox_post_filename) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'shared_inbox_save', + debug) lists_enabled = get_config_param(base_dir, "listsEnabled") content_license_url = get_config_param(base_dir, "contentLicenseUrl") @@ -5067,7 +5235,7 @@ def run_inbox_queue(server, base_dir + '/accounts/' + handle + '/.boldReading' if os.path.isfile(bold_reading_filename): bold_reading = True - _inbox_after_initial(server, + _inbox_after_initial(server, inbox_start_time, recent_posts_cache, max_recent_posts, session, session_onion, session_i2p, @@ -5100,6 +5268,9 @@ def run_inbox_queue(server, content_license_url, languages_understood, mitm, bold_reading) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'inbox_after_initial', + debug) if debug: pprint(queue_json['post']) print('Queue: Queue post accepted') diff --git a/webapp_profile.py b/webapp_profile.py index db41e45e7..9eeb71b7d 100644 --- a/webapp_profile.py +++ b/webapp_profile.py @@ -1551,6 +1551,8 @@ def _html_system_monitor(nickname: str, translate: {}) -> str: system_monitor_str = begin_edit_section(translate['System Monitor']) system_monitor_str += '

📊 GET

' + system_monitor_str += '

📊 INBOX

' system_monitor_str += '

📊 POST

' system_monitor_str += end_edit_section() From 9733f6f9defafcd5f558d8bbb50f38877bddd4b5 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 19:41:17 +0100 Subject: [PATCH 02/17] Reset start time --- inbox.py | 1 + 1 file changed, 1 insertion(+) diff --git a/inbox.py b/inbox.py index ec155986c..ecd355264 100644 --- a/inbox.py +++ b/inbox.py @@ -4767,6 +4767,7 @@ def run_inbox_queue(server, 'INBOX', 'while_loop_start', debug) while True: time.sleep(1) + inbox_start_time = time.time() fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'while_loop_itteration', debug) From d4bde42c90b4f6f6b1c4c0d84084877eccd04b2f Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 20:05:45 +0100 Subject: [PATCH 03/17] More profiling --- inbox.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/inbox.py b/inbox.py index ecd355264..9a38a0e73 100644 --- a/inbox.py +++ b/inbox.py @@ -3921,13 +3921,16 @@ def _inbox_after_initial(server, inbox_start_time, onion_domain, i2p_domain): if debug: print('Invalid DM ' + str(post_json_object)) - fitness_performance(inbox_start_time, server.fitness, - 'INBOX', '_is_valid_dm', - debug) return False + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_is_valid_dm', + debug) # get the actor being replied to actor = local_actor_url(http_prefix, nickname, domain_full) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'local_actor_url', + debug) # create a reply notification file if needed is_reply_to_muted_post = \ From 999cd953695532bb388243167afaa200313fcacc Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 20:09:02 +0100 Subject: [PATCH 04/17] More profiling --- inbox.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/inbox.py b/inbox.py index 9a38a0e73..9c2eb3c27 100644 --- a/inbox.py +++ b/inbox.py @@ -4055,6 +4055,10 @@ def _inbox_after_initial(server, inbox_start_time, # update the indexes for different timelines for boxname in update_index_list: + fitness_performance(inbox_start_time, + server.fitness, + 'INBOX', 'box_' + boxname, + debug) if not inbox_update_index(boxname, base_dir, handle, destination_filename, debug): fitness_performance(inbox_start_time, From a1cfa0cec6334da76205f5717ae21386b8a91da6 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 20:26:46 +0100 Subject: [PATCH 05/17] More profiling --- inbox.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/inbox.py b/inbox.py index 9c2eb3c27..33ec3c55a 100644 --- a/inbox.py +++ b/inbox.py @@ -5187,10 +5187,10 @@ def run_inbox_queue(server, str(queue_filename)) if len(queue) > 0: queue.pop(0) - fitness_performance(inbox_start_time, server.fitness, - 'INBOX', '_inbox_post_recipients', - debug) continue + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_post_recipients', + debug) # if there are only a small number of followers then # process them as if they were specifically @@ -5230,6 +5230,10 @@ def run_inbox_queue(server, lists_enabled = get_config_param(base_dir, "listsEnabled") content_license_url = get_config_param(base_dir, "contentLicenseUrl") + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'distribute_post', + debug) + # for posts addressed to specific accounts for handle, _ in recipients_dict.items(): destination = \ From 9569969c30e1bba40f7ebd538aea13b6cf4b1b9e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 20:47:40 +0100 Subject: [PATCH 06/17] Sort with more preceding zeros --- fitnessFunctions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fitnessFunctions.py b/fitnessFunctions.py index 0845eba13..bebba2a09 100644 --- a/fitnessFunctions.py +++ b/fitnessFunctions.py @@ -59,8 +59,8 @@ def sorted_watch_points(fitness: {}, fitness_id: str) -> []: for watch_point, item in fitness['performance'][fitness_id].items(): if not item.get('total'): continue - average_time = item['total'] * 1000 / item['ctr'] - average_time_str = str(average_time).zfill(8) + average_time = int(item['total'] * 1000 / item['ctr']) + average_time_str = str(average_time).zfill(16) result.append(average_time_str + ' ' + watch_point) result.sort(reverse=True) return result From 2983d9c8bc3a00a5e6ba5f4d8fb62fc9de1639d1 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 20:55:30 +0100 Subject: [PATCH 07/17] Change profile names --- inbox.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inbox.py b/inbox.py index 33ec3c55a..df62d6fcd 100644 --- a/inbox.py +++ b/inbox.py @@ -4183,7 +4183,7 @@ def _inbox_after_initial(server, inbox_start_time, return False fitness_performance(inbox_start_time, server.fitness, - 'INBOX', 'end_inbox_after_initial', + 'INBOX', 'end_after_initial', debug) return True @@ -5281,7 +5281,7 @@ def run_inbox_queue(server, languages_understood, mitm, bold_reading) fitness_performance(inbox_start_time, server.fitness, - 'INBOX', 'inbox_after_initial', + 'INBOX', 'handle_after_initial', debug) if debug: pprint(queue_json['post']) From 85617c7d5500552d7cc387891055dd17a6284c0a Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 21:06:38 +0100 Subject: [PATCH 08/17] More profiling --- inbox.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/inbox.py b/inbox.py index df62d6fcd..9518d4b06 100644 --- a/inbox.py +++ b/inbox.py @@ -3578,6 +3578,9 @@ def _inbox_after_initial(server, inbox_start_time, post_is_dm = False is_group = _group_handle(base_dir, handle) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', '_group_handle', + debug) handle_name = handle.split('@')[0] @@ -3741,6 +3744,9 @@ def _inbox_after_initial(server, inbox_start_time, if is_create_inside_announce(message_json): message_json = message_json['object'] + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'is_create_inside_announce', + debug) if _receive_announce(recent_posts_cache, session, handle, is_group, @@ -4175,6 +4181,9 @@ def _inbox_after_initial(server, inbox_start_time, else: if debug: print("Inbox post is not valid " + str(post_json_object)) + fitness_performance(inbox_start_time, server.fitness, + 'INBOX', 'invalid_post', + debug) # if the post wasn't saved if not os.path.isfile(destination_filename): From 95c709c7ca53124f7eca427a9ab53267a4806c14 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sat, 23 Apr 2022 22:20:20 +0100 Subject: [PATCH 09/17] More profiling --- inbox.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/inbox.py b/inbox.py index 9518d4b06..daaec9115 100644 --- a/inbox.py +++ b/inbox.py @@ -3581,6 +3581,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_group_handle', debug) + inbox_start_time = time.time() handle_name = handle.split('@')[0] @@ -3609,6 +3610,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_like', debug) + inbox_start_time = time.time() return False if _receive_undo_like(recent_posts_cache, @@ -3635,6 +3637,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_undo_like', debug) + inbox_start_time = time.time() return False if _receive_reaction(recent_posts_cache, @@ -3662,6 +3665,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_reaction', debug) + inbox_start_time = time.time() return False if _receive_undo_reaction(recent_posts_cache, @@ -3688,6 +3692,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_undo_reaction', debug) + inbox_start_time = time.time() return False if _receive_bookmark(recent_posts_cache, @@ -3714,6 +3719,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_bookmark', debug) + inbox_start_time = time.time() return False if _receive_undo_bookmark(recent_posts_cache, @@ -3740,6 +3746,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_undo_bookmark', debug) + inbox_start_time = time.time() return False if is_create_inside_announce(message_json): @@ -3747,6 +3754,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'is_create_inside_announce', debug) + inbox_start_time = time.time() if _receive_announce(recent_posts_cache, session, handle, is_group, @@ -3773,6 +3781,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_announce', debug) + inbox_start_time = time.time() if _receive_undo_announce(recent_posts_cache, session, handle, is_group, @@ -3789,6 +3798,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_undo_announce', debug) + inbox_start_time = time.time() return False if _receive_delete(session, handle, is_group, @@ -3806,6 +3816,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_delete', debug) + inbox_start_time = time.time() return False if debug: @@ -3832,6 +3843,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_valid_post_content', debug) + inbox_start_time = time.time() # is the sending actor valid? if not valid_sending_actor(session, base_dir, nickname, domain, person_cache, post_json_object, @@ -3842,10 +3854,12 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'not_valid_sending_actor', debug) + inbox_start_time = time.time() return False fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'valid_sending_actor', debug) + inbox_start_time = time.time() if post_json_object.get('object'): json_obj = post_json_object['object'] @@ -3859,6 +3873,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_check_for_git_patches', debug) + inbox_start_time = time.time() return False # replace YouTube links, so they get less tracking data @@ -3866,6 +3881,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'replace_you_tube', debug) + inbox_start_time = time.time() # replace twitter link domains, so that you can view twitter posts # without having an account replace_twitter(post_json_object, twitter_replacement_domain, @@ -3873,6 +3889,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'replace_you_twitter', debug) + inbox_start_time = time.time() # list of indexes to be updated update_index_list = ['inbox'] @@ -3881,6 +3898,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'populate_replies', debug) + inbox_start_time = time.time() _receive_question_vote(server, base_dir, nickname, domain, http_prefix, handle, debug, @@ -3903,6 +3921,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_question_vote', debug) + inbox_start_time = time.time() is_reply_to_muted_post = False @@ -3931,12 +3950,14 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_is_valid_dm', debug) + inbox_start_time = time.time() # get the actor being replied to actor = local_actor_url(http_prefix, nickname, domain_full) fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'local_actor_url', debug) + inbox_start_time = time.time() # create a reply notification file if needed is_reply_to_muted_post = \ @@ -3948,6 +3969,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_create_reply_notification_file', debug) + inbox_start_time = time.time() if is_image_media(session, base_dir, http_prefix, nickname, domain, post_json_object, @@ -3963,6 +3985,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'is_image_media', debug) + inbox_start_time = time.time() if is_blog_post(post_json_object): # blogs index will be updated update_index_list.append('tlblogs') @@ -3976,12 +3999,14 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_obtain_avatar_for_reply_post', debug) + inbox_start_time = time.time() # save the post to file if save_json(post_json_object, destination_filename): fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'save_json', debug) + inbox_start_time = time.time() if mitm: # write a file to indicate that this post was delivered # via a third party @@ -3999,6 +4024,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_low_frequency_post_notification', debug) + inbox_start_time = time.time() # If this is a reply to a muted post then also mute it. # This enables you to ignore a threat that's getting boring @@ -4021,6 +4047,7 @@ def _inbox_after_initial(server, inbox_start_time, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'edited_post_filename', debug) + inbox_start_time = time.time() # If this was an edit then update the edits json file and # delete the previous version of the post @@ -4065,12 +4092,14 @@ def _inbox_after_initial(server, inbox_start_time, server.fitness, 'INBOX', 'box_' + boxname, debug) + inbox_start_time = time.time() if not inbox_update_index(boxname, base_dir, handle, destination_filename, debug): fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'inbox_update_index', debug) + inbox_start_time = time.time() print('ERROR: unable to update ' + boxname + ' index') else: if boxname == 'inbox': @@ -4084,6 +4113,7 @@ def _inbox_after_initial(server, inbox_start_time, server.fitness, 'INBOX', 'update_speaker', debug) + inbox_start_time = time.time() if not unit_test: if debug: print('Saving inbox post as html to cache') @@ -4097,6 +4127,7 @@ def _inbox_after_initial(server, inbox_start_time, server.fitness, 'INBOX', 'get_account_timezone', debug) + inbox_start_time = time.time() _inbox_store_post_to_html_cache(recent_posts_cache, max_recent_posts, translate, base_dir, @@ -4125,6 +4156,7 @@ def _inbox_after_initial(server, inbox_start_time, 'INBOX', '_inbox_store_post_to_html_cache', debug) + inbox_start_time = time.time() if debug: time_diff = \ str(int((time.time() - html_cache_start_time) * @@ -4139,6 +4171,7 @@ def _inbox_after_initial(server, inbox_start_time, server.fitness, 'INBOX', 'update_conversation', debug) + inbox_start_time = time.time() # store the id of the last post made by this actor _store_last_post_id(base_dir, nickname, domain, post_json_object) @@ -4146,12 +4179,14 @@ def _inbox_after_initial(server, inbox_start_time, server.fitness, 'INBOX', '_store_last_post_id', debug) + inbox_start_time = time.time() _inbox_update_calendar(base_dir, handle, post_json_object) fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_inbox_update_calendar', debug) + inbox_start_time = time.time() store_hash_tags(base_dir, handle_name, domain, http_prefix, domain_full, @@ -4160,6 +4195,7 @@ def _inbox_after_initial(server, inbox_start_time, server.fitness, 'INBOX', 'store_hash_tags', debug) + inbox_start_time = time.time() # send the post out to group members if is_group: @@ -4178,12 +4214,14 @@ def _inbox_after_initial(server, inbox_start_time, server.fitness, 'INBOX', '_send_to_group_members', debug) + inbox_start_time = time.time() else: if debug: print("Inbox post is not valid " + str(post_json_object)) fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'invalid_post', debug) + inbox_start_time = time.time() # if the post wasn't saved if not os.path.isfile(destination_filename): @@ -4194,6 +4232,7 @@ def _inbox_after_initial(server, inbox_start_time, server.fitness, 'INBOX', 'end_after_initial', debug) + inbox_start_time = time.time() return True @@ -4722,6 +4761,7 @@ def run_inbox_queue(server, print('Starting new session when starting inbox queue') fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'start', debug) + inbox_start_time = time.time() curr_session_time = int(time.time()) session_last_update = 0 @@ -4755,6 +4795,7 @@ def run_inbox_queue(server, _restore_queue_items(base_dir, queue) fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_restore_queue_items', debug) + inbox_start_time = time.time() # keep track of numbers of incoming posts per day quotas_last_update_daily = int(time.time()) @@ -4781,11 +4822,13 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'while_loop_start', debug) + inbox_start_time = time.time() while True: time.sleep(1) inbox_start_time = time.time() fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'while_loop_itteration', debug) + inbox_start_time = time.time() # heartbeat to monitor whether the inbox queue is running heart_beat_ctr += 1 @@ -4795,6 +4838,7 @@ def run_inbox_queue(server, broch_lapse_days = random.randrange(7, 14) fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'broch_modeLapses', debug) + inbox_start_time = time.time() print('>>> Heartbeat Q:' + str(len(queue)) + ' ' + '{:%F %T}'.format(datetime.datetime.now())) heart_beat_ctr = 0 @@ -4807,6 +4851,7 @@ def run_inbox_queue(server, _restore_queue_items(base_dir, queue) fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'restore_queue', debug) + inbox_start_time = time.time() continue # oldest item first @@ -4826,6 +4871,7 @@ def run_inbox_queue(server, queue_json = load_json(queue_filename, 1) fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'load_queue_json', debug) + inbox_start_time = time.time() if not queue_json: print('Queue: run_inbox_queue failed to load inbox queue item ' + queue_filename) @@ -4871,6 +4917,7 @@ def run_inbox_queue(server, continue fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_inbox_quota_exceeded', debug) + inbox_start_time = time.time() # recreate the session periodically if not session or curr_time - session_last_update > 21600: @@ -4902,6 +4949,7 @@ def run_inbox_queue(server, continue fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'recreate_session', debug) + inbox_start_time = time.time() curr_session = session curr_proxy_type = proxy_type @@ -4922,6 +4970,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'start_get_pubkey', debug) + inbox_start_time = time.time() # Try a few times to obtain the public key pub_key = None key_id = None @@ -4948,6 +4997,7 @@ def run_inbox_queue(server, signing_priv_key_pem) fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'get_person_pub_key', debug) + inbox_start_time = time.time() if pub_key: if debug: print('DEBUG: public key: ' + str(pub_key)) @@ -4974,6 +5024,7 @@ def run_inbox_queue(server, # check the http header signature fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'begin_check_signature', debug) + inbox_start_time = time.time() if debug: print('DEBUG: checking http header signature') pprint(queue_json['httpHeaders']) @@ -4992,12 +5043,14 @@ def run_inbox_queue(server, print('DEBUG: http header signature check success') fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'verify_post_headers', debug) + inbox_start_time = time.time() # check if a json signature exists on this post has_json_signature, jwebsig_type = \ _check_json_signature(base_dir, queue_json) fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_check_json_signature', debug) + inbox_start_time = time.time() # strict enforcement of json signatures if not has_json_signature: @@ -5051,6 +5104,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'not_verify_signature', debug) + inbox_start_time = time.time() continue else: if http_signature_failed: @@ -5061,6 +5115,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'verify_signature_success', debug) + inbox_start_time = time.time() # set the id to the same as the post filename # This makes the filename and the id consistent @@ -5087,6 +5142,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_undo', debug) + inbox_start_time = time.time() continue if debug: @@ -5115,6 +5171,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_follow_request', debug) + inbox_start_time = time.time() continue else: if debug: @@ -5139,6 +5196,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'receive_accept_reject', debug) + inbox_start_time = time.time() continue if _receive_update_activity(recent_posts_cache, curr_session, @@ -5176,6 +5234,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_receive_update_activity', debug) + inbox_start_time = time.time() continue # get recipients list @@ -5200,6 +5259,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', '_post_recipients', debug) + inbox_start_time = time.time() # if there are only a small number of followers then # process them as if they were specifically @@ -5235,6 +5295,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'shared_inbox_save', debug) + inbox_start_time = time.time() lists_enabled = get_config_param(base_dir, "listsEnabled") content_license_url = get_config_param(base_dir, "contentLicenseUrl") @@ -5242,6 +5303,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'distribute_post', debug) + inbox_start_time = time.time() # for posts addressed to specific accounts for handle, _ in recipients_dict.items(): @@ -5292,6 +5354,7 @@ def run_inbox_queue(server, fitness_performance(inbox_start_time, server.fitness, 'INBOX', 'handle_after_initial', debug) + inbox_start_time = time.time() if debug: pprint(queue_json['post']) print('Queue: Queue post accepted') From 52697e5437d8ef3d60dd74b3ec6d6ef6dd908241 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 24 Apr 2022 17:41:01 +0100 Subject: [PATCH 10/17] Longer period between hashtag swarm update --- inbox.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inbox.py b/inbox.py index daaec9115..21025d673 100644 --- a/inbox.py +++ b/inbox.py @@ -179,10 +179,11 @@ def _update_cached_hashtag_swarm(base_dir: str, nickname: str, domain: str, curr_date = datetime.datetime.utcnow() time_diff = curr_date - modified_date diff_mins = int(time_diff.total_seconds() / 60) - if diff_mins < 10: + if diff_mins < 30: # was saved recently, so don't save again # This avoids too much disk I/O save_swarm = False + print('Not updating hashtag swarm') else: print('Updating cached hashtag swarm, last changed ' + str(diff_mins) + ' minutes ago') From 94f9daeec483e3e6e8e2190a0acbef177885164b Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 24 Apr 2022 17:56:52 +0100 Subject: [PATCH 11/17] Write hashtag index in one go --- inbox.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/inbox.py b/inbox.py index 21025d673..908d4298c 100644 --- a/inbox.py +++ b/inbox.py @@ -256,13 +256,17 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str, except OSError: print('EX: unable to write ' + tags_filename) else: - if post_url not in open(tags_filename).read(): + content = '' + try: + with open(tags_filename, 'r') as tags_file: + content = tags_file.read() + except OSError: + pass + if post_url not in content: + content = tag_line + content try: - with open(tags_filename, 'r+') as tags_file: - content = tags_file.read() - if tag_line not in content: - tags_file.seek(0, 0) - tags_file.write(tag_line + content) + with open(tags_filename, 'w+') as tags_file: + tags_file.write(content) except OSError as ex: print('EX: Failed to write entry to tags file ' + tags_filename + ' ' + str(ex)) From 3c17749daee0e5c1370e735b91d7d4b2109e75e8 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 24 Apr 2022 18:02:09 +0100 Subject: [PATCH 12/17] Remove old hashtags while updating hashtag swarm --- inbox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inbox.py b/inbox.py index 908d4298c..b51f8879b 100644 --- a/inbox.py +++ b/inbox.py @@ -200,6 +200,7 @@ def _update_cached_hashtag_swarm(base_dir: str, nickname: str, domain: str, except OSError: print('EX: unable to write cached hashtag swarm ' + cached_hashtag_swarm_filename) + remove_old_hashtags(base_dir, 3) return False @@ -270,7 +271,6 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str, except OSError as ex: print('EX: Failed to write entry to tags file ' + tags_filename + ' ' + str(ex)) - remove_old_hashtags(base_dir, 3) # automatically assign a category to the tag if possible category_filename = tags_dir + '/' + tag_name + '.category' From 726447e1cbe93932c1e5d90f83c8d5bea8f6499e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 24 Apr 2022 18:19:41 +0100 Subject: [PATCH 13/17] Only update category if hashtag was added --- inbox.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/inbox.py b/inbox.py index b51f8879b..59ed67f6d 100644 --- a/inbox.py +++ b/inbox.py @@ -249,11 +249,12 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str, days_since_epoch = days_diff.days tag_line = \ str(days_since_epoch) + ' ' + nickname + ' ' + post_url + '\n' - hashtags_ctr += 1 + hashtag_added = False if not os.path.isfile(tags_filename): try: with open(tags_filename, 'w+') as tags_file: tags_file.write(tag_line) + hashtag_added = True except OSError: print('EX: unable to write ' + tags_filename) else: @@ -268,17 +269,22 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str, try: with open(tags_filename, 'w+') as tags_file: tags_file.write(content) + hashtag_added = True except OSError as ex: print('EX: Failed to write entry to tags file ' + tags_filename + ' ' + str(ex)) - # automatically assign a category to the tag if possible - category_filename = tags_dir + '/' + tag_name + '.category' - if not os.path.isfile(category_filename): - category_str = \ - guess_hashtag_category(tag_name, hashtag_categories) - if category_str: - set_hashtag_category(base_dir, tag_name, category_str, False) + if hashtag_added: + hashtags_ctr += 1 + + # automatically assign a category to the tag if possible + category_filename = tags_dir + '/' + tag_name + '.category' + if not os.path.isfile(category_filename): + category_str = \ + guess_hashtag_category(tag_name, hashtag_categories) + if category_str: + set_hashtag_category(base_dir, tag_name, + category_str, False) # if some hashtags were found then recalculate the swarm # ready for later display From b8b9f9f2fc0b7956d4d9d128b4d651ef01456b08 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 24 Apr 2022 19:15:25 +0100 Subject: [PATCH 14/17] Get hashtag categories only when needed --- inbox.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inbox.py b/inbox.py index 59ed67f6d..bef69c55c 100644 --- a/inbox.py +++ b/inbox.py @@ -227,8 +227,6 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str, print('Creating tags directory') os.mkdir(tags_dir) - hashtag_categories = get_hashtag_categories(base_dir) - hashtags_ctr = 0 for tag in post_json_object['object']['tag']: if not tag.get('type'): @@ -280,6 +278,7 @@ def store_hash_tags(base_dir: str, nickname: str, domain: str, # automatically assign a category to the tag if possible category_filename = tags_dir + '/' + tag_name + '.category' if not os.path.isfile(category_filename): + hashtag_categories = get_hashtag_categories(base_dir) category_str = \ guess_hashtag_category(tag_name, hashtag_categories) if category_str: From aee89ca8c93906d9d608444aa0b61261b8f2d144 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 24 Apr 2022 20:03:02 +0100 Subject: [PATCH 15/17] Timeout when getting rss feeds --- daemon.py | 3 +++ epicyon.py | 3 ++- newsdaemon.py | 3 ++- newswire.py | 13 +++++++++---- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/daemon.py b/daemon.py index 88009d8bd..02892de67 100644 --- a/daemon.py +++ b/daemon.py @@ -20804,6 +20804,9 @@ def run_daemon(preferred_podcast_formats: [], 'Reminder': 'r' } + # timeout used when getting rss feeds + httpd.rss_timeout_sec = 20 + # timeout used when checking for actor changes when clicking an avatar # and entering person options screen if check_actor_timeout < 2: diff --git a/epicyon.py b/epicyon.py index 034ae5b77..67073ce82 100644 --- a/epicyon.py +++ b/epicyon.py @@ -1162,10 +1162,11 @@ if podcast_formats_str: preferred_podcast_formats.append(pod_format) if args.rss: + timeout_sec = 20 session = create_session(None) testRSS = get_rss(base_dir, domain, session, args.rss, False, False, 1000, 1000, 1000, 1000, debug, - preferred_podcast_formats) + preferred_podcast_formats, timeout_sec) pprint(testRSS) sys.exit() diff --git a/newsdaemon.py b/newsdaemon.py index 4ec7e0b1c..5bf8ab115 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -818,7 +818,8 @@ def run_newswire_daemon(base_dir: str, httpd, httpd.maxCategoriesFeedItemSizeKb, httpd.system_language, httpd.debug, - httpd.preferred_podcast_formats) + httpd.preferred_podcast_formats, + httpd.rss_timeout_sec) if not httpd.newswire: print('Newswire feeds not updated') diff --git a/newswire.py b/newswire.py index 65c3d2302..748425c4e 100644 --- a/newswire.py +++ b/newswire.py @@ -1278,7 +1278,8 @@ def get_rss(base_dir: str, domain: str, session, url: str, max_posts_per_source: int, max_feed_size_kb: int, max_feed_item_size_kb: int, max_categories_feedItem_size_kb: int, debug: bool, - preferred_podcast_formats: []) -> {}: + preferred_podcast_formats: [], + timeout_sec: int) -> {}: """Returns an RSS url as a dict """ if not isinstance(url, str): @@ -1302,7 +1303,9 @@ def get_rss(base_dir: str, domain: str, session, url: str, url = _yt_channel_to_atom_feed(url) try: result = \ - session.get(url, headers=session_headers, params=session_params) + session.get(url, headers=session_headers, + params=session_params, + timeout=timeout_sec) if result: if int(len(result.text) / 1024) < max_feed_size_kb and \ not contains_invalid_chars(result.text): @@ -1558,7 +1561,8 @@ def get_dict_from_newswire(session, base_dir: str, domain: str, max_newswire_posts: int, max_categories_feedItem_size_kb: int, system_language: str, debug: bool, - preferred_podcast_formats: []) -> {}: + preferred_podcast_formats: [], + timeout_sec: int) -> {}: """Gets rss feeds as a dictionary from newswire file """ subscriptions_filename = base_dir + '/accounts/newswire.txt' @@ -1600,7 +1604,8 @@ def get_dict_from_newswire(session, base_dir: str, domain: str, max_posts_per_source, max_feed_size_kb, max_feed_item_size_kb, max_categories_feedItem_size_kb, debug, - preferred_podcast_formats) + preferred_podcast_formats, + timeout_sec) if items_list: for date_str, item in items_list.items(): result[date_str] = item From 0a3a584d1b97d0503d52d482e10ce25dfc59b298 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 24 Apr 2022 21:19:16 +0100 Subject: [PATCH 16/17] Wait longer between rss feed updates --- newsdaemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newsdaemon.py b/newsdaemon.py index 5bf8ab115..431230ccc 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -868,7 +868,7 @@ def run_newswire_daemon(base_dir: str, httpd, httpd.max_news_posts) # wait a while before the next feeds update - for _ in range(120): + for _ in range(360): time.sleep(10) # if a new blog post has been created then stop # waiting and recalculate the newswire From 53174ddfd8d763bd9db1cb401c0f3ec53168935c Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 24 Apr 2022 21:33:07 +0100 Subject: [PATCH 17/17] Don't allow redirects on session get --- newswire.py | 3 ++- session.py | 16 +++++++++++----- webapp_utils.py | 3 ++- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/newswire.py b/newswire.py index 748425c4e..54c22ac6b 100644 --- a/newswire.py +++ b/newswire.py @@ -1305,7 +1305,8 @@ def get_rss(base_dir: str, domain: str, session, url: str, result = \ session.get(url, headers=session_headers, params=session_params, - timeout=timeout_sec) + timeout=timeout_sec, + allow_redirects=False) if result: if int(len(result.text) / 1024) < max_feed_size_kb and \ not contains_invalid_chars(result.text): diff --git a/session.py b/session.py index 0f57d03ac..32ffa516c 100644 --- a/session.py +++ b/session.py @@ -71,7 +71,8 @@ def url_exists(session, url: str, timeout_sec: int = 3, try: result = session.get(url, headers=session_headers, params=session_params, - timeout=timeout_sec) + timeout=timeout_sec, + allow_redirects=False) if result: if result.status_code == 200 or \ result.status_code == 304: @@ -91,7 +92,8 @@ def _get_json_request(session, url: str, domain_full: str, session_headers: {}, """ try: result = session.get(url, headers=session_headers, - params=session_params, timeout=timeout_sec) + params=session_params, timeout=timeout_sec, + allow_redirects=False) if result.status_code != 200: if result.status_code == 401: print("WARN: get_json " + url + ' rejected by secure mode') @@ -284,7 +286,8 @@ def get_vcard(xml_format: bool, try: result = session.get(url, headers=session_headers, - params=session_params, timeout=timeout_sec) + params=session_params, timeout=timeout_sec, + allow_redirects=False) if result.status_code != 200: if result.status_code == 401: print("WARN: get_vcard " + url + ' rejected by secure mode') @@ -592,7 +595,8 @@ def download_image(session, base_dir: str, url: str, print('Downloading image url: ' + url) result = session.get(url, headers=session_headers, - params=None) + params=None, + allow_redirects=False) if result.status_code < 200 or \ result.status_code > 202: if debug: @@ -635,7 +639,9 @@ def download_image_any_mime_type(session, url: str, 'Accept': 'image/x-icon, image/png, image/webp, image/jpeg, image/gif' } try: - result = session.get(url, headers=session_headers, timeout=timeout_sec) + result = session.get(url, headers=session_headers, + timeout=timeout_sec, + allow_redirects=False) except requests.exceptions.RequestException as ex: print('EX: download_image_any_mime_type failed1: ' + str(url) + ', ' + str(ex)) diff --git a/webapp_utils.py b/webapp_utils.py index 3c57e913b..aaf7863c4 100644 --- a/webapp_utils.py +++ b/webapp_utils.py @@ -277,7 +277,8 @@ def update_avatar_image_cache(signing_priv_key_pem: str, print('avatar image url: ' + avatar_url) result = session.get(avatar_url, headers=session_headers, - params=None) + params=None, + allow_redirects=False) if result.status_code < 200 or \ result.status_code > 202: if debug: