From 37d5138c7767af2734524acd0c05c799c72f704f Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Wed, 4 Jan 2023 20:30:01 +0000 Subject: [PATCH 01/23] Scientific references --- manual/manual.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/manual/manual.md b/manual/manual.md index 046eb6aad..2a37d6a89 100644 --- a/manual/manual.md +++ b/manual/manual.md @@ -436,6 +436,12 @@ The location field on a post can be a description, but it can also be a map geol Selecting the *location* header will open the last known geolocation, so if your current location is near this makes it quicker to find. +## Scientific references +It is possible to have references to scientific papers linked automatically, such that they are readable with one click/press. Supported references are [arXiv](https://arxiv.org) and [Digital object identifier (DOI)](https://en.wikipedia.org/wiki/Digital_object_identifier). For example: +```text +This is a reference to a paper: arxiv:2203.15752 +``` + # The Timeline ## Layout ![Layout](manual-layout.png) From 4ae21596e47886d8d91e54536abfa7ec488512b4 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 15:56:49 +0000 Subject: [PATCH 02/23] Returning json for hashtags --- daemon.py | 75 ++++++++++++++++++++++++++++++++++++++++++-- webapp_search.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 2 deletions(-) diff --git a/daemon.py b/daemon.py index 7b2c7c1f8..bea4943cf 100644 --- a/daemon.py +++ b/daemon.py @@ -14,6 +14,7 @@ import json import time import urllib.parse import datetime +import os from socket import error as SocketError import errno from functools import partial @@ -239,6 +240,7 @@ from webapp_search import html_skills_search from webapp_search import html_history_search from webapp_search import html_hashtag_search from webapp_search import rss_hashtag_search +from webapp_search import hashtag_search_json from webapp_search import html_search_emoji from webapp_search import html_search_shared_items from webapp_search import html_search_emoji_text_entry @@ -427,8 +429,6 @@ from maps import map_format_from_tagmaps_path from relationships import get_moved_feed from relationships import get_inactive_feed from relationships import update_moved_actors -import os - # maximum number of posts to list in outbox feed MAX_POSTS_IN_FEED = 12 @@ -9175,6 +9175,62 @@ class PubServer(BaseHTTPRequestHandler): '_GET', '_hashtag_search_rss2', self.server.debug) + def _hashtag_search_json(self, calling_domain: str, + referer_domain: str, + path: str, cookie: str, + base_dir: str, http_prefix: str, + domain: str, domain_full: str, port: int, + onion_domain: str, i2p_domain: str, + getreq_start_time) -> None: + """Return a json collection for a hashtag + """ + page_number = 1 + if '?page=' in path: + page_number_str = path.split('?page=')[1] + if page_number_str.isdigit(): + page_number = int(page_number_str) + path = path.split('?page=')[0] + hashtag = path.split('/tags/')[1] + if is_blocked_hashtag(base_dir, hashtag): + self._400() + return + nickname = None + if '/users/' in path: + actor = \ + http_prefix + '://' + domain_full + path + nickname = \ + get_nickname_from_actor(actor) + hashtag_json = \ + hashtag_search_json(nickname, + domain, port, + base_dir, hashtag, + page_number, MAX_POSTS_IN_FEED, + http_prefix) + if hashtag_json: + msg_str = json.dumps(hashtag_json) + msg_str = self._convert_domains(calling_domain, referer_domain, + msg_str) + msg = msg_str.encode('utf-8') + msglen = len(msg) + self._set_headers('application/json', msglen, + None, calling_domain, True) + self._write(msg) + else: + origin_path_str = path.split('/tags/')[0] + origin_path_str_absolute = \ + http_prefix + '://' + domain_full + origin_path_str + if calling_domain.endswith('.onion') and onion_domain: + origin_path_str_absolute = \ + 'http://' + onion_domain + origin_path_str + elif (calling_domain.endswith('.i2p') and onion_domain): + origin_path_str_absolute = \ + 'http://' + i2p_domain + origin_path_str + self._redirect_headers(origin_path_str_absolute, + cookie, calling_domain) + fitness_performance(getreq_start_time, self.server.fitness, + '_GET', '_hashtag_search_json', + self.server.debug) + def _announce_button(self, calling_domain: str, path: str, base_dir: str, cookie: str, proxy_type: str, @@ -18428,6 +18484,21 @@ class PubServer(BaseHTTPRequestHandler): curr_session) self.server.getreq_busy = False return + if not html_getreq: + # TODO + self._hashtag_search_json(calling_domain, referer_domain, + self.path, cookie, + self.server.base_dir, + self.server.http_prefix, + self.server.domain, + self.server.domain_full, + self.server.port, + self.server.onion_domain, + self.server.i2p_domain, + getreq_start_time, + curr_session) + self.server.getreq_busy = False + return self._hashtag_search(calling_domain, self.path, cookie, self.server.base_dir, diff --git a/webapp_search.py b/webapp_search.py index 41628549b..d97363a06 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -11,6 +11,8 @@ import os from shutil import copyfile import urllib.parse from datetime import datetime +from utils import remove_id_ending +from utils import has_object_dict from utils import acct_handle_dir from utils import get_base_content_from_post from utils import is_account_dir @@ -1157,3 +1159,82 @@ def rss_hashtag_search(nickname: str, domain: str, port: int, break return hashtag_feed + rss2tag_footer() + + +def hashtag_search_json(nickname: str, domain: str, port: int, + base_dir: str, hashtag: str, + page_number: int, posts_per_page: int, + http_prefix: str) -> {}: + """Show a json collection for a hashtag + """ + if hashtag.startswith('#'): + hashtag = hashtag[1:] + hashtag = urllib.parse.unquote(hashtag) + hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt' + if not os.path.isfile(hashtag_index_file): + if hashtag != hashtag.lower(): + hashtag = hashtag.lower() + hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt' + if not os.path.isfile(hashtag_index_file): + print('WARN: hashtag file not found ' + hashtag_index_file) + return None + + # check that the directory for the nickname exists + if nickname: + account_dir = acct_dir(base_dir, nickname, domain) + if not os.path.isdir(account_dir): + nickname = None + + # read the index + lines = [] + with open(hashtag_index_file, 'r', encoding='utf-8') as fp_hash: + lines = fp_hash.readlines() + if not lines: + return None + + domain_full = get_full_domain(domain, port) + + url = http_prefix + '://' + domain_full + '/tags/' + \ + hashtag + '?page=' + str(page_number) + hashtag_json = { + '@context': 'https://www.w3.org/ns/activitystreams', + 'id': url, + 'orderedItems': [], + 'totalItems': 0, + 'type': 'OrderedCollection' + } + page_items = 0 + for index, _ in enumerate(lines): + post_id = lines[index].strip('\n').strip('\r') + if ' ' not in post_id: + nickname = get_nickname_from_actor(post_id) + if not nickname: + continue + else: + post_fields = post_id.split(' ') + if len(post_fields) != 3: + continue + nickname = post_fields[1] + post_id = post_fields[2] + post_filename = locate_post(base_dir, nickname, domain, post_id) + if not post_filename: + continue + post_json_object = load_json(post_filename) + if post_json_object: + if not has_object_dict(post_json_object): + continue + if not is_public_post(post_json_object): + continue + if not post_json_object['object'].get('id'): + continue + # add to feed + page_items += 1 + if page_items < posts_per_page * (page_number - 1): + continue + id_str = remove_id_ending(post_json_object['object']['id']) + hashtag_json['orderedItems'].append(id_str) + hashtag_json['totalItems'] += 1 + if hashtag_json['totalItems'] >= posts_per_page: + break + + return hashtag_json From d5f4a4f9261d65645186f35b2287d1e940aaafdd Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 16:01:41 +0000 Subject: [PATCH 03/23] Extra argument --- daemon.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/daemon.py b/daemon.py index bea4943cf..c4e13ef0a 100644 --- a/daemon.py +++ b/daemon.py @@ -18485,7 +18485,6 @@ class PubServer(BaseHTTPRequestHandler): self.server.getreq_busy = False return if not html_getreq: - # TODO self._hashtag_search_json(calling_domain, referer_domain, self.path, cookie, self.server.base_dir, @@ -18495,8 +18494,7 @@ class PubServer(BaseHTTPRequestHandler): self.server.port, self.server.onion_domain, self.server.i2p_domain, - getreq_start_time, - curr_session) + getreq_start_time) self.server.getreq_busy = False return self._hashtag_search(calling_domain, From df23bbbc97465d869c17d2d04a61781b099314ee Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 17:12:55 +0000 Subject: [PATCH 04/23] Tidying --- webapp_search.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/webapp_search.py b/webapp_search.py index d97363a06..d4163fb5e 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1220,20 +1220,21 @@ def hashtag_search_json(nickname: str, domain: str, port: int, if not post_filename: continue post_json_object = load_json(post_filename) - if post_json_object: - if not has_object_dict(post_json_object): - continue - if not is_public_post(post_json_object): - continue - if not post_json_object['object'].get('id'): - continue - # add to feed - page_items += 1 - if page_items < posts_per_page * (page_number - 1): - continue - id_str = remove_id_ending(post_json_object['object']['id']) - hashtag_json['orderedItems'].append(id_str) - hashtag_json['totalItems'] += 1 + if not post_json_object: + continue + if not has_object_dict(post_json_object): + continue + if not is_public_post(post_json_object): + continue + if not post_json_object['object'].get('id'): + continue + # add to feed + page_items += 1 + if page_items < posts_per_page * (page_number - 1): + continue + id_str = remove_id_ending(post_json_object['object']['id']) + hashtag_json['orderedItems'].append(id_str) + hashtag_json['totalItems'] += 1 if hashtag_json['totalItems'] >= posts_per_page: break From fd066e16a846e6fd2e50abac40fbae1812d6c9f3 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 22:19:38 +0000 Subject: [PATCH 05/23] Display of remote hashtags to avoid showing Mastodon UI --- content.py | 40 ++++++++++ daemon.py | 64 ++++++++++++++++ webapp_post.py | 3 + webapp_search.py | 193 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 300 insertions(+) diff --git a/content.py b/content.py index c80dd4229..4194c3a6b 100644 --- a/content.py +++ b/content.py @@ -765,6 +765,46 @@ def _add_hash_tags(word_str: str, http_prefix: str, domain: str, return True +def replace_remote_hashtags(content: str, + nickname: str, domain: str) -> str: + """Replaces remote hashtags with a local version + """ + if not domain: + return content + + if ' href="' not in content: + return content + + sections = content.split(' href="') + ctr = 0 + replacements = {} + for section in sections: + if ctr == 0: + ctr += 1 + continue + if '"' not in section: + ctr += 1 + continue + link = section.split('"')[0] + if '?remotetag=' in link: + ctr += 1 + continue + if '/tags/' not in link: + ctr += 1 + continue + if '/' + domain not in link: + new_link = '/users/' + nickname + \ + '?remotetag=' + link.replace('/', '--') + replacements[link] = new_link + ctr += 1 + if not replacements: + return content + for old_link, new_link in replacements.items(): + content = content.replace('"' + old_link + '"', + '"' + new_link + '"') + return content + + def _add_emoji(base_dir: str, word_str: str, http_prefix: str, domain: str, replace_emoji: {}, post_tags: {}, diff --git a/daemon.py b/daemon.py index c4e13ef0a..7a1a8b790 100644 --- a/daemon.py +++ b/daemon.py @@ -239,6 +239,7 @@ from webapp_column_right import html_edit_news_post from webapp_search import html_skills_search from webapp_search import html_history_search from webapp_search import html_hashtag_search +from webapp_search import html_hashtag_search_remote from webapp_search import rss_hashtag_search from webapp_search import hashtag_search_json from webapp_search import html_search_emoji @@ -18467,6 +18468,69 @@ class PubServer(BaseHTTPRequestHandler): self.server.getreq_busy = False return + if '?remotetag=' in self.path and \ + '/users/' in self.path and authorized: + actor = self.path.split('?remotetag=')[0] + nickname = get_nickname_from_actor(actor) + hashtag_url = self.path.split('?remotetag=')[1] + if ';' in hashtag_url: + hashtag_url = hashtag_url.split(';')[0] + hashtag_url = hashtag_url.replace('--', '/') + + page_number = 1 + if ';page=' in self.path: + page_number_str = self.path.split(';page=')[1] + if ';' in page_number_str: + page_number_str = page_number_str.split(';')[0] + if page_number_str.isdigit(): + page_number = int(page_number_str) + + allow_local_network_access = self.server.allow_local_network_access + show_published_date_only = self.server.show_published_date_only + twitter_replacement_domain = self.server.twitter_replacement_domain + msg = \ + html_hashtag_search_remote(nickname, + self.server.domain, + self.server.port, + self.server.recent_posts_cache, + self.server.max_recent_posts, + self.server.translate, + self.server.base_dir, + hashtag_url, + page_number, MAX_POSTS_IN_FEED, + self.server.session, + self.server.cached_webfingers, + self.server.person_cache, + self.server.http_prefix, + self.server.project_version, + self.server.yt_replace_domain, + twitter_replacement_domain, + show_published_date_only, + self.server.peertube_instances, + allow_local_network_access, + self.server.theme_name, + self.server.system_language, + self.server.max_like_count, + self.server.signing_priv_key_pem, + self.server.cw_lists, + self.server.lists_enabled, + self.server.timezone, + self.server.bold_reading, + self.server.dogwhistles, + self.server.min_images_for_accounts, + self.server.debug) + if msg: + msg = msg.encode('utf-8') + msglen = len(msg) + self._set_headers('text/html', msglen, cookie, calling_domain, + False) + self._write(msg) + self.server.getreq_busy = False + return + self._404() + self.server.getreq_busy = False + return + # hashtag search if self.path.startswith('/tags/') or \ (authorized and '/tags/' in self.path): diff --git a/webapp_post.py b/webapp_post.py index 100475b5e..cba2e791f 100644 --- a/webapp_post.py +++ b/webapp_post.py @@ -67,6 +67,7 @@ from utils import get_domain_from_actor from utils import acct_dir from utils import local_actor_url from utils import is_unlisted_post +from content import replace_remote_hashtags from content import detect_dogwhistles from content import create_edits_html from content import bold_reading_string @@ -2445,6 +2446,8 @@ def individual_post_as_html(signing_priv_key_pem: str, system_language, translate) if not content_str: return '' + content_str = \ + replace_remote_hashtags(content_str, nickname, domain) summary_str = '' if content_str: diff --git a/webapp_search.py b/webapp_search.py index d4163fb5e..2f1e1b0f3 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -46,6 +46,7 @@ from webapp_utils import html_search_result_share from webapp_post import individual_post_as_html from webapp_hashtagswarm import html_hash_tag_swarm from maps import html_hashtag_maps +from session import get_json def html_search_emoji(translate: {}, base_dir: str, search_str: str, @@ -1049,6 +1050,198 @@ def html_hashtag_search(nickname: str, domain: str, port: int, return hashtag_search_form +def html_hashtag_search_remote(nickname: str, domain: str, port: int, + recent_posts_cache: {}, max_recent_posts: int, + translate: {}, + base_dir: str, hashtag_url: str, + page_number: int, posts_per_page: int, + session, cached_webfingers: {}, + person_cache: {}, + http_prefix: str, project_version: str, + yt_replace_domain: str, + twitter_replacement_domain: str, + show_published_date_only: bool, + peertube_instances: [], + allow_local_network_access: bool, + theme_name: str, system_language: str, + max_like_count: int, + signing_priv_key_pem: str, + cw_lists: {}, lists_enabled: str, + timezone: str, bold_reading: bool, + dogwhistles: {}, + min_images_for_accounts: [], + debug: bool) -> str: + """Show a page containing search results for a remote hashtag + """ + hashtag = hashtag_url.split('/')[-1] + + profile_str = 'https://www.w3.org/ns/activitystreams' + as_header = { + 'Accept': 'application/ld+json; profile="' + profile_str + '"' + } + hashtag_json = \ + get_json(signing_priv_key_pem, + session, hashtag_url, as_header, None, debug, + __version__, http_prefix, domain) + lines = [] + if not hashtag_json: + if 'orderedItems' in hashtag_json: + lines = hashtag_json['orderedItems'] + + separator_str = html_post_separator(base_dir, None) + + # check that the directory for the nickname exists + if nickname: + account_dir = acct_dir(base_dir, nickname, domain) + if not os.path.isdir(account_dir): + return None + + # read the css + css_filename = base_dir + '/epicyon-profile.css' + if os.path.isfile(base_dir + '/epicyon.css'): + css_filename = base_dir + '/epicyon.css' + + # ensure that the page number is in bounds + if not page_number: + page_number = 1 + elif page_number < 1: + page_number = 1 + + # get the start end end within the index file + start_index = int((page_number - 1) * posts_per_page) + end_index = start_index + posts_per_page + no_of_lines = len(lines) + if end_index >= no_of_lines and no_of_lines > 0: + end_index = no_of_lines - 1 + + instance_title = \ + get_config_param(base_dir, 'instanceTitle') + hashtag_search_form = \ + html_header_with_external_style(css_filename, instance_title, None) + + # add the page title + hashtag_search_form += '
\n' + \ + '

#' + hashtag + + # RSS link for hashtag feed + hashtag_rss = hashtag_url + if '.html' in hashtag_rss: + hashtag_rss = hashtag_rss.replace('.html', '') + hashtag_search_form += ' ' + hashtag_search_form += \ + '

\n' + + tag_link = '/users/' + nickname + '?remotetag=' + \ + hashtag_url.replace('/', '--') + if start_index > 0: + # previous page link + hashtag_search_form += \ + '
\n' + \ + ' ' + translate['Page up'] + \
+            '\n
\n' + index = start_index + while index <= end_index: + post_id = lines[index] + post_json_object = \ + get_json(signing_priv_key_pem, + session, post_id, as_header, None, debug, + __version__, http_prefix, domain) + if not post_json_object: + index += 1 + continue + if not isinstance(post_json_object, dict): + index += 1 + continue + if not has_object_dict(post_json_object): + if post_json_object.get('id') and \ + 'to' in post_json_object and \ + 'cc' in post_json_object and \ + post_json_object.get('actor'): + new_url = \ + remove_id_ending(post_json_object['id']) + '/activity' + new_post_json_object = { + "type": "Create", + "id": new_url, + "to": post_json_object['to'], + "cc": post_json_object['cc'], + "actor": post_json_object['actor'], + "object": post_json_object + } + post_json_object = new_post_json_object + else: + index += 1 + continue + if not is_public_post(post_json_object): + index += 1 + continue + show_individual_post_icons = False + allow_deletion = False + show_repeats = show_individual_post_icons + show_icons = show_individual_post_icons + manually_approves_followers = False + show_public_only = False + store_to_sache = False + allow_downloads = True + avatar_url = None + show_avatar_options = True + minimize_all_images = False + if nickname in min_images_for_accounts: + minimize_all_images = True + post_str = \ + individual_post_as_html(signing_priv_key_pem, + allow_downloads, recent_posts_cache, + max_recent_posts, + translate, None, + base_dir, session, cached_webfingers, + person_cache, + nickname, domain, port, + post_json_object, + avatar_url, show_avatar_options, + allow_deletion, + http_prefix, project_version, + 'search', + yt_replace_domain, + twitter_replacement_domain, + show_published_date_only, + peertube_instances, + allow_local_network_access, + theme_name, system_language, + max_like_count, + show_repeats, show_icons, + manually_approves_followers, + show_public_only, + store_to_sache, False, cw_lists, + lists_enabled, timezone, False, + bold_reading, dogwhistles, + minimize_all_images, None) + if post_str: + hashtag_search_form += separator_str + post_str + index += 1 + + if end_index < no_of_lines - 1: + # next page link + hashtag_search_form += \ + '
\n' + \ + ' ' + translate['Page down'] + '' + \ + '
' + hashtag_search_form += html_footer() + return hashtag_search_form + + def rss_hashtag_search(nickname: str, domain: str, port: int, recent_posts_cache: {}, max_recent_posts: int, translate: {}, From b80250f1a327b62f540078069af7e3022c1a936c Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 22:22:42 +0000 Subject: [PATCH 06/23] Check link --- content.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content.py b/content.py index 4194c3a6b..da079d768 100644 --- a/content.py +++ b/content.py @@ -786,6 +786,8 @@ def replace_remote_hashtags(content: str, ctr += 1 continue link = section.split('"')[0] + if '://' not in link: + continue if '?remotetag=' in link: ctr += 1 continue From f27a1dd9a3a778d981813542d936fa56f22c3d2d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 22:33:05 +0000 Subject: [PATCH 07/23] Unit test for hashtag link replacement --- tests.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests.py b/tests.py index c99597fee..478700958 100644 --- a/tests.py +++ b/tests.py @@ -135,6 +135,7 @@ from inbox import valid_inbox from inbox import valid_inbox_filenames from inbox import cache_svg_images from categories import guess_hashtag_category +from content import replace_remote_hashtags from content import add_name_emojis_to_tags from content import combine_textarea_lines from content import detect_dogwhistles @@ -7719,6 +7720,31 @@ def _test_replace_variable(): assert result == expected +def _test_replace_remote_tags() -> None: + print('replace_remote_tags') + nickname = 'mynick' + domain = 'furious.duck' + content = 'This is a test' + result = replace_remote_hashtags(content, nickname, domain) + assert result == content + + link = "https://something/else/mytag" + content = 'This is href="' + link + '" test' + result = replace_remote_hashtags(content, nickname, domain) + assert result == content + + link = "https://something/tags/mytag" + content = 'This is href="' + link + '" test' + result = replace_remote_hashtags(content, nickname, domain) + expected = \ + 'This is href="/users/' + nickname + '?remotetag=' + \ + link.replace('/', '--') + '" test' + if result != expected: + print(expected) + print(result) + assert result == expected + + def run_all_tests(): base_dir = os.getcwd() print('Running tests...') @@ -7736,6 +7762,7 @@ def run_all_tests(): _test_checkbox_names() _test_thread_functions() _test_functions() + _test_replace_remote_tags() _test_replace_variable() _test_missing_theme_colors(base_dir) _test_reply_language(base_dir) From 9d597396329a4ab5ec86c795b078c78f8df23a40 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 22:47:27 +0000 Subject: [PATCH 08/23] Timezone argument --- daemon.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index 7a1a8b790..2eb064f63 100644 --- a/daemon.py +++ b/daemon.py @@ -18488,6 +18488,10 @@ class PubServer(BaseHTTPRequestHandler): allow_local_network_access = self.server.allow_local_network_access show_published_date_only = self.server.show_published_date_only twitter_replacement_domain = self.server.twitter_replacement_domain + timezone = None + if self.server.account_timezone.get(nickname): + timezone = \ + self.server.account_timezone.get(nickname) msg = \ html_hashtag_search_remote(nickname, self.server.domain, @@ -18514,7 +18518,7 @@ class PubServer(BaseHTTPRequestHandler): self.server.signing_priv_key_pem, self.server.cw_lists, self.server.lists_enabled, - self.server.timezone, + timezone, self.server.bold_reading, self.server.dogwhistles, self.server.min_images_for_accounts, From 3b5b19ebf47a04e67b98e024ddb147df98e3755d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 22:54:29 +0000 Subject: [PATCH 09/23] Remove indexes --- webapp_search.py | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/webapp_search.py b/webapp_search.py index 2f1e1b0f3..5339e556c 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1107,13 +1107,6 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, elif page_number < 1: page_number = 1 - # get the start end end within the index file - start_index = int((page_number - 1) * posts_per_page) - end_index = start_index + posts_per_page - no_of_lines = len(lines) - if end_index >= no_of_lines and no_of_lines > 0: - end_index = no_of_lines - 1 - instance_title = \ get_config_param(base_dir, 'instanceTitle') hashtag_search_form = \ @@ -1136,7 +1129,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, tag_link = '/users/' + nickname + '?remotetag=' + \ hashtag_url.replace('/', '--') - if start_index > 0: + if page_number > 1: # previous page link hashtag_search_form += \ '
\n' + \ @@ -1148,18 +1141,14 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, translate['Page up'] + \ '" alt="' + translate['Page up'] + \ '">\n
\n' - index = start_index - while index <= end_index: - post_id = lines[index] + for post_id in lines: post_json_object = \ get_json(signing_priv_key_pem, session, post_id, as_header, None, debug, __version__, http_prefix, domain) if not post_json_object: - index += 1 continue if not isinstance(post_json_object, dict): - index += 1 continue if not has_object_dict(post_json_object): if post_json_object.get('id') and \ @@ -1178,10 +1167,8 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, } post_json_object = new_post_json_object else: - index += 1 continue if not is_public_post(post_json_object): - index += 1 continue show_individual_post_icons = False allow_deletion = False @@ -1225,9 +1212,8 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, minimize_all_images, None) if post_str: hashtag_search_form += separator_str + post_str - index += 1 - if end_index < no_of_lines - 1: + if len(lines) >= 5: # next page link hashtag_search_form += \ '
\n' + \ From d355a6f7fd678e82da631a9d979326dc63bb68ef Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 23:03:16 +0000 Subject: [PATCH 10/23] debug --- webapp_search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/webapp_search.py b/webapp_search.py index 5339e556c..4fb86313c 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1087,6 +1087,8 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, if not hashtag_json: if 'orderedItems' in hashtag_json: lines = hashtag_json['orderedItems'] + else: + print('WARN: no hashtags returned for url ' + hashtag_url) separator_str = html_post_separator(base_dir, None) @@ -1147,6 +1149,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, session, post_id, as_header, None, debug, __version__, http_prefix, domain) if not post_json_object: + print('No hashtag post for ' + post_id) continue if not isinstance(post_json_object, dict): continue @@ -1169,6 +1172,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, else: continue if not is_public_post(post_json_object): + print('Hashtag post is not public ' + post_id) continue show_individual_post_icons = False allow_deletion = False From 42d4a2ebae14c6494327ff3cfbf4c4f91186fcee Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 23:08:18 +0000 Subject: [PATCH 11/23] Change accept --- webapp_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webapp_search.py b/webapp_search.py index 4fb86313c..1129eeccd 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1077,7 +1077,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, profile_str = 'https://www.w3.org/ns/activitystreams' as_header = { - 'Accept': 'application/ld+json; profile="' + profile_str + '"' + 'Accept': 'application/activity+json; profile="' + profile_str + '"' } hashtag_json = \ get_json(signing_priv_key_pem, From d9d4577c5bedf751a954fd55ed78a0c46627f6a9 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 23:15:40 +0000 Subject: [PATCH 12/23] debug --- webapp_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/webapp_search.py b/webapp_search.py index 1129eeccd..921e79f48 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1079,6 +1079,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, as_header = { 'Accept': 'application/activity+json; profile="' + profile_str + '"' } + debug = True hashtag_json = \ get_json(signing_priv_key_pem, session, hashtag_url, as_header, None, debug, From c715085883f8bfe5da3c8b15ec6bd1b0403fc293 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 23:23:00 +0000 Subject: [PATCH 13/23] debug --- webapp_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webapp_search.py b/webapp_search.py index 921e79f48..c3d9aad3c 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1081,7 +1081,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, } debug = True hashtag_json = \ - get_json(signing_priv_key_pem, + get_json(None, session, hashtag_url, as_header, None, debug, __version__, http_prefix, domain) lines = [] From dc6dc2c7ef5ca551ce1c9f5605a140e11d059d61 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 23:29:12 +0000 Subject: [PATCH 14/23] Invert logic --- webapp_search.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/webapp_search.py b/webapp_search.py index c3d9aad3c..c0f89fcb4 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1079,13 +1079,12 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, as_header = { 'Accept': 'application/activity+json; profile="' + profile_str + '"' } - debug = True hashtag_json = \ - get_json(None, + get_json(signing_priv_key_pem, session, hashtag_url, as_header, None, debug, __version__, http_prefix, domain) lines = [] - if not hashtag_json: + if hashtag_json: if 'orderedItems' in hashtag_json: lines = hashtag_json['orderedItems'] else: From 8f806ea2348c0eda141bb3b1c35f9ab4240f8011 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 23:34:30 +0000 Subject: [PATCH 15/23] debug --- webapp_search.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/webapp_search.py b/webapp_search.py index c0f89fcb4..6a6c7e17d 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1087,6 +1087,8 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, if hashtag_json: if 'orderedItems' in hashtag_json: lines = hashtag_json['orderedItems'] + else: + print('No orderedItems in hashtag collection ' + str(hashtag_json)) else: print('WARN: no hashtags returned for url ' + hashtag_url) From 966a62cea8685bcf9bde33e6cdc21405e4ca4a8d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 23:37:52 +0000 Subject: [PATCH 16/23] debug --- webapp_search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/webapp_search.py b/webapp_search.py index 6a6c7e17d..54f9ab130 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1146,6 +1146,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, '" alt="' + translate['Page up'] + \ '">\n
\n' for post_id in lines: + print('Hashtag post_id ' + post_id) post_json_object = \ get_json(signing_priv_key_pem, session, post_id, as_header, None, debug, @@ -1154,6 +1155,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, print('No hashtag post for ' + post_id) continue if not isinstance(post_json_object, dict): + print('Hashtag post is not a dict ' + str(post_json_object)) continue if not has_object_dict(post_json_object): if post_json_object.get('id') and \ @@ -1172,6 +1174,8 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, } post_json_object = new_post_json_object else: + print('Hashtag post does not contain necessary fields ' + + str(post_json_object)) continue if not is_public_post(post_json_object): print('Hashtag post is not public ' + post_id) From 6de137a5594c9736eacba8d9bac44c40ae301ff7 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 5 Jan 2023 23:48:00 +0000 Subject: [PATCH 17/23] Get actor --- webapp_search.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/webapp_search.py b/webapp_search.py index 54f9ab130..fef1298da 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1160,16 +1160,18 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, if not has_object_dict(post_json_object): if post_json_object.get('id') and \ 'to' in post_json_object and \ - 'cc' in post_json_object and \ - post_json_object.get('actor'): + 'cc' in post_json_object: new_url = \ - remove_id_ending(post_json_object['id']) + '/activity' + remove_id_ending(post_json_object['id']) + actor = new_url + if '/statuses/' in actor: + actor = actor.split('/statuses/')[0] new_post_json_object = { "type": "Create", - "id": new_url, + "id": new_url + '/activity', "to": post_json_object['to'], "cc": post_json_object['cc'], - "actor": post_json_object['actor'], + "actor": actor, "object": post_json_object } post_json_object = new_post_json_object From 78a7c7f395b6a5738a3a54d8e807aebc69952ca7 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 6 Jan 2023 10:10:22 +0000 Subject: [PATCH 18/23] Include page in hashtag collection lookup --- webapp_search.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/webapp_search.py b/webapp_search.py index fef1298da..c9667285d 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1079,9 +1079,12 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, as_header = { 'Accept': 'application/activity+json; profile="' + profile_str + '"' } + hashtag_url_with_page = hashtag_url + if '?page=' not in hashtag_url_with_page: + hashtag_url_with_page += '?page=' + str(page_number) hashtag_json = \ get_json(signing_priv_key_pem, - session, hashtag_url, as_header, None, debug, + session, hashtag_url_with_page, as_header, None, debug, __version__, http_prefix, domain) lines = [] if hashtag_json: From 1a09c5887d96b805b51c0efa4bca09e096c2f5ac Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 6 Jan 2023 10:48:56 +0000 Subject: [PATCH 19/23] Only show page navigation on hashtag if it is available --- webapp_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webapp_search.py b/webapp_search.py index c9667285d..d1dc38bfe 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1136,7 +1136,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, tag_link = '/users/' + nickname + '?remotetag=' + \ hashtag_url.replace('/', '--') - if page_number > 1: + if page_number > 1 and hashtag_json.get('first'): # previous page link hashtag_search_form += \ '
\n' + \ @@ -1228,7 +1228,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, if post_str: hashtag_search_form += separator_str + post_str - if len(lines) >= 5: + if len(lines) >= 5 and hashtag_json.get('first'): # next page link hashtag_search_form += \ '
\n' + \ From 1974b89bd3010a49e09c150bdd008f757014d33a Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 6 Jan 2023 10:51:36 +0000 Subject: [PATCH 20/23] Only show page navigation on hashtag if it is available --- webapp_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webapp_search.py b/webapp_search.py index d1dc38bfe..b2c18c0a1 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1136,7 +1136,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, tag_link = '/users/' + nickname + '?remotetag=' + \ hashtag_url.replace('/', '--') - if page_number > 1 and hashtag_json.get('first'): + if page_number > 1 and hashtag_json.get('prev'): # previous page link hashtag_search_form += \ '
\n' + \ @@ -1228,7 +1228,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, if post_str: hashtag_search_form += separator_str + post_str - if len(lines) >= 5 and hashtag_json.get('first'): + if len(lines) >= 5 and hashtag_json.get('next'): # next page link hashtag_search_form += \ '
\n' + \ From 603bd968bc2b2125aae7d836a60e087f5bf480f0 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 6 Jan 2023 11:04:27 +0000 Subject: [PATCH 21/23] Add text mode separator --- webapp_search.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/webapp_search.py b/webapp_search.py index b2c18c0a1..d548f6d2f 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -964,6 +964,7 @@ def html_hashtag_search(nickname: str, domain: str, port: int, '" alt="' + translate['Page up'] + \ '">\n
\n' index = start_index + text_mode_separator = '

' while index <= end_index: post_id = lines[index].strip('\n').strip('\r') if ' ' not in post_id: @@ -1032,9 +1033,12 @@ def html_hashtag_search(nickname: str, domain: str, port: int, bold_reading, dogwhistles, minimize_all_images, None) if post_str: - hashtag_search_form += separator_str + post_str + hashtag_search_form += \ + text_mode_separator + separator_str + post_str index += 1 + hashtag_search_form += text_mode_separator + if end_index < no_of_lines - 1: # next page link hashtag_search_form += \ @@ -1148,6 +1152,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, translate['Page up'] + \ '" alt="' + translate['Page up'] + \ '">\n
\n' + text_mode_separator = '

' for post_id in lines: print('Hashtag post_id ' + post_id) post_json_object = \ @@ -1226,7 +1231,10 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, bold_reading, dogwhistles, minimize_all_images, None) if post_str: - hashtag_search_form += separator_str + post_str + hashtag_search_form += \ + text_mode_separator + separator_str + post_str + + hashtag_search_form += text_mode_separator if len(lines) >= 5 and hashtag_json.get('next'): # next page link From 6bf4e26ea898e7c11430ebb2b5a6455a7e22b373 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 6 Jan 2023 11:17:33 +0000 Subject: [PATCH 22/23] Ensure that the number of posts is not excessively large --- webapp_search.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/webapp_search.py b/webapp_search.py index d548f6d2f..1f2920c63 100644 --- a/webapp_search.py +++ b/webapp_search.py @@ -1153,6 +1153,7 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, '" alt="' + translate['Page up'] + \ '">\n
\n' text_mode_separator = '

' + post_ctr = 0 for post_id in lines: print('Hashtag post_id ' + post_id) post_json_object = \ @@ -1233,10 +1234,13 @@ def html_hashtag_search_remote(nickname: str, domain: str, port: int, if post_str: hashtag_search_form += \ text_mode_separator + separator_str + post_str + post_ctr += 1 + if post_ctr >= posts_per_page: + break hashtag_search_form += text_mode_separator - if len(lines) >= 5 and hashtag_json.get('next'): + if post_ctr >= 5 and hashtag_json.get('next'): # next page link hashtag_search_form += \ '
\n' + \ From 00b47b218be772e54d8986b05d08e87560a7d48d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Fri, 6 Jan 2023 12:08:01 +0000 Subject: [PATCH 23/23] Fix unit test --- webapp_post.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/webapp_post.py b/webapp_post.py index cba2e791f..91f7feb0a 100644 --- a/webapp_post.py +++ b/webapp_post.py @@ -1822,7 +1822,9 @@ def _get_copyright_footer(content_license_url: str, elif '/fdl' in content_license_url: icon_filename = 'license_fdl.png' - description = translate['Content License'] + description = 'Content License' + if translate.get('Content License'): + description = translate['Content License'] copyright_str = \ ' ' + \ '