diff --git a/content.py b/content.py index c80dd4229..da079d768 100644 --- a/content.py +++ b/content.py @@ -765,6 +765,48 @@ def _add_hash_tags(word_str: str, http_prefix: str, domain: str, return True +def replace_remote_hashtags(content: str, + nickname: str, domain: str) -> str: + """Replaces remote hashtags with a local version + """ + if not domain: + return content + + if ' href="' not in content: + return content + + sections = content.split(' href="') + ctr = 0 + replacements = {} + for section in sections: + if ctr == 0: + ctr += 1 + continue + if '"' not in section: + ctr += 1 + continue + link = section.split('"')[0] + if '://' not in link: + continue + if '?remotetag=' in link: + ctr += 1 + continue + if '/tags/' not in link: + ctr += 1 + continue + if '/' + domain not in link: + new_link = '/users/' + nickname + \ + '?remotetag=' + link.replace('/', '--') + replacements[link] = new_link + ctr += 1 + if not replacements: + return content + for old_link, new_link in replacements.items(): + content = content.replace('"' + old_link + '"', + '"' + new_link + '"') + return content + + def _add_emoji(base_dir: str, word_str: str, http_prefix: str, domain: str, replace_emoji: {}, post_tags: {}, diff --git a/daemon.py b/daemon.py index 7b2c7c1f8..2eb064f63 100644 --- a/daemon.py +++ b/daemon.py @@ -14,6 +14,7 @@ import json import time import urllib.parse import datetime +import os from socket import error as SocketError import errno from functools import partial @@ -238,7 +239,9 @@ from webapp_column_right import html_edit_news_post from webapp_search import html_skills_search from webapp_search import html_history_search from webapp_search import html_hashtag_search +from webapp_search import html_hashtag_search_remote from webapp_search import rss_hashtag_search +from webapp_search import hashtag_search_json from webapp_search import html_search_emoji from webapp_search import html_search_shared_items from webapp_search import html_search_emoji_text_entry @@ -427,8 +430,6 @@ from maps import map_format_from_tagmaps_path from relationships import get_moved_feed from relationships import get_inactive_feed from relationships import update_moved_actors -import os - # maximum number of posts to list in outbox feed MAX_POSTS_IN_FEED = 12 @@ -9175,6 +9176,62 @@ class PubServer(BaseHTTPRequestHandler): '_GET', '_hashtag_search_rss2', self.server.debug) + def _hashtag_search_json(self, calling_domain: str, + referer_domain: str, + path: str, cookie: str, + base_dir: str, http_prefix: str, + domain: str, domain_full: str, port: int, + onion_domain: str, i2p_domain: str, + getreq_start_time) -> None: + """Return a json collection for a hashtag + """ + page_number = 1 + if '?page=' in path: + page_number_str = path.split('?page=')[1] + if page_number_str.isdigit(): + page_number = int(page_number_str) + path = path.split('?page=')[0] + hashtag = path.split('/tags/')[1] + if is_blocked_hashtag(base_dir, hashtag): + self._400() + return + nickname = None + if '/users/' in path: + actor = \ + http_prefix + '://' + domain_full + path + nickname = \ + get_nickname_from_actor(actor) + hashtag_json = \ + hashtag_search_json(nickname, + domain, port, + base_dir, hashtag, + page_number, MAX_POSTS_IN_FEED, + http_prefix) + if hashtag_json: + msg_str = json.dumps(hashtag_json) + msg_str = self._convert_domains(calling_domain, referer_domain, + msg_str) + msg = msg_str.encode('utf-8') + msglen = len(msg) + self._set_headers('application/json', msglen, + None, calling_domain, True) + self._write(msg) + 
else: + origin_path_str = path.split('/tags/')[0] + origin_path_str_absolute = \ + http_prefix + '://' + domain_full + origin_path_str + if calling_domain.endswith('.onion') and onion_domain: + origin_path_str_absolute = \ + 'http://' + onion_domain + origin_path_str + elif (calling_domain.endswith('.i2p') and i2p_domain): + origin_path_str_absolute = \ + 'http://' + i2p_domain + origin_path_str + self._redirect_headers(origin_path_str_absolute, + cookie, calling_domain) + fitness_performance(getreq_start_time, self.server.fitness, + '_GET', '_hashtag_search_json', + self.server.debug) + def _announce_button(self, calling_domain: str, path: str, base_dir: str, cookie: str, proxy_type: str, @@ -18411,6 +18468,73 @@ class PubServer(BaseHTTPRequestHandler): self.server.getreq_busy = False return + if '?remotetag=' in self.path and \ + '/users/' in self.path and authorized: + actor = self.path.split('?remotetag=')[0] + nickname = get_nickname_from_actor(actor) + hashtag_url = self.path.split('?remotetag=')[1] + if ';' in hashtag_url: + hashtag_url = hashtag_url.split(';')[0] + hashtag_url = hashtag_url.replace('--', '/') + + page_number = 1 + if ';page=' in self.path: + page_number_str = self.path.split(';page=')[1] + if ';' in page_number_str: + page_number_str = page_number_str.split(';')[0] + if page_number_str.isdigit(): + page_number = int(page_number_str) + + allow_local_network_access = self.server.allow_local_network_access + show_published_date_only = self.server.show_published_date_only + twitter_replacement_domain = self.server.twitter_replacement_domain + timezone = None + if self.server.account_timezone.get(nickname): + timezone = \ + self.server.account_timezone.get(nickname) + msg = \ + html_hashtag_search_remote(nickname, + self.server.domain, + self.server.port, + self.server.recent_posts_cache, + self.server.max_recent_posts, + self.server.translate, + self.server.base_dir, + hashtag_url, + page_number, MAX_POSTS_IN_FEED, + self.server.session, + self.server.cached_webfingers, + self.server.person_cache, + self.server.http_prefix, + self.server.project_version, + self.server.yt_replace_domain, + twitter_replacement_domain, + show_published_date_only, + self.server.peertube_instances, + allow_local_network_access, + self.server.theme_name, + self.server.system_language, + self.server.max_like_count, + self.server.signing_priv_key_pem, + self.server.cw_lists, + self.server.lists_enabled, + timezone, + self.server.bold_reading, + self.server.dogwhistles, + self.server.min_images_for_accounts, + self.server.debug) + if msg: + msg = msg.encode('utf-8') + msglen = len(msg) + self._set_headers('text/html', msglen, cookie, calling_domain, + False) + self._write(msg) + self.server.getreq_busy = False + return + self._404() + self.server.getreq_busy = False + return + # hashtag search if self.path.startswith('/tags/') or \ (authorized and '/tags/' in self.path): @@ -18428,6 +18552,19 @@ class PubServer(BaseHTTPRequestHandler): curr_session) self.server.getreq_busy = False return + if not html_getreq: + self._hashtag_search_json(calling_domain, referer_domain, + self.path, cookie, + self.server.base_dir, + self.server.http_prefix, + self.server.domain, + self.server.domain_full, + self.server.port, + self.server.onion_domain, + self.server.i2p_domain, + getreq_start_time) + self.server.getreq_busy = False + return self._hashtag_search(calling_domain, self.path, cookie, self.server.base_dir, diff --git a/manual/manual.md b/manual/manual.md index 046eb6aad..2a37d6a89 100644 --- 
a/manual/manual.md +++ b/manual/manual.md @@ -436,6 +436,12 @@ The location field on a post can be a description, but it can also be a map geol Selecting the *location* header will open the last known geolocation, so if your current location is near this makes it quicker to find. +## Scientific references +It is possible to have references to scientific papers linked automatically, such that they are readable with one click/press. Supported references are [arXiv](https://arxiv.org) and [Digital object identifier (DOI)](https://en.wikipedia.org/wiki/Digital_object_identifier). For example: +```text +This is a reference to a paper: arxiv:2203.15752 +``` + # The Timeline ## Layout ![Layout](manual-layout.png) diff --git a/tests.py b/tests.py index c99597fee..478700958 100644 --- a/tests.py +++ b/tests.py @@ -135,6 +135,7 @@ from inbox import valid_inbox from inbox import valid_inbox_filenames from inbox import cache_svg_images from categories import guess_hashtag_category +from content import replace_remote_hashtags from content import add_name_emojis_to_tags from content import combine_textarea_lines from content import detect_dogwhistles @@ -7719,6 +7720,31 @@ def _test_replace_variable(): assert result == expected +def _test_replace_remote_tags() -> None: + print('replace_remote_tags') + nickname = 'mynick' + domain = 'furious.duck' + content = 'This is a test' + result = replace_remote_hashtags(content, nickname, domain) + assert result == content + + link = "https://something/else/mytag" + content = 'This is href="' + link + '" test' + result = replace_remote_hashtags(content, nickname, domain) + assert result == content + + link = "https://something/tags/mytag" + content = 'This is href="' + link + '" test' + result = replace_remote_hashtags(content, nickname, domain) + expected = \ + 'This is href="/users/' + nickname + '?remotetag=' + \ + link.replace('/', '--') + '" test' + if result != expected: + print(expected) + print(result) + assert result == expected + + def run_all_tests(): base_dir = os.getcwd() print('Running tests...') @@ -7736,6 +7762,7 @@ def run_all_tests(): _test_checkbox_names() _test_thread_functions() _test_functions() + _test_replace_remote_tags() _test_replace_variable() _test_missing_theme_colors(base_dir) _test_reply_language(base_dir) diff --git a/webapp_post.py b/webapp_post.py index 100475b5e..91f7feb0a 100644 --- a/webapp_post.py +++ b/webapp_post.py @@ -67,6 +67,7 @@ from utils import get_domain_from_actor from utils import acct_dir from utils import local_actor_url from utils import is_unlisted_post +from content import replace_remote_hashtags from content import detect_dogwhistles from content import create_edits_html from content import bold_reading_string @@ -1821,7 +1822,9 @@ def _get_copyright_footer(content_license_url: str, elif '/fdl' in content_license_url: icon_filename = 'license_fdl.png' - description = translate['Content License'] + description = 'Content License' + if translate.get('Content License'): + description = translate['Content License'] copyright_str = \ ' ' + \ '\n \n' index = start_index + text_mode_separator = '
<div class="transparent"><hr></div>
' while index <= end_index: post_id = lines[index].strip('\n').strip('\r') if ' ' not in post_id: @@ -1029,9 +1033,12 @@ def html_hashtag_search(nickname: str, domain: str, port: int, bold_reading, dogwhistles, minimize_all_images, None) if post_str: - hashtag_search_form += separator_str + post_str + hashtag_search_form += \ + text_mode_separator + separator_str + post_str index += 1 + hashtag_search_form += text_mode_separator + if end_index < no_of_lines - 1: # next page link hashtag_search_form += \ @@ -1047,6 +1054,207 @@ def html_hashtag_search(nickname: str, domain: str, port: int, return hashtag_search_form +def html_hashtag_search_remote(nickname: str, domain: str, port: int, + recent_posts_cache: {}, max_recent_posts: int, + translate: {}, + base_dir: str, hashtag_url: str, + page_number: int, posts_per_page: int, + session, cached_webfingers: {}, + person_cache: {}, + http_prefix: str, project_version: str, + yt_replace_domain: str, + twitter_replacement_domain: str, + show_published_date_only: bool, + peertube_instances: [], + allow_local_network_access: bool, + theme_name: str, system_language: str, + max_like_count: int, + signing_priv_key_pem: str, + cw_lists: {}, lists_enabled: str, + timezone: str, bold_reading: bool, + dogwhistles: {}, + min_images_for_accounts: [], + debug: bool) -> str: + """Show a page containing search results for a remote hashtag + """ + hashtag = hashtag_url.split('/')[-1] + + profile_str = 'https://www.w3.org/ns/activitystreams' + as_header = { + 'Accept': 'application/activity+json; profile="' + profile_str + '"' + } + hashtag_url_with_page = hashtag_url + if '?page=' not in hashtag_url_with_page: + hashtag_url_with_page += '?page=' + str(page_number) + hashtag_json = \ + get_json(signing_priv_key_pem, + session, hashtag_url_with_page, as_header, None, debug, + __version__, http_prefix, domain) + lines = [] + if hashtag_json: + if 'orderedItems' in hashtag_json: + lines = hashtag_json['orderedItems'] + else: + print('No orderedItems in hashtag collection ' + str(hashtag_json)) + else: + print('WARN: no hashtags returned for url ' + hashtag_url) + + separator_str = html_post_separator(base_dir, None) + + # check that the directory for the nickname exists + if nickname: + account_dir = acct_dir(base_dir, nickname, domain) + if not os.path.isdir(account_dir): + return None + + # read the css + css_filename = base_dir + '/epicyon-profile.css' + if os.path.isfile(base_dir + '/epicyon.css'): + css_filename = base_dir + '/epicyon.css' + + # ensure that the page number is in bounds + if not page_number: + page_number = 1 + elif page_number < 1: + page_number = 1 + + instance_title = \ + get_config_param(base_dir, 'instanceTitle') + hashtag_search_form = \ + html_header_with_external_style(css_filename, instance_title, None) + + # add the page title + hashtag_search_form += '
<center>\n' + \ + '
<h1>
#' + hashtag + + # RSS link for hashtag feed + hashtag_rss = hashtag_url + if '.html' in hashtag_rss: + hashtag_rss = hashtag_rss.replace('.html', '') + hashtag_search_form += ' ' + hashtag_search_form += \ + '
</h1>
\n' + + tag_link = '/users/' + nickname + '?remotetag=' + \ + hashtag_url.replace('/', '--') + if page_number > 1 and hashtag_json.get('prev'): + # previous page link + hashtag_search_form += \ + '
\n' + \ + ' ' + translate['Page up'] + \
+            '\n
\n' + text_mode_separator = '
<div class="transparent"><hr></div>
' + post_ctr = 0 + for post_id in lines: + print('Hashtag post_id ' + post_id) + post_json_object = \ + get_json(signing_priv_key_pem, + session, post_id, as_header, None, debug, + __version__, http_prefix, domain) + if not post_json_object: + print('No hashtag post for ' + post_id) + continue + if not isinstance(post_json_object, dict): + print('Hashtag post is not a dict ' + str(post_json_object)) + continue + if not has_object_dict(post_json_object): + if post_json_object.get('id') and \ + 'to' in post_json_object and \ + 'cc' in post_json_object: + new_url = \ + remove_id_ending(post_json_object['id']) + actor = new_url + if '/statuses/' in actor: + actor = actor.split('/statuses/')[0] + new_post_json_object = { + "type": "Create", + "id": new_url + '/activity', + "to": post_json_object['to'], + "cc": post_json_object['cc'], + "actor": actor, + "object": post_json_object + } + post_json_object = new_post_json_object + else: + print('Hashtag post does not contain necessary fields ' + + str(post_json_object)) + continue + if not is_public_post(post_json_object): + print('Hashtag post is not public ' + post_id) + continue + show_individual_post_icons = False + allow_deletion = False + show_repeats = show_individual_post_icons + show_icons = show_individual_post_icons + manually_approves_followers = False + show_public_only = False + store_to_sache = False + allow_downloads = True + avatar_url = None + show_avatar_options = True + minimize_all_images = False + if nickname in min_images_for_accounts: + minimize_all_images = True + post_str = \ + individual_post_as_html(signing_priv_key_pem, + allow_downloads, recent_posts_cache, + max_recent_posts, + translate, None, + base_dir, session, cached_webfingers, + person_cache, + nickname, domain, port, + post_json_object, + avatar_url, show_avatar_options, + allow_deletion, + http_prefix, project_version, + 'search', + yt_replace_domain, + twitter_replacement_domain, + show_published_date_only, + peertube_instances, + allow_local_network_access, + theme_name, system_language, + max_like_count, + show_repeats, show_icons, + manually_approves_followers, + show_public_only, + store_to_sache, False, cw_lists, + lists_enabled, timezone, False, + bold_reading, dogwhistles, + minimize_all_images, None) + if post_str: + hashtag_search_form += \ + text_mode_separator + separator_str + post_str + post_ctr += 1 + if post_ctr >= posts_per_page: + break + + hashtag_search_form += text_mode_separator + + if post_ctr >= 5 and hashtag_json.get('next'): + # next page link + hashtag_search_form += \ + '
\n' + \ + ' ' + translate['Page down'] + '' + \ + '
' + hashtag_search_form += html_footer() + return hashtag_search_form + + def rss_hashtag_search(nickname: str, domain: str, port: int, recent_posts_cache: {}, max_recent_posts: int, translate: {}, @@ -1157,3 +1365,83 @@ def rss_hashtag_search(nickname: str, domain: str, port: int, break return hashtag_feed + rss2tag_footer() + + +def hashtag_search_json(nickname: str, domain: str, port: int, + base_dir: str, hashtag: str, + page_number: int, posts_per_page: int, + http_prefix: str) -> {}: + """Show a json collection for a hashtag + """ + if hashtag.startswith('#'): + hashtag = hashtag[1:] + hashtag = urllib.parse.unquote(hashtag) + hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt' + if not os.path.isfile(hashtag_index_file): + if hashtag != hashtag.lower(): + hashtag = hashtag.lower() + hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt' + if not os.path.isfile(hashtag_index_file): + print('WARN: hashtag file not found ' + hashtag_index_file) + return None + + # check that the directory for the nickname exists + if nickname: + account_dir = acct_dir(base_dir, nickname, domain) + if not os.path.isdir(account_dir): + nickname = None + + # read the index + lines = [] + with open(hashtag_index_file, 'r', encoding='utf-8') as fp_hash: + lines = fp_hash.readlines() + if not lines: + return None + + domain_full = get_full_domain(domain, port) + + url = http_prefix + '://' + domain_full + '/tags/' + \ + hashtag + '?page=' + str(page_number) + hashtag_json = { + '@context': 'https://www.w3.org/ns/activitystreams', + 'id': url, + 'orderedItems': [], + 'totalItems': 0, + 'type': 'OrderedCollection' + } + page_items = 0 + for index, _ in enumerate(lines): + post_id = lines[index].strip('\n').strip('\r') + if ' ' not in post_id: + nickname = get_nickname_from_actor(post_id) + if not nickname: + continue + else: + post_fields = post_id.split(' ') + if len(post_fields) != 3: + continue + nickname = post_fields[1] + post_id = post_fields[2] + post_filename = locate_post(base_dir, nickname, domain, post_id) + if not post_filename: + continue + post_json_object = load_json(post_filename) + if not post_json_object: + continue + if not has_object_dict(post_json_object): + continue + if not is_public_post(post_json_object): + continue + if not post_json_object['object'].get('id'): + continue + # add to feed + page_items += 1 + if page_items < posts_per_page * (page_number - 1): + continue + id_str = remove_id_ending(post_json_object['object']['id']) + hashtag_json['orderedItems'].append(id_str) + hashtag_json['totalItems'] += 1 + if hashtag_json['totalItems'] >= posts_per_page: + break + + return hashtag_json
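
Note on the ?remotetag= scheme introduced by this patch: replace_remote_hashtags() in content.py rewrites a remote '/tags/' link found in rendered post HTML into a local '/users/<nickname>?remotetag=' link, with every '/' encoded as '--', and the GET handler in daemon.py reverses that substitution before fetching the remote hashtag collection. The standalone sketch below illustrates the round trip; it is not part of the patch, and the helper names encode_remote_tag and decode_remote_tag are illustrative only and do not exist in the codebase.

# Illustrative sketch only: mirrors the encoding in content.replace_remote_hashtags
# and the decoding in daemon.py's '?remotetag=' GET handler.

def encode_remote_tag(link: str, nickname: str) -> str:
    """Rewrite a remote /tags/ link as a local ?remotetag= link,
    encoding '/' as '--' so the URL survives as a query value."""
    return '/users/' + nickname + '?remotetag=' + link.replace('/', '--')


def decode_remote_tag(path: str) -> str:
    """Recover the remote hashtag collection URL from the request path,
    dropping any ';page=N' suffix, as the daemon handler does."""
    hashtag_url = path.split('?remotetag=')[1]
    if ';' in hashtag_url:
        hashtag_url = hashtag_url.split(';')[0]
    return hashtag_url.replace('--', '/')


link = "https://something/tags/mytag"
encoded = encode_remote_tag(link, 'mynick')
assert encoded == '/users/mynick?remotetag=https:----something--tags--mytag'
assert decode_remote_tag(encoded + ';page=2') == link

One consequence of this encoding is that a literal '--' in the original link would be decoded back as '/', so the scheme assumes remote hashtag collection URLs never contain '--'.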