Bob Mottram 2023-01-06 12:09:21 +00:00
commit 2446108faf
6 changed files with 509 additions and 4 deletions

View File

@ -765,6 +765,48 @@ def _add_hash_tags(word_str: str, http_prefix: str, domain: str,
return True
def replace_remote_hashtags(content: str,
                            nickname: str, domain: str) -> str:
    """Replaces remote hashtag links within content with a local
    /users/{nickname}?remotetag= link, with '/' encoded as '--',
    so that selecting a hashtag from another instance opens the
    local remote-hashtag search page.

    content: html content of a post
    nickname: local account nickname used to build the replacement link
    domain: this instance's domain; links containing it are left alone
    Returns the content with remote hashtag hrefs rewritten.
    """
    if not domain:
        return content
    if ' href="' not in content:
        return content
    replacements = {}
    # the first split section precedes any href, so skip it
    # (this replaces the previous ctr-based bookkeeping, which failed to
    # increment on one branch and only ever distinguished the first section)
    for section in content.split(' href="')[1:]:
        if '"' not in section:
            continue
        link = section.split('"')[0]
        # only rewrite absolute links
        if '://' not in link:
            continue
        # already rewritten to a local remote-tag link
        if '?remotetag=' in link:
            continue
        # only hashtag links
        if '/tags/' not in link:
            continue
        # leave links on this instance alone
        if '/' + domain not in link:
            new_link = '/users/' + nickname + \
                '?remotetag=' + link.replace('/', '--')
            replacements[link] = new_link
    for old_link, new_link in replacements.items():
        content = content.replace('"' + old_link + '"',
                                  '"' + new_link + '"')
    return content
def _add_emoji(base_dir: str, word_str: str,
http_prefix: str, domain: str,
replace_emoji: {}, post_tags: {},

141
daemon.py
View File

@ -14,6 +14,7 @@ import json
import time
import urllib.parse
import datetime
import os
from socket import error as SocketError
import errno
from functools import partial
@ -238,7 +239,9 @@ from webapp_column_right import html_edit_news_post
from webapp_search import html_skills_search
from webapp_search import html_history_search
from webapp_search import html_hashtag_search
from webapp_search import html_hashtag_search_remote
from webapp_search import rss_hashtag_search
from webapp_search import hashtag_search_json
from webapp_search import html_search_emoji
from webapp_search import html_search_shared_items
from webapp_search import html_search_emoji_text_entry
@ -427,8 +430,6 @@ from maps import map_format_from_tagmaps_path
from relationships import get_moved_feed
from relationships import get_inactive_feed
from relationships import update_moved_actors
import os
# maximum number of posts to list in outbox feed
MAX_POSTS_IN_FEED = 12
@ -9175,6 +9176,62 @@ class PubServer(BaseHTTPRequestHandler):
'_GET', '_hashtag_search_rss2',
self.server.debug)
def _hashtag_search_json(self, calling_domain: str,
                         referer_domain: str,
                         path: str, cookie: str,
                         base_dir: str, http_prefix: str,
                         domain: str, domain_full: str, port: int,
                         onion_domain: str, i2p_domain: str,
                         getreq_start_time) -> None:
    """Return a json collection for a hashtag.

    path is expected to contain '/tags/{hashtag}' and optionally
    '?page={n}'. Blocked hashtags get a 400 response. If no json
    collection is available the request is redirected back to the
    path preceding '/tags/', translated to the onion/i2p address
    when the caller arrived via one of those domains.
    """
    page_number = 1
    if '?page=' in path:
        page_number_str = path.split('?page=')[1]
        if page_number_str.isdigit():
            page_number = int(page_number_str)
        path = path.split('?page=')[0]
    hashtag = path.split('/tags/')[1]
    if is_blocked_hashtag(base_dir, hashtag):
        self._400()
        return
    nickname = None
    if '/users/' in path:
        actor = \
            http_prefix + '://' + domain_full + path
        nickname = \
            get_nickname_from_actor(actor)
    hashtag_json = \
        hashtag_search_json(nickname,
                            domain, port,
                            base_dir, hashtag,
                            page_number, MAX_POSTS_IN_FEED,
                            http_prefix)
    if hashtag_json:
        msg_str = json.dumps(hashtag_json)
        msg_str = self._convert_domains(calling_domain, referer_domain,
                                        msg_str)
        msg = msg_str.encode('utf-8')
        msglen = len(msg)
        self._set_headers('application/json', msglen,
                          None, calling_domain, True)
        self._write(msg)
    else:
        # no collection available, redirect back to the origin path
        origin_path_str = path.split('/tags/')[0]
        origin_path_str_absolute = \
            http_prefix + '://' + domain_full + origin_path_str
        if calling_domain.endswith('.onion') and onion_domain:
            origin_path_str_absolute = \
                'http://' + onion_domain + origin_path_str
        elif calling_domain.endswith('.i2p') and i2p_domain:
            # bug fix: this previously tested onion_domain, so i2p
            # callers were redirected incorrectly
            origin_path_str_absolute = \
                'http://' + i2p_domain + origin_path_str
        self._redirect_headers(origin_path_str_absolute,
                               cookie, calling_domain)
    fitness_performance(getreq_start_time, self.server.fitness,
                        '_GET', '_hashtag_search_json',
                        self.server.debug)
def _announce_button(self, calling_domain: str, path: str,
base_dir: str,
cookie: str, proxy_type: str,
@ -18411,6 +18468,73 @@ class PubServer(BaseHTTPRequestHandler):
self.server.getreq_busy = False
return
if '?remotetag=' in self.path and \
'/users/' in self.path and authorized:
actor = self.path.split('?remotetag=')[0]
nickname = get_nickname_from_actor(actor)
hashtag_url = self.path.split('?remotetag=')[1]
if ';' in hashtag_url:
hashtag_url = hashtag_url.split(';')[0]
hashtag_url = hashtag_url.replace('--', '/')
page_number = 1
if ';page=' in self.path:
page_number_str = self.path.split(';page=')[1]
if ';' in page_number_str:
page_number_str = page_number_str.split(';')[0]
if page_number_str.isdigit():
page_number = int(page_number_str)
allow_local_network_access = self.server.allow_local_network_access
show_published_date_only = self.server.show_published_date_only
twitter_replacement_domain = self.server.twitter_replacement_domain
timezone = None
if self.server.account_timezone.get(nickname):
timezone = \
self.server.account_timezone.get(nickname)
msg = \
html_hashtag_search_remote(nickname,
self.server.domain,
self.server.port,
self.server.recent_posts_cache,
self.server.max_recent_posts,
self.server.translate,
self.server.base_dir,
hashtag_url,
page_number, MAX_POSTS_IN_FEED,
self.server.session,
self.server.cached_webfingers,
self.server.person_cache,
self.server.http_prefix,
self.server.project_version,
self.server.yt_replace_domain,
twitter_replacement_domain,
show_published_date_only,
self.server.peertube_instances,
allow_local_network_access,
self.server.theme_name,
self.server.system_language,
self.server.max_like_count,
self.server.signing_priv_key_pem,
self.server.cw_lists,
self.server.lists_enabled,
timezone,
self.server.bold_reading,
self.server.dogwhistles,
self.server.min_images_for_accounts,
self.server.debug)
if msg:
msg = msg.encode('utf-8')
msglen = len(msg)
self._set_headers('text/html', msglen, cookie, calling_domain,
False)
self._write(msg)
self.server.getreq_busy = False
return
self._404()
self.server.getreq_busy = False
return
# hashtag search
if self.path.startswith('/tags/') or \
(authorized and '/tags/' in self.path):
@ -18428,6 +18552,19 @@ class PubServer(BaseHTTPRequestHandler):
curr_session)
self.server.getreq_busy = False
return
if not html_getreq:
self._hashtag_search_json(calling_domain, referer_domain,
self.path, cookie,
self.server.base_dir,
self.server.http_prefix,
self.server.domain,
self.server.domain_full,
self.server.port,
self.server.onion_domain,
self.server.i2p_domain,
getreq_start_time)
self.server.getreq_busy = False
return
self._hashtag_search(calling_domain,
self.path, cookie,
self.server.base_dir,

View File

@ -436,6 +436,12 @@ The location field on a post can be a description, but it can also be a map geol
Selecting the *location* header will open the last known geolocation, so if your current location is near this makes it quicker to find.
## Scientific references
It is possible to have references to scientific papers linked automatically, such that they are readable with one click/press. Supported references are [arXiv](https://arxiv.org) and [Digital object identifier (DOI)](https://en.wikipedia.org/wiki/Digital_object_identifier). For example:
```text
This is a reference to a paper: arxiv:2203.15752
```
# The Timeline
## Layout
![Layout](manual-layout.png)

View File

@ -135,6 +135,7 @@ from inbox import valid_inbox
from inbox import valid_inbox_filenames
from inbox import cache_svg_images
from categories import guess_hashtag_category
from content import replace_remote_hashtags
from content import add_name_emojis_to_tags
from content import combine_textarea_lines
from content import detect_dogwhistles
@ -7719,6 +7720,31 @@ def _test_replace_variable():
assert result == expected
def _test_replace_remote_tags() -> None:
    """Unit test for replace_remote_hashtags"""
    print('replace_remote_tags')
    nickname = 'mynick'
    domain = 'furious.duck'

    # content without any href is returned unchanged
    content = 'This is a test'
    assert replace_remote_hashtags(content, nickname, domain) == content

    # a link which is not a hashtag link is left alone
    link = "https://something/else/mytag"
    content = 'This is href="' + link + '" test'
    assert replace_remote_hashtags(content, nickname, domain) == content

    # a remote hashtag link is rewritten to a local remotetag link
    link = "https://something/tags/mytag"
    content = 'This is href="' + link + '" test'
    expected = \
        'This is href="/users/' + nickname + '?remotetag=' + \
        link.replace('/', '--') + '" test'
    result = replace_remote_hashtags(content, nickname, domain)
    if result != expected:
        print(expected)
        print(result)
    assert result == expected
def run_all_tests():
base_dir = os.getcwd()
print('Running tests...')
@ -7736,6 +7762,7 @@ def run_all_tests():
_test_checkbox_names()
_test_thread_functions()
_test_functions()
_test_replace_remote_tags()
_test_replace_variable()
_test_missing_theme_colors(base_dir)
_test_reply_language(base_dir)

View File

@ -67,6 +67,7 @@ from utils import get_domain_from_actor
from utils import acct_dir
from utils import local_actor_url
from utils import is_unlisted_post
from content import replace_remote_hashtags
from content import detect_dogwhistles
from content import create_edits_html
from content import bold_reading_string
@ -1821,6 +1822,8 @@ def _get_copyright_footer(content_license_url: str,
elif '/fdl' in content_license_url:
icon_filename = 'license_fdl.png'
description = 'Content License'
if translate.get('Content License'):
description = translate['Content License']
copyright_str = \
' ' + \
@ -2445,6 +2448,8 @@ def individual_post_as_html(signing_priv_key_pem: str,
system_language, translate)
if not content_str:
return ''
content_str = \
replace_remote_hashtags(content_str, nickname, domain)
summary_str = ''
if content_str:

View File

@ -11,6 +11,8 @@ import os
from shutil import copyfile
import urllib.parse
from datetime import datetime
from utils import remove_id_ending
from utils import has_object_dict
from utils import acct_handle_dir
from utils import get_base_content_from_post
from utils import is_account_dir
@ -44,6 +46,7 @@ from webapp_utils import html_search_result_share
from webapp_post import individual_post_as_html
from webapp_hashtagswarm import html_hash_tag_swarm
from maps import html_hashtag_maps
from session import get_json
def html_search_emoji(translate: {}, base_dir: str, search_str: str,
@ -961,6 +964,7 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
'" alt="' + translate['Page up'] + \
'"></a>\n </center>\n'
index = start_index
text_mode_separator = '<div class="transparent"><hr></div>'
while index <= end_index:
post_id = lines[index].strip('\n').strip('\r')
if ' ' not in post_id:
@ -1029,9 +1033,12 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
bold_reading, dogwhistles,
minimize_all_images, None)
if post_str:
hashtag_search_form += separator_str + post_str
hashtag_search_form += \
text_mode_separator + separator_str + post_str
index += 1
hashtag_search_form += text_mode_separator
if end_index < no_of_lines - 1:
# next page link
hashtag_search_form += \
@ -1047,6 +1054,207 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
return hashtag_search_form
def html_hashtag_search_remote(nickname: str, domain: str, port: int,
                               recent_posts_cache: {}, max_recent_posts: int,
                               translate: {},
                               base_dir: str, hashtag_url: str,
                               page_number: int, posts_per_page: int,
                               session, cached_webfingers: {},
                               person_cache: {},
                               http_prefix: str, project_version: str,
                               yt_replace_domain: str,
                               twitter_replacement_domain: str,
                               show_published_date_only: bool,
                               peertube_instances: [],
                               allow_local_network_access: bool,
                               theme_name: str, system_language: str,
                               max_like_count: int,
                               signing_priv_key_pem: str,
                               cw_lists: {}, lists_enabled: str,
                               timezone: str, bold_reading: bool,
                               dogwhistles: {},
                               min_images_for_accounts: [],
                               debug: bool) -> str:
    """Show a page containing search results for a remote hashtag.

    hashtag_url is the tag collection url on the remote instance.
    The collection is fetched as ActivityPub json, each listed post
    is fetched and rendered, up to posts_per_page. Returns the html
    page, or None if the account directory for nickname is missing.
    """
    hashtag = hashtag_url.split('/')[-1]
    profile_str = 'https://www.w3.org/ns/activitystreams'
    as_header = {
        'Accept': 'application/activity+json; profile="' + profile_str + '"'
    }
    # fetch the remote tag collection for the requested page
    hashtag_url_with_page = hashtag_url
    if '?page=' not in hashtag_url_with_page:
        hashtag_url_with_page += '?page=' + str(page_number)
    hashtag_json = \
        get_json(signing_priv_key_pem,
                 session, hashtag_url_with_page, as_header, None, debug,
                 __version__, http_prefix, domain)
    lines = []
    if hashtag_json:
        if 'orderedItems' in hashtag_json:
            lines = hashtag_json['orderedItems']
        else:
            print('No orderedItems in hashtag collection ' + str(hashtag_json))
    else:
        print('WARN: no hashtags returned for url ' + hashtag_url)
    separator_str = html_post_separator(base_dir, None)
    # check that the directory for the nickname exists
    if nickname:
        account_dir = acct_dir(base_dir, nickname, domain)
        if not os.path.isdir(account_dir):
            return None
    # read the css
    css_filename = base_dir + '/epicyon-profile.css'
    if os.path.isfile(base_dir + '/epicyon.css'):
        css_filename = base_dir + '/epicyon.css'
    # ensure that the page number is in bounds
    if not page_number:
        page_number = 1
    elif page_number < 1:
        page_number = 1
    instance_title = \
        get_config_param(base_dir, 'instanceTitle')
    hashtag_search_form = \
        html_header_with_external_style(css_filename, instance_title, None)
    # add the page title
    hashtag_search_form += '<center>\n' + \
        '<h1>#' + hashtag
    # RSS link for hashtag feed
    hashtag_rss = hashtag_url
    if '.html' in hashtag_rss:
        hashtag_rss = hashtag_rss.replace('.html', '')
    hashtag_search_form += ' <a href="' + hashtag_rss + '.rss">'
    hashtag_search_form += \
        '<img style="width:3%;min-width:50px" ' + \
        'loading="lazy" decoding="async" ' + \
        'alt="RSS 2.0" title="RSS 2.0" src="/' + \
        'icons/logorss.png" /></a></h1>\n'
    # local pagination link for this remote tag
    tag_link = '/users/' + nickname + '?remotetag=' + \
        hashtag_url.replace('/', '--')
    # bug fix: hashtag_json can be None when the fetch failed, so
    # guard before calling .get() to avoid an AttributeError
    if page_number > 1 and hashtag_json and hashtag_json.get('prev'):
        # previous page link
        hashtag_search_form += \
            ' <center>\n' + \
            ' <a href="' + tag_link + ';page=' + \
            str(page_number - 1) + \
            '"><img loading="lazy" decoding="async" ' + \
            'class="pageicon" src="/' + \
            'icons/pageup.png" title="' + \
            translate['Page up'] + \
            '" alt="' + translate['Page up'] + \
            '"></a>\n </center>\n'
    text_mode_separator = '<div class="transparent"><hr></div>'
    post_ctr = 0
    for post_id in lines:
        print('Hashtag post_id ' + post_id)
        post_json_object = \
            get_json(signing_priv_key_pem,
                     session, post_id, as_header, None, debug,
                     __version__, http_prefix, domain)
        if not post_json_object:
            print('No hashtag post for ' + post_id)
            continue
        if not isinstance(post_json_object, dict):
            print('Hashtag post is not a dict ' + str(post_json_object))
            continue
        if not has_object_dict(post_json_object):
            # bare post object: wrap it in a Create activity so that
            # it can be rendered like any other timeline post
            if post_json_object.get('id') and \
               'to' in post_json_object and \
               'cc' in post_json_object:
                new_url = \
                    remove_id_ending(post_json_object['id'])
                actor = new_url
                if '/statuses/' in actor:
                    actor = actor.split('/statuses/')[0]
                new_post_json_object = {
                    "type": "Create",
                    "id": new_url + '/activity',
                    "to": post_json_object['to'],
                    "cc": post_json_object['cc'],
                    "actor": actor,
                    "object": post_json_object
                }
                post_json_object = new_post_json_object
            else:
                print('Hashtag post does not contain necessary fields ' +
                      str(post_json_object))
                continue
        if not is_public_post(post_json_object):
            print('Hashtag post is not public ' + post_id)
            continue
        show_individual_post_icons = False
        allow_deletion = False
        show_repeats = show_individual_post_icons
        show_icons = show_individual_post_icons
        manually_approves_followers = False
        show_public_only = False
        # typo fix: was previously named store_to_sache
        store_to_cache = False
        allow_downloads = True
        avatar_url = None
        show_avatar_options = True
        minimize_all_images = False
        if nickname in min_images_for_accounts:
            minimize_all_images = True
        post_str = \
            individual_post_as_html(signing_priv_key_pem,
                                    allow_downloads, recent_posts_cache,
                                    max_recent_posts,
                                    translate, None,
                                    base_dir, session, cached_webfingers,
                                    person_cache,
                                    nickname, domain, port,
                                    post_json_object,
                                    avatar_url, show_avatar_options,
                                    allow_deletion,
                                    http_prefix, project_version,
                                    'search',
                                    yt_replace_domain,
                                    twitter_replacement_domain,
                                    show_published_date_only,
                                    peertube_instances,
                                    allow_local_network_access,
                                    theme_name, system_language,
                                    max_like_count,
                                    show_repeats, show_icons,
                                    manually_approves_followers,
                                    show_public_only,
                                    store_to_cache, False, cw_lists,
                                    lists_enabled, timezone, False,
                                    bold_reading, dogwhistles,
                                    minimize_all_images, None)
        if post_str:
            hashtag_search_form += \
                text_mode_separator + separator_str + post_str
            post_ctr += 1
            if post_ctr >= posts_per_page:
                break
    hashtag_search_form += text_mode_separator
    # same None guard as for the previous page link
    if post_ctr >= 5 and hashtag_json and hashtag_json.get('next'):
        # next page link
        hashtag_search_form += \
            ' <center>\n' + \
            ' <a href="' + tag_link + \
            ';page=' + str(page_number + 1) + \
            '"><img loading="lazy" decoding="async" ' + \
            'class="pageicon" src="/icons' + \
            '/pagedown.png" title="' + translate['Page down'] + \
            '" alt="' + translate['Page down'] + '"></a>' + \
            ' </center>'
    hashtag_search_form += html_footer()
    return hashtag_search_form
def rss_hashtag_search(nickname: str, domain: str, port: int,
recent_posts_cache: {}, max_recent_posts: int,
translate: {},
@ -1157,3 +1365,83 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
break
return hashtag_feed + rss2tag_footer()
def hashtag_search_json(nickname: str, domain: str, port: int,
                        base_dir: str, hashtag: str,
                        page_number: int, posts_per_page: int,
                        http_prefix: str) -> {}:
    """Show a json collection for a hashtag.

    Reads the per-hashtag index file under base_dir/tags and returns
    an ActivityStreams OrderedCollection of public post ids for the
    requested page, or None if the hashtag index does not exist or
    is empty.
    """
    if hashtag.startswith('#'):
        hashtag = hashtag[1:]
    hashtag = urllib.parse.unquote(hashtag)
    hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
    if not os.path.isfile(hashtag_index_file):
        # fall back to the lowercase form of the hashtag
        if hashtag != hashtag.lower():
            hashtag = hashtag.lower()
            hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
        if not os.path.isfile(hashtag_index_file):
            print('WARN: hashtag file not found ' + hashtag_index_file)
            return None
    # check that the directory for the nickname exists
    if nickname:
        account_dir = acct_dir(base_dir, nickname, domain)
        if not os.path.isdir(account_dir):
            nickname = None
    # read the index
    lines = []
    with open(hashtag_index_file, 'r', encoding='utf-8') as fp_hash:
        lines = fp_hash.readlines()
    if not lines:
        return None
    domain_full = get_full_domain(domain, port)
    url = http_prefix + '://' + domain_full + '/tags/' + \
        hashtag + '?page=' + str(page_number)
    hashtag_json = {
        '@context': 'https://www.w3.org/ns/activitystreams',
        'id': url,
        'orderedItems': [],
        'totalItems': 0,
        'type': 'OrderedCollection'
    }
    page_items = 0
    for line in lines:
        post_id = line.strip('\n').strip('\r')
        if ' ' not in post_id:
            # old-style index line containing only the post id
            nickname = get_nickname_from_actor(post_id)
            if not nickname:
                continue
        else:
            # new-style index line: "hashtag nickname postid"
            post_fields = post_id.split(' ')
            if len(post_fields) != 3:
                continue
            nickname = post_fields[1]
            post_id = post_fields[2]
        post_filename = locate_post(base_dir, nickname, domain, post_id)
        if not post_filename:
            continue
        post_json_object = load_json(post_filename)
        if not post_json_object:
            continue
        if not has_object_dict(post_json_object):
            continue
        if not is_public_post(post_json_object):
            continue
        if not post_json_object['object'].get('id'):
            continue
        # add to feed
        page_items += 1
        # off-by-one fix: page_items has already been incremented, so
        # use <= when skipping prior pages, otherwise the first item
        # of each page duplicates the last item of the previous page
        if page_items <= posts_per_page * (page_number - 1):
            continue
        id_str = remove_id_ending(post_json_object['object']['id'])
        hashtag_json['orderedItems'].append(id_str)
        hashtag_json['totalItems'] += 1
        if hashtag_json['totalItems'] >= posts_per_page:
            break
    return hashtag_json