Returning json for hashtags

2023-01-05 15:56:49 +00:00 · 2023-01-05 15:56:49 +00:00 · 4ae21596e4
parent 37d5138c77
commit 4ae21596e4
2 changed files with 154 additions and 2 deletions
--- a/daemon.py
+++ b/daemon.py
@ -14,6 +14,7 @@ import json
 import time
 import urllib.parse
 import datetime
+import os
 from socket import error as SocketError
 import errno
 from functools import partial
@ -239,6 +240,7 @@ from webapp_search import html_skills_search
 from webapp_search import html_history_search
 from webapp_search import html_hashtag_search
 from webapp_search import rss_hashtag_search
+from webapp_search import hashtag_search_json
 from webapp_search import html_search_emoji
 from webapp_search import html_search_shared_items
 from webapp_search import html_search_emoji_text_entry
@ -427,8 +429,6 @@ from maps import map_format_from_tagmaps_path
 from relationships import get_moved_feed
 from relationships import get_inactive_feed
 from relationships import update_moved_actors
-import os
-

 # maximum number of posts to list in outbox feed
 MAX_POSTS_IN_FEED = 12
@ -9175,6 +9175,62 @@ class PubServer(BaseHTTPRequestHandler):
                            '_GET', '_hashtag_search_rss2',
                            self.server.debug)

+    def _hashtag_search_json(self, calling_domain: str,
+                             referer_domain: str,
+                             path: str, cookie: str,
+                             base_dir: str, http_prefix: str,
+                             domain: str, domain_full: str, port: int,
+                             onion_domain: str, i2p_domain: str,
+                             getreq_start_time,
+                             curr_session=None) -> None:
+        """Return a json collection for a hashtag
+
+        An optional ?page=N suffix on the path selects the page, with
+        page 1 used if it is absent or not a number. The hashtag is the
+        part of the path which follows /tags/
+
+        A 400 is returned if the path contains no /tags/ or if the
+        hashtag is blocked. If a collection is produced for the hashtag
+        then it is sent as application/json, otherwise the request is
+        redirected to the part of the path which precedes /tags/
+
+        curr_session is accepted because the GET handler passes it as
+        the final argument. It is not used here.
+        """
+        page_number = 1
+        if '?page=' in path:
+            page_number_str = path.split('?page=')[1]
+            if page_number_str.isdigit():
+                page_number = int(page_number_str)
+            path = path.split('?page=')[0]
+        if '/tags/' not in path:
+            # without this the split below would raise an IndexError
+            self._400()
+            return
+        hashtag = path.split('/tags/')[1]
+        if is_blocked_hashtag(base_dir, hashtag):
+            self._400()
+            return
+        nickname = None
+        if '/users/' in path:
+            actor = \
+                http_prefix + '://' + domain_full + path
+            nickname = \
+                get_nickname_from_actor(actor)
+        hashtag_json = \
+            hashtag_search_json(nickname,
+                                domain, port,
+                                base_dir, hashtag,
+                                page_number, MAX_POSTS_IN_FEED,
+                                http_prefix)
+        if hashtag_json:
+            msg_str = json.dumps(hashtag_json)
+            msg_str = self._convert_domains(calling_domain, referer_domain,
+                                            msg_str)
+            msg = msg_str.encode('utf-8')
+            msglen = len(msg)
+            self._set_headers('application/json', msglen,
+                              None, calling_domain, True)
+            self._write(msg)
+        else:
+            # nothing to show, so go back to where the request came from
+            origin_path_str = path.split('/tags/')[0]
+            origin_path_str_absolute = \
+                http_prefix + '://' + domain_full + origin_path_str
+            if calling_domain.endswith('.onion') and onion_domain:
+                origin_path_str_absolute = \
+                    'http://' + onion_domain + origin_path_str
+            elif calling_domain.endswith('.i2p') and i2p_domain:
+                # only use the i2p address if one has been configured
+                origin_path_str_absolute = \
+                    'http://' + i2p_domain + origin_path_str
+            self._redirect_headers(origin_path_str_absolute,
+                                   cookie, calling_domain)
+        fitness_performance(getreq_start_time, self.server.fitness,
+                            '_GET', '_hashtag_search_json',
+                            self.server.debug)
+
    def _announce_button(self, calling_domain: str, path: str,
                         base_dir: str,
                         cookie: str, proxy_type: str,
@ -18428,6 +18484,21 @@ class PubServer(BaseHTTPRequestHandler):
                                          curr_session)
                self.server.getreq_busy = False
                return
+            if not html_getreq:
+                # TODO
+                self._hashtag_search_json(calling_domain, referer_domain,
+                                          self.path, cookie,
+                                          self.server.base_dir,
+                                          self.server.http_prefix,
+                                          self.server.domain,
+                                          self.server.domain_full,
+                                          self.server.port,
+                                          self.server.onion_domain,
+                                          self.server.i2p_domain,
+                                          getreq_start_time,
+                                          curr_session)
+                self.server.getreq_busy = False
+                return
            self._hashtag_search(calling_domain,
                                 self.path, cookie,
                                 self.server.base_dir,
--- a/webapp_search.py
+++ b/webapp_search.py
@ -11,6 +11,8 @@ import os
 from shutil import copyfile
 import urllib.parse
 from datetime import datetime
+from utils import remove_id_ending
+from utils import has_object_dict
 from utils import acct_handle_dir
 from utils import get_base_content_from_post
 from utils import is_account_dir
@ -1157,3 +1159,82 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
            break

    return hashtag_feed + rss2tag_footer()
+
+
+def hashtag_search_json(nickname: str, domain: str, port: int,
+                        base_dir: str, hashtag: str,
+                        page_number: int, posts_per_page: int,
+                        http_prefix: str) -> {}:
+    """Show a json collection for a hashtag
+
+    The hashtag index is read from base_dir/tags/<hashtag>.txt, falling
+    back to the lower case hashtag if that file does not exist. Each
+    line of the index is either a post id, or three fields separated
+    by double spaces of which the second is the nickname and the third
+    is the post id.
+
+    Only public posts which can be located and which have an object
+    with an id are listed. page_number starts at 1 and each page holds
+    at most posts_per_page post ids.
+
+    Returns an OrderedCollection dict whose orderedItems are the post
+    ids for the requested page and whose totalItems is the number of
+    ids on that page, or None if the hashtag is invalid or its index
+    is missing, unreadable or empty.
+    """
+    if hashtag.startswith('#'):
+        hashtag = hashtag[1:]
+    hashtag = urllib.parse.unquote(hashtag)
+    # the hashtag comes from the request url, so after decoding it must
+    # not be able to refer to anything outside of the tags directory
+    if '/' in hashtag or '\\' in hashtag or '..' in hashtag:
+        print('WARN: invalid hashtag ' + hashtag)
+        return None
+    hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
+    if not os.path.isfile(hashtag_index_file):
+        if hashtag != hashtag.lower():
+            hashtag = hashtag.lower()
+            hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
+    if not os.path.isfile(hashtag_index_file):
+        print('WARN: hashtag file not found ' + hashtag_index_file)
+        return None
+
+    # check that the directory for the nickname exists
+    # NOTE: the nickname is not otherwise used to build the collection
+    if nickname:
+        account_dir = acct_dir(base_dir, nickname, domain)
+        if not os.path.isdir(account_dir):
+            nickname = None
+
+    # read the index
+    lines = []
+    try:
+        with open(hashtag_index_file, 'r', encoding='utf-8') as fp_hash:
+            lines = fp_hash.readlines()
+    except OSError:
+        # the file can disappear between the isfile check and the read
+        print('EX: unable to read hashtag index ' + hashtag_index_file)
+        return None
+    if not lines:
+        return None
+
+    domain_full = get_full_domain(domain, port)
+
+    url = http_prefix + '://' + domain_full + '/tags/' + \
+        hashtag + '?page=' + str(page_number)
+    hashtag_json = {
+        '@context': 'https://www.w3.org/ns/activitystreams',
+        'id': url,
+        'orderedItems': [],
+        'totalItems': 0,
+        'type': 'OrderedCollection'
+    }
+    # number of matching posts which belong to earlier pages
+    skip_items = posts_per_page * (page_number - 1)
+    page_items = 0
+    for line in lines:
+        post_id = line.strip('\n').strip('\r')
+        # use a separate variable for the author of each post, so that
+        # the nickname parameter is not overwritten
+        if '  ' not in post_id:
+            post_nickname = get_nickname_from_actor(post_id)
+            if not post_nickname:
+                continue
+        else:
+            post_fields = post_id.split('  ')
+            if len(post_fields) != 3:
+                continue
+            post_nickname = post_fields[1]
+            post_id = post_fields[2]
+        post_filename = \
+            locate_post(base_dir, post_nickname, domain, post_id)
+        if not post_filename:
+            continue
+        post_json_object = load_json(post_filename)
+        if not post_json_object:
+            continue
+        if not has_object_dict(post_json_object):
+            continue
+        if not is_public_post(post_json_object):
+            continue
+        if not post_json_object['object'].get('id'):
+            continue
+        # page_items is the 1-based position of this post amongst the
+        # matching posts, so positions up to and including skip_items
+        # are on earlier pages. Comparing with < rather than <= would
+        # repeat the last post of the previous page
+        page_items += 1
+        if page_items <= skip_items:
+            continue
+        # add to feed
+        id_str = remove_id_ending(post_json_object['object']['id'])
+        hashtag_json['orderedItems'].append(id_str)
+        hashtag_json['totalItems'] += 1
+        if hashtag_json['totalItems'] >= posts_per_page:
+            break
+
+    return hashtag_json