Returning json for hashtags

merge-requests/30/head
Bob Mottram 2023-01-05 15:56:49 +00:00
parent 37d5138c77
commit 4ae21596e4
2 changed files with 154 additions and 2 deletions

View File

@ -14,6 +14,7 @@ import json
import time
import urllib.parse
import datetime
import os
from socket import error as SocketError
import errno
from functools import partial
@ -239,6 +240,7 @@ from webapp_search import html_skills_search
from webapp_search import html_history_search
from webapp_search import html_hashtag_search
from webapp_search import rss_hashtag_search
from webapp_search import hashtag_search_json
from webapp_search import html_search_emoji
from webapp_search import html_search_shared_items
from webapp_search import html_search_emoji_text_entry
@ -427,8 +429,6 @@ from maps import map_format_from_tagmaps_path
from relationships import get_moved_feed
from relationships import get_inactive_feed
from relationships import update_moved_actors
import os
# maximum number of posts to list in outbox feed
MAX_POSTS_IN_FEED = 12
@ -9175,6 +9175,62 @@ class PubServer(BaseHTTPRequestHandler):
'_GET', '_hashtag_search_rss2',
self.server.debug)
def _hashtag_search_json(self, calling_domain: str,
                         referer_domain: str,
                         path: str, cookie: str,
                         base_dir: str, http_prefix: str,
                         domain: str, domain_full: str, port: int,
                         onion_domain: str, i2p_domain: str,
                         getreq_start_time,
                         curr_session=None) -> None:
    """Return a json collection for a hashtag

    The path is expected to contain '/tags/<hashtag>', optionally
    followed by '?page=<number>'. If hashtag_search_json returns a
    collection it is sent as application/json, otherwise the client is
    redirected to the part of the path which precedes '/tags/'.

    calling_domain: domain which the request was made to, used to select
        a clearnet, onion or i2p redirect address
    referer_domain: passed to self._convert_domains together with
        calling_domain before the json is sent
    path: request path containing the hashtag and optional page number
    cookie: passed through to the redirect headers
    getreq_start_time: start time given to fitness_performance
    curr_session: not used here. It is accepted, with a default, because
        the GET handler passes it as a trailing positional argument and
        the call would otherwise fail with a TypeError
    """
    page_number = 1
    if '?page=' in path:
        page_number_str = path.split('?page=')[1]
        if page_number_str.isdigit():
            page_number = int(page_number_str)
        path = path.split('?page=')[0]
    if '/tags/' not in path:
        # without this check the split below raises IndexError
        self._400()
        return
    hashtag = path.split('/tags/')[1]
    if is_blocked_hashtag(base_dir, hashtag):
        self._400()
        return
    nickname = None
    if '/users/' in path:
        actor = \
            http_prefix + '://' + domain_full + path
        nickname = \
            get_nickname_from_actor(actor)
    hashtag_json = \
        hashtag_search_json(nickname,
                            domain, port,
                            base_dir, hashtag,
                            page_number, MAX_POSTS_IN_FEED,
                            http_prefix)
    if hashtag_json:
        msg_str = json.dumps(hashtag_json)
        msg_str = self._convert_domains(calling_domain, referer_domain,
                                        msg_str)
        msg = msg_str.encode('utf-8')
        msglen = len(msg)
        self._set_headers('application/json', msglen,
                          None, calling_domain, True)
        self._write(msg)
    else:
        # nothing to show, so go back to where the search came from
        origin_path_str = path.split('/tags/')[0]
        origin_path_str_absolute = \
            http_prefix + '://' + domain_full + origin_path_str
        if calling_domain.endswith('.onion') and onion_domain:
            origin_path_str_absolute = \
                'http://' + onion_domain + origin_path_str
        elif calling_domain.endswith('.i2p') and i2p_domain:
            # the i2p redirect depends on the i2p domain being set,
            # not on the onion domain
            origin_path_str_absolute = \
                'http://' + i2p_domain + origin_path_str
        self._redirect_headers(origin_path_str_absolute,
                               cookie, calling_domain)
    fitness_performance(getreq_start_time, self.server.fitness,
                        '_GET', '_hashtag_search_json',
                        self.server.debug)
def _announce_button(self, calling_domain: str, path: str,
base_dir: str,
cookie: str, proxy_type: str,
@ -18428,6 +18484,21 @@ class PubServer(BaseHTTPRequestHandler):
curr_session)
self.server.getreq_busy = False
return
if not html_getreq:
# TODO
self._hashtag_search_json(calling_domain, referer_domain,
self.path, cookie,
self.server.base_dir,
self.server.http_prefix,
self.server.domain,
self.server.domain_full,
self.server.port,
self.server.onion_domain,
self.server.i2p_domain,
getreq_start_time,
curr_session)
self.server.getreq_busy = False
return
self._hashtag_search(calling_domain,
self.path, cookie,
self.server.base_dir,

View File

@ -11,6 +11,8 @@ import os
from shutil import copyfile
import urllib.parse
from datetime import datetime
from utils import remove_id_ending
from utils import has_object_dict
from utils import acct_handle_dir
from utils import get_base_content_from_post
from utils import is_account_dir
@ -1157,3 +1159,82 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
break
return hashtag_feed + rss2tag_footer()
def hashtag_search_json(nickname: str, domain: str, port: int,
                        base_dir: str, hashtag: str,
                        page_number: int, posts_per_page: int,
                        http_prefix: str) -> {}:
    """Show a json collection for a hashtag

    Reads the index file base_dir/tags/<hashtag>.txt and returns an
    OrderedCollection dict whose orderedItems are the ids of the public
    posts on the requested page.

    nickname: not needed to build the collection. Kept so that existing
        callers do not need to change
    domain, port: used to locate posts and to build the collection id
    hashtag: hashtag name, optionally with a leading '#' and optionally
        url encoded
    page_number: page to return, starting from 1
    posts_per_page: maximum number of post ids in the returned page
    http_prefix: scheme used for the collection id

    Returns None if the hashtag is invalid, or if its index file is
    missing, unreadable or empty. Note that totalItems is the number of
    ids in the returned page.
    """
    if hashtag.startswith('#'):
        hashtag = hashtag[1:]
    hashtag = urllib.parse.unquote(hashtag)
    # The hashtag comes from the request url and becomes part of a
    # filename, so it must not be able to leave the tags directory
    if not hashtag or '/' in hashtag or \
       '\\' in hashtag or '..' in hashtag:
        return None
    hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
    if not os.path.isfile(hashtag_index_file):
        # fall back to the lower case version of the hashtag
        if hashtag != hashtag.lower():
            hashtag = hashtag.lower()
            hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
    if not os.path.isfile(hashtag_index_file):
        print('WARN: hashtag file not found ' + hashtag_index_file)
        return None

    # read the index
    lines = []
    try:
        with open(hashtag_index_file, 'r', encoding='utf-8') as fp_hash:
            lines = fp_hash.readlines()
    except OSError:
        # the file can disappear between the check above and the read
        print('WARN: unable to read hashtag file ' + hashtag_index_file)
        return None
    if not lines:
        return None

    domain_full = get_full_domain(domain, port)

    url = http_prefix + '://' + domain_full + '/tags/' + \
        hashtag + '?page=' + str(page_number)
    hashtag_json = {
        '@context': 'https://www.w3.org/ns/activitystreams',
        'id': url,
        'orderedItems': [],
        'totalItems': 0,
        'type': 'OrderedCollection'
    }
    # number of qualifying posts which belong to earlier pages
    skip_items = posts_per_page * (page_number - 1)
    page_items = 0
    for line in lines:
        post_id = line.strip('\n').strip('\r')
        if ' ' not in post_id:
            # the line is just a post id
            post_nickname = get_nickname_from_actor(post_id)
            if not post_nickname:
                continue
        else:
            # three space separated fields, of which the second is the
            # nickname and the third is the post id
            post_fields = post_id.split(' ')
            if len(post_fields) != 3:
                continue
            post_nickname = post_fields[1]
            post_id = post_fields[2]
        post_filename = \
            locate_post(base_dir, post_nickname, domain, post_id)
        if not post_filename:
            continue
        post_json_object = load_json(post_filename)
        if not post_json_object:
            continue
        if not has_object_dict(post_json_object):
            continue
        if not is_public_post(post_json_object):
            continue
        if not post_json_object['object'].get('id'):
            continue
        # add to feed
        page_items += 1
        # <= rather than < so that the first post of this page is not
        # a repeat of the last post of the previous page
        if page_items <= skip_items:
            continue
        id_str = remove_id_ending(post_json_object['object']['id'])
        hashtag_json['orderedItems'].append(id_str)
        hashtag_json['totalItems'] += 1
        if hashtag_json['totalItems'] >= posts_per_page:
            break

    return hashtag_json