Returning json for hashtags

merge-requests/30/head
Bob Mottram 2023-01-05 15:56:49 +00:00
parent 37d5138c77
commit 4ae21596e4
2 changed files with 154 additions and 2 deletions

View File

@ -14,6 +14,7 @@ import json
import time import time
import urllib.parse import urllib.parse
import datetime import datetime
import os
from socket import error as SocketError from socket import error as SocketError
import errno import errno
from functools import partial from functools import partial
@ -239,6 +240,7 @@ from webapp_search import html_skills_search
from webapp_search import html_history_search from webapp_search import html_history_search
from webapp_search import html_hashtag_search from webapp_search import html_hashtag_search
from webapp_search import rss_hashtag_search from webapp_search import rss_hashtag_search
from webapp_search import hashtag_search_json
from webapp_search import html_search_emoji from webapp_search import html_search_emoji
from webapp_search import html_search_shared_items from webapp_search import html_search_shared_items
from webapp_search import html_search_emoji_text_entry from webapp_search import html_search_emoji_text_entry
@ -427,8 +429,6 @@ from maps import map_format_from_tagmaps_path
from relationships import get_moved_feed from relationships import get_moved_feed
from relationships import get_inactive_feed from relationships import get_inactive_feed
from relationships import update_moved_actors from relationships import update_moved_actors
import os
# maximum number of posts to list in outbox feed # maximum number of posts to list in outbox feed
MAX_POSTS_IN_FEED = 12 MAX_POSTS_IN_FEED = 12
@ -9175,6 +9175,62 @@ class PubServer(BaseHTTPRequestHandler):
'_GET', '_hashtag_search_rss2', '_GET', '_hashtag_search_rss2',
self.server.debug) self.server.debug)
def _hashtag_search_json(self, calling_domain: str,
                         referer_domain: str,
                         path: str, cookie: str,
                         base_dir: str, http_prefix: str,
                         domain: str, domain_full: str, port: int,
                         onion_domain: str, i2p_domain: str,
                         getreq_start_time,
                         curr_session=None) -> None:
    """Return a json collection for a hashtag.

    Responds with an ActivityStreams OrderedCollection of post ids for
    the hashtag in the request path. If the hashtag is blocked a 400 is
    returned; if it is unknown the client is redirected back to the
    page the search came from.
    curr_session is accepted (defaulting to None) so that callers which
    pass the current session do not raise a TypeError; it is not needed
    to build the collection.
    """
    # optional pagination, e.g. /tags/sometag?page=2
    page_number = 1
    if '?page=' in path:
        page_number_str = path.split('?page=')[1]
        if page_number_str.isdigit():
            page_number = int(page_number_str)
        path = path.split('?page=')[0]
    hashtag = path.split('/tags/')[1]
    if is_blocked_hashtag(base_dir, hashtag):
        self._400()
        return
    # if the request came from a particular account, note its nickname
    nickname = None
    if '/users/' in path:
        actor = \
            http_prefix + '://' + domain_full + path
        nickname = \
            get_nickname_from_actor(actor)
    hashtag_json = \
        hashtag_search_json(nickname,
                            domain, port,
                            base_dir, hashtag,
                            page_number, MAX_POSTS_IN_FEED,
                            http_prefix)
    if hashtag_json:
        msg_str = json.dumps(hashtag_json)
        msg_str = self._convert_domains(calling_domain, referer_domain,
                                        msg_str)
        msg = msg_str.encode('utf-8')
        msglen = len(msg)
        self._set_headers('application/json', msglen,
                          None, calling_domain, True)
        self._write(msg)
    else:
        # unknown hashtag: redirect back to the originating page
        origin_path_str = path.split('/tags/')[0]
        origin_path_str_absolute = \
            http_prefix + '://' + domain_full + origin_path_str
        if calling_domain.endswith('.onion') and onion_domain:
            origin_path_str_absolute = \
                'http://' + onion_domain + origin_path_str
        elif calling_domain.endswith('.i2p') and i2p_domain:
            # bugfix: previously this branch tested onion_domain, which
            # could redirect i2p clients when no i2p address was set
            origin_path_str_absolute = \
                'http://' + i2p_domain + origin_path_str
        self._redirect_headers(origin_path_str_absolute,
                               cookie, calling_domain)
    fitness_performance(getreq_start_time, self.server.fitness,
                        '_GET', '_hashtag_search_json',
                        self.server.debug)
def _announce_button(self, calling_domain: str, path: str, def _announce_button(self, calling_domain: str, path: str,
base_dir: str, base_dir: str,
cookie: str, proxy_type: str, cookie: str, proxy_type: str,
@ -18428,6 +18484,21 @@ class PubServer(BaseHTTPRequestHandler):
curr_session) curr_session)
self.server.getreq_busy = False self.server.getreq_busy = False
return return
if not html_getreq:
# TODO
self._hashtag_search_json(calling_domain, referer_domain,
self.path, cookie,
self.server.base_dir,
self.server.http_prefix,
self.server.domain,
self.server.domain_full,
self.server.port,
self.server.onion_domain,
self.server.i2p_domain,
getreq_start_time,
curr_session)
self.server.getreq_busy = False
return
self._hashtag_search(calling_domain, self._hashtag_search(calling_domain,
self.path, cookie, self.path, cookie,
self.server.base_dir, self.server.base_dir,

View File

@ -11,6 +11,8 @@ import os
from shutil import copyfile from shutil import copyfile
import urllib.parse import urllib.parse
from datetime import datetime from datetime import datetime
from utils import remove_id_ending
from utils import has_object_dict
from utils import acct_handle_dir from utils import acct_handle_dir
from utils import get_base_content_from_post from utils import get_base_content_from_post
from utils import is_account_dir from utils import is_account_dir
@ -1157,3 +1159,82 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
break break
return hashtag_feed + rss2tag_footer() return hashtag_feed + rss2tag_footer()
def hashtag_search_json(nickname: str, domain: str, port: int,
                        base_dir: str, hashtag: str,
                        page_number: int, posts_per_page: int,
                        http_prefix: str) -> {}:
    """Show a json collection for a hashtag.

    Returns an ActivityStreams OrderedCollection containing the ids of
    public posts using the hashtag, for the given page, or None if the
    hashtag index file does not exist or is empty.
    """
    if hashtag.startswith('#'):
        hashtag = hashtag[1:]
    hashtag = urllib.parse.unquote(hashtag)
    hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
    if not os.path.isfile(hashtag_index_file):
        # fall back to the lowercase form of the tag
        if hashtag != hashtag.lower():
            hashtag = hashtag.lower()
            hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
        if not os.path.isfile(hashtag_index_file):
            print('WARN: hashtag file not found ' + hashtag_index_file)
            return None

    # check that the directory for the nickname exists
    if nickname:
        account_dir = acct_dir(base_dir, nickname, domain)
        if not os.path.isdir(account_dir):
            nickname = None

    # read the index
    lines = []
    with open(hashtag_index_file, 'r', encoding='utf-8') as fp_hash:
        lines = fp_hash.readlines()
    if not lines:
        return None

    domain_full = get_full_domain(domain, port)
    url = http_prefix + '://' + domain_full + '/tags/' + \
        hashtag + '?page=' + str(page_number)
    hashtag_json = {
        '@context': 'https://www.w3.org/ns/activitystreams',
        'id': url,
        'orderedItems': [],
        'totalItems': 0,
        'type': 'OrderedCollection'
    }
    # number of qualifying posts to skip before the requested page
    items_to_skip = posts_per_page * (page_number - 1)
    page_items = 0
    for line in lines:
        post_id = line.strip('\n').strip('\r')
        if ' ' not in post_id:
            # index line is just a post id; derive the nickname from it
            nickname = get_nickname_from_actor(post_id)
            if not nickname:
                continue
        else:
            # three-field index line; fields[1] is the nickname and
            # fields[2] is the post id
            post_fields = post_id.split(' ')
            if len(post_fields) != 3:
                continue
            nickname = post_fields[1]
            post_id = post_fields[2]
        post_filename = locate_post(base_dir, nickname, domain, post_id)
        if not post_filename:
            continue
        post_json_object = load_json(post_filename)
        if not post_json_object:
            continue
        if not has_object_dict(post_json_object):
            continue
        if not is_public_post(post_json_object):
            continue
        if not post_json_object['object'].get('id'):
            continue
        # add to feed
        page_items += 1
        # bugfix: use <= so that the last item of the previous page is
        # not repeated as the first item of this page
        if page_items <= items_to_skip:
            continue
        id_str = remove_id_ending(post_json_object['object']['id'])
        hashtag_json['orderedItems'].append(id_str)
        hashtag_json['totalItems'] += 1
        if hashtag_json['totalItems'] >= posts_per_page:
            break
    return hashtag_json