Bob Mottram 2023-01-06 12:09:21 +00:00
commit 2446108faf
6 changed files with 509 additions and 4 deletions

View File

@ -765,6 +765,48 @@ def _add_hash_tags(word_str: str, http_prefix: str, domain: str,
return True return True
def replace_remote_hashtags(content: str,
                            nickname: str, domain: str) -> str:
    """Replaces remote hashtags with a local version

    Every href="..." link within content which contains '://' and
    '/tags/', and whose host is not this instance's domain, is rewritten
    to '/users/<nickname>?remotetag=<link>' with each '/' in the original
    link replaced by '--'.

    content: html content of a post
    nickname: nickname of the account viewing the post
    domain: domain of this instance
    Returns the content with remote hashtag links replaced, or the
    content unchanged if there was nothing to replace
    """
    if not domain or not nickname:
        # without both of these a local link can't be constructed
        return content
    if ' href="' not in content:
        return content

    replacements = {}
    # the text before the first href can't contain a link target,
    # so only the sections after it are examined
    for section in content.split(' href="')[1:]:
        if '"' not in section:
            continue
        link = section.split('"')[0]
        if '://' not in link:
            continue
        if '?remotetag=' in link:
            # already converted
            continue
        if '/tags/' not in link:
            continue
        # Compare the host part of the link rather than searching for the
        # domain anywhere within it, so that a remote host which merely
        # begins with the local domain is not mistaken for a local one
        link_host = link.split('://', 1)[1].split('/', 1)[0]
        if link_host == domain or link_host.startswith(domain + ':'):
            continue
        replacements[link] = \
            '/users/' + nickname + \
            '?remotetag=' + link.replace('/', '--')

    # include the surrounding quotes so that only complete
    # attribute values are replaced
    for old_link, new_link in replacements.items():
        content = content.replace('"' + old_link + '"',
                                  '"' + new_link + '"')
    return content
def _add_emoji(base_dir: str, word_str: str, def _add_emoji(base_dir: str, word_str: str,
http_prefix: str, domain: str, http_prefix: str, domain: str,
replace_emoji: {}, post_tags: {}, replace_emoji: {}, post_tags: {},

141
daemon.py
View File

@ -14,6 +14,7 @@ import json
import time import time
import urllib.parse import urllib.parse
import datetime import datetime
import os
from socket import error as SocketError from socket import error as SocketError
import errno import errno
from functools import partial from functools import partial
@ -238,7 +239,9 @@ from webapp_column_right import html_edit_news_post
from webapp_search import html_skills_search from webapp_search import html_skills_search
from webapp_search import html_history_search from webapp_search import html_history_search
from webapp_search import html_hashtag_search from webapp_search import html_hashtag_search
from webapp_search import html_hashtag_search_remote
from webapp_search import rss_hashtag_search from webapp_search import rss_hashtag_search
from webapp_search import hashtag_search_json
from webapp_search import html_search_emoji from webapp_search import html_search_emoji
from webapp_search import html_search_shared_items from webapp_search import html_search_shared_items
from webapp_search import html_search_emoji_text_entry from webapp_search import html_search_emoji_text_entry
@ -427,8 +430,6 @@ from maps import map_format_from_tagmaps_path
from relationships import get_moved_feed from relationships import get_moved_feed
from relationships import get_inactive_feed from relationships import get_inactive_feed
from relationships import update_moved_actors from relationships import update_moved_actors
import os
# maximum number of posts to list in outbox feed # maximum number of posts to list in outbox feed
MAX_POSTS_IN_FEED = 12 MAX_POSTS_IN_FEED = 12
@ -9175,6 +9176,62 @@ class PubServer(BaseHTTPRequestHandler):
'_GET', '_hashtag_search_rss2', '_GET', '_hashtag_search_rss2',
self.server.debug) self.server.debug)
def _hashtag_search_json(self, calling_domain: str,
                         referer_domain: str,
                         path: str, cookie: str,
                         base_dir: str, http_prefix: str,
                         domain: str, domain_full: str, port: int,
                         onion_domain: str, i2p_domain: str,
                         getreq_start_time) -> None:
    """Return a json collection for a hashtag

    The path is expected to contain '/tags/<hashtag>', optionally
    followed by '?page=<number>'. If the hashtag is blocked then a 400
    is returned. If a collection can be produced then it is sent as
    application/json, otherwise the request is redirected to the part
    of the path preceding '/tags/'.
    """
    page_number = 1
    if '?page=' in path:
        page_number_str = path.split('?page=')[1]
        if page_number_str.isdigit():
            page_number = int(page_number_str)
        path = path.split('?page=')[0]
    hashtag = path.split('/tags/')[1]
    if is_blocked_hashtag(base_dir, hashtag):
        self._400()
        return
    nickname = None
    if '/users/' in path:
        actor = \
            http_prefix + '://' + domain_full + path
        nickname = \
            get_nickname_from_actor(actor)
    hashtag_json = \
        hashtag_search_json(nickname,
                            domain, port,
                            base_dir, hashtag,
                            page_number, MAX_POSTS_IN_FEED,
                            http_prefix)
    if hashtag_json:
        msg_str = json.dumps(hashtag_json)
        msg_str = self._convert_domains(calling_domain, referer_domain,
                                        msg_str)
        msg = msg_str.encode('utf-8')
        msglen = len(msg)
        self._set_headers('application/json', msglen,
                          None, calling_domain, True)
        self._write(msg)
    else:
        # nothing to return, so go back to where the request came from
        origin_path_str = path.split('/tags/')[0]
        origin_path_str_absolute = \
            http_prefix + '://' + domain_full + origin_path_str
        if calling_domain.endswith('.onion') and onion_domain:
            origin_path_str_absolute = \
                'http://' + onion_domain + origin_path_str
        elif calling_domain.endswith('.i2p') and i2p_domain:
            # the i2p redirect needs the i2p domain to be configured,
            # regardless of whether an onion domain exists
            origin_path_str_absolute = \
                'http://' + i2p_domain + origin_path_str
        self._redirect_headers(origin_path_str_absolute,
                               cookie, calling_domain)
    fitness_performance(getreq_start_time, self.server.fitness,
                        '_GET', '_hashtag_search_json',
                        self.server.debug)
def _announce_button(self, calling_domain: str, path: str, def _announce_button(self, calling_domain: str, path: str,
base_dir: str, base_dir: str,
cookie: str, proxy_type: str, cookie: str, proxy_type: str,
@ -18411,6 +18468,73 @@ class PubServer(BaseHTTPRequestHandler):
self.server.getreq_busy = False self.server.getreq_busy = False
return return
if '?remotetag=' in self.path and \
'/users/' in self.path and authorized:
actor = self.path.split('?remotetag=')[0]
nickname = get_nickname_from_actor(actor)
hashtag_url = self.path.split('?remotetag=')[1]
if ';' in hashtag_url:
hashtag_url = hashtag_url.split(';')[0]
hashtag_url = hashtag_url.replace('--', '/')
page_number = 1
if ';page=' in self.path:
page_number_str = self.path.split(';page=')[1]
if ';' in page_number_str:
page_number_str = page_number_str.split(';')[0]
if page_number_str.isdigit():
page_number = int(page_number_str)
allow_local_network_access = self.server.allow_local_network_access
show_published_date_only = self.server.show_published_date_only
twitter_replacement_domain = self.server.twitter_replacement_domain
timezone = None
if self.server.account_timezone.get(nickname):
timezone = \
self.server.account_timezone.get(nickname)
msg = \
html_hashtag_search_remote(nickname,
self.server.domain,
self.server.port,
self.server.recent_posts_cache,
self.server.max_recent_posts,
self.server.translate,
self.server.base_dir,
hashtag_url,
page_number, MAX_POSTS_IN_FEED,
self.server.session,
self.server.cached_webfingers,
self.server.person_cache,
self.server.http_prefix,
self.server.project_version,
self.server.yt_replace_domain,
twitter_replacement_domain,
show_published_date_only,
self.server.peertube_instances,
allow_local_network_access,
self.server.theme_name,
self.server.system_language,
self.server.max_like_count,
self.server.signing_priv_key_pem,
self.server.cw_lists,
self.server.lists_enabled,
timezone,
self.server.bold_reading,
self.server.dogwhistles,
self.server.min_images_for_accounts,
self.server.debug)
if msg:
msg = msg.encode('utf-8')
msglen = len(msg)
self._set_headers('text/html', msglen, cookie, calling_domain,
False)
self._write(msg)
self.server.getreq_busy = False
return
self._404()
self.server.getreq_busy = False
return
# hashtag search # hashtag search
if self.path.startswith('/tags/') or \ if self.path.startswith('/tags/') or \
(authorized and '/tags/' in self.path): (authorized and '/tags/' in self.path):
@ -18428,6 +18552,19 @@ class PubServer(BaseHTTPRequestHandler):
curr_session) curr_session)
self.server.getreq_busy = False self.server.getreq_busy = False
return return
if not html_getreq:
self._hashtag_search_json(calling_domain, referer_domain,
self.path, cookie,
self.server.base_dir,
self.server.http_prefix,
self.server.domain,
self.server.domain_full,
self.server.port,
self.server.onion_domain,
self.server.i2p_domain,
getreq_start_time)
self.server.getreq_busy = False
return
self._hashtag_search(calling_domain, self._hashtag_search(calling_domain,
self.path, cookie, self.path, cookie,
self.server.base_dir, self.server.base_dir,

View File

@ -436,6 +436,12 @@ The location field on a post can be a description, but it can also be a map geol
Selecting the *location* header will open the last known geolocation, so if your current location is near this makes it quicker to find. Selecting the *location* header will open the last known geolocation, so if your current location is near this makes it quicker to find.
## Scientific references
References to scientific papers can be linked automatically, so that the paper can be opened with a single click or tap. Supported reference types are [arXiv](https://arxiv.org) and [Digital Object Identifier (DOI)](https://en.wikipedia.org/wiki/Digital_object_identifier). For example:
```text
This is a reference to a paper: arxiv:2203.15752
```
# The Timeline # The Timeline
## Layout ## Layout
![Layout](manual-layout.png) ![Layout](manual-layout.png)

View File

@ -135,6 +135,7 @@ from inbox import valid_inbox
from inbox import valid_inbox_filenames from inbox import valid_inbox_filenames
from inbox import cache_svg_images from inbox import cache_svg_images
from categories import guess_hashtag_category from categories import guess_hashtag_category
from content import replace_remote_hashtags
from content import add_name_emojis_to_tags from content import add_name_emojis_to_tags
from content import combine_textarea_lines from content import combine_textarea_lines
from content import detect_dogwhistles from content import detect_dogwhistles
@ -7719,6 +7720,31 @@ def _test_replace_variable():
assert result == expected assert result == expected
def _test_replace_remote_tags() -> None:
    """Checks that only links to hashtags on other domains get replaced
    """
    print('replace_remote_tags')
    nickname = 'mynick'
    domain = 'furious.duck'

    # content without any links is returned unchanged
    plain_content = 'This is a test'
    assert replace_remote_hashtags(plain_content,
                                   nickname, domain) == plain_content

    # a link which isn't a hashtag is returned unchanged
    other_link = "https://something/else/mytag"
    other_content = 'This is href="' + other_link + '" test'
    assert replace_remote_hashtags(other_content,
                                   nickname, domain) == other_content

    # a hashtag link on another domain becomes a local remotetag link
    tag_link = "https://something/tags/mytag"
    tag_content = 'This is href="' + tag_link + '" test'
    expected = \
        'This is href="/users/' + nickname + '?remotetag=' + \
        tag_link.replace('/', '--') + '" test'
    result = replace_remote_hashtags(tag_content, nickname, domain)
    if result != expected:
        # show both versions to make a failure easier to diagnose
        print(expected)
        print(result)
    assert result == expected
def run_all_tests(): def run_all_tests():
base_dir = os.getcwd() base_dir = os.getcwd()
print('Running tests...') print('Running tests...')
@ -7736,6 +7762,7 @@ def run_all_tests():
_test_checkbox_names() _test_checkbox_names()
_test_thread_functions() _test_thread_functions()
_test_functions() _test_functions()
_test_replace_remote_tags()
_test_replace_variable() _test_replace_variable()
_test_missing_theme_colors(base_dir) _test_missing_theme_colors(base_dir)
_test_reply_language(base_dir) _test_reply_language(base_dir)

View File

@ -67,6 +67,7 @@ from utils import get_domain_from_actor
from utils import acct_dir from utils import acct_dir
from utils import local_actor_url from utils import local_actor_url
from utils import is_unlisted_post from utils import is_unlisted_post
from content import replace_remote_hashtags
from content import detect_dogwhistles from content import detect_dogwhistles
from content import create_edits_html from content import create_edits_html
from content import bold_reading_string from content import bold_reading_string
@ -1821,7 +1822,9 @@ def _get_copyright_footer(content_license_url: str,
elif '/fdl' in content_license_url: elif '/fdl' in content_license_url:
icon_filename = 'license_fdl.png' icon_filename = 'license_fdl.png'
description = translate['Content License'] description = 'Content License'
if translate.get('Content License'):
description = translate['Content License']
copyright_str = \ copyright_str = \
' ' + \ ' ' + \
'<a class="imageAnchor" href="' + content_license_url + \ '<a class="imageAnchor" href="' + content_license_url + \
@ -2445,6 +2448,8 @@ def individual_post_as_html(signing_priv_key_pem: str,
system_language, translate) system_language, translate)
if not content_str: if not content_str:
return '' return ''
content_str = \
replace_remote_hashtags(content_str, nickname, domain)
summary_str = '' summary_str = ''
if content_str: if content_str:

View File

@ -11,6 +11,8 @@ import os
from shutil import copyfile from shutil import copyfile
import urllib.parse import urllib.parse
from datetime import datetime from datetime import datetime
from utils import remove_id_ending
from utils import has_object_dict
from utils import acct_handle_dir from utils import acct_handle_dir
from utils import get_base_content_from_post from utils import get_base_content_from_post
from utils import is_account_dir from utils import is_account_dir
@ -44,6 +46,7 @@ from webapp_utils import html_search_result_share
from webapp_post import individual_post_as_html from webapp_post import individual_post_as_html
from webapp_hashtagswarm import html_hash_tag_swarm from webapp_hashtagswarm import html_hash_tag_swarm
from maps import html_hashtag_maps from maps import html_hashtag_maps
from session import get_json
def html_search_emoji(translate: {}, base_dir: str, search_str: str, def html_search_emoji(translate: {}, base_dir: str, search_str: str,
@ -961,6 +964,7 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
'" alt="' + translate['Page up'] + \ '" alt="' + translate['Page up'] + \
'"></a>\n </center>\n' '"></a>\n </center>\n'
index = start_index index = start_index
text_mode_separator = '<div class="transparent"><hr></div>'
while index <= end_index: while index <= end_index:
post_id = lines[index].strip('\n').strip('\r') post_id = lines[index].strip('\n').strip('\r')
if ' ' not in post_id: if ' ' not in post_id:
@ -1029,9 +1033,12 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
bold_reading, dogwhistles, bold_reading, dogwhistles,
minimize_all_images, None) minimize_all_images, None)
if post_str: if post_str:
hashtag_search_form += separator_str + post_str hashtag_search_form += \
text_mode_separator + separator_str + post_str
index += 1 index += 1
hashtag_search_form += text_mode_separator
if end_index < no_of_lines - 1: if end_index < no_of_lines - 1:
# next page link # next page link
hashtag_search_form += \ hashtag_search_form += \
@ -1047,6 +1054,207 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
return hashtag_search_form return hashtag_search_form
def html_hashtag_search_remote(nickname: str, domain: str, port: int,
                               recent_posts_cache: {}, max_recent_posts: int,
                               translate: {},
                               base_dir: str, hashtag_url: str,
                               page_number: int, posts_per_page: int,
                               session, cached_webfingers: {},
                               person_cache: {},
                               http_prefix: str, project_version: str,
                               yt_replace_domain: str,
                               twitter_replacement_domain: str,
                               show_published_date_only: bool,
                               peertube_instances: [],
                               allow_local_network_access: bool,
                               theme_name: str, system_language: str,
                               max_like_count: int,
                               signing_priv_key_pem: str,
                               cw_lists: {}, lists_enabled: str,
                               timezone: str, bold_reading: bool,
                               dogwhistles: {},
                               min_images_for_accounts: [],
                               debug: bool) -> str:
    """Show a page containing search results for a remote hashtag

    The collection at hashtag_url is fetched as ActivityPub json, each
    entry of its orderedItems is fetched in turn and the public posts
    are rendered with individual_post_as_html, up to posts_per_page.
    Links to the previous and next pages are added when the collection
    has 'prev' and 'next' fields.

    nickname: account viewing the search, used for the paging links
    hashtag_url: url of the remote hashtag collection
    page_number: page to show, where values below 1 are treated as 1
    The remaining parameters are passed through to get_json and
    individual_post_as_html.
    Returns the html for the page, or None if there is no nickname or
    the account directory for the nickname does not exist
    """
    # The paging links are built from the nickname, so nothing can be
    # shown without a valid account. Check this before fetching anything.
    if not nickname:
        return None
    account_dir = acct_dir(base_dir, nickname, domain)
    if not os.path.isdir(account_dir):
        return None

    # ensure that the page number is in bounds
    # before it is used within the collection url
    if not page_number:
        page_number = 1
    elif page_number < 1:
        page_number = 1

    hashtag = hashtag_url.split('/')[-1]

    profile_str = 'https://www.w3.org/ns/activitystreams'
    as_header = {
        'Accept': 'application/activity+json; profile="' + profile_str + '"'
    }
    hashtag_url_with_page = hashtag_url
    if '?page=' not in hashtag_url_with_page:
        hashtag_url_with_page += '?page=' + str(page_number)
    hashtag_json = \
        get_json(signing_priv_key_pem,
                 session, hashtag_url_with_page, as_header, None, debug,
                 __version__, http_prefix, domain)
    lines = []
    if not hashtag_json:
        print('WARN: no hashtags returned for url ' + hashtag_url)
        # an empty collection keeps the paging checks below simple
        hashtag_json = {}
    elif not isinstance(hashtag_json, dict):
        print('No orderedItems in hashtag collection ' + str(hashtag_json))
        hashtag_json = {}
    elif isinstance(hashtag_json.get('orderedItems'), list):
        lines = hashtag_json['orderedItems']
    else:
        print('No orderedItems in hashtag collection ' + str(hashtag_json))

    separator_str = html_post_separator(base_dir, None)

    # read the css
    css_filename = base_dir + '/epicyon-profile.css'
    if os.path.isfile(base_dir + '/epicyon.css'):
        css_filename = base_dir + '/epicyon.css'

    instance_title = \
        get_config_param(base_dir, 'instanceTitle')
    hashtag_search_form = \
        html_header_with_external_style(css_filename, instance_title, None)

    # add the page title
    hashtag_search_form += '<center>\n' + \
        '<h1>#' + hashtag

    # RSS link for hashtag feed
    hashtag_rss = hashtag_url
    if '.html' in hashtag_rss:
        hashtag_rss = hashtag_rss.replace('.html', '')
    hashtag_search_form += ' <a href="' + hashtag_rss + '.rss">'
    hashtag_search_form += \
        '<img style="width:3%;min-width:50px" ' + \
        'loading="lazy" decoding="async" ' + \
        'alt="RSS 2.0" title="RSS 2.0" src="/' + \
        'icons/logorss.png" /></a></h1>\n'

    tag_link = '/users/' + nickname + '?remotetag=' + \
        hashtag_url.replace('/', '--')
    if page_number > 1 and hashtag_json.get('prev'):
        # previous page link
        hashtag_search_form += \
            '  <center>\n' + \
            '    <a href="' + tag_link + ';page=' + \
            str(page_number - 1) + \
            '"><img loading="lazy" decoding="async" ' + \
            'class="pageicon" src="/' + \
            'icons/pageup.png" title="' + \
            translate['Page up'] + \
            '" alt="' + translate['Page up'] + \
            '"></a>\n  </center>\n'

    # these settings are the same for every post shown
    show_individual_post_icons = False
    allow_deletion = False
    show_repeats = show_individual_post_icons
    show_icons = show_individual_post_icons
    manually_approves_followers = False
    show_public_only = False
    store_to_cache = False
    allow_downloads = True
    avatar_url = None
    show_avatar_options = True
    minimize_all_images = False
    if nickname in min_images_for_accounts:
        minimize_all_images = True

    text_mode_separator = '<div class="transparent"><hr></div>'
    post_ctr = 0
    for item in lines:
        # collection items can be urls or embedded objects having an id
        post_id = item
        if isinstance(item, dict):
            post_id = item.get('id')
        if not post_id or not isinstance(post_id, str):
            print('Hashtag collection item has no usable id ' + str(item))
            continue
        print('Hashtag post_id ' + post_id)
        post_json_object = \
            get_json(signing_priv_key_pem,
                     session, post_id, as_header, None, debug,
                     __version__, http_prefix, domain)
        if not post_json_object:
            print('No hashtag post for ' + post_id)
            continue
        if not isinstance(post_json_object, dict):
            print('Hashtag post is not a dict ' + str(post_json_object))
            continue
        if not has_object_dict(post_json_object):
            if post_json_object.get('id') and \
               'to' in post_json_object and \
               'cc' in post_json_object:
                # a bare object was returned,
                # so wrap it within a Create activity
                new_url = \
                    remove_id_ending(post_json_object['id'])
                actor = new_url
                if '/statuses/' in actor:
                    actor = actor.split('/statuses/')[0]
                new_post_json_object = {
                    "type": "Create",
                    "id": new_url + '/activity',
                    "to": post_json_object['to'],
                    "cc": post_json_object['cc'],
                    "actor": actor,
                    "object": post_json_object
                }
                post_json_object = new_post_json_object
            else:
                print('Hashtag post does not contain necessary fields ' +
                      str(post_json_object))
                continue
        if not is_public_post(post_json_object):
            print('Hashtag post is not public ' + post_id)
            continue
        post_str = \
            individual_post_as_html(signing_priv_key_pem,
                                    allow_downloads, recent_posts_cache,
                                    max_recent_posts,
                                    translate, None,
                                    base_dir, session, cached_webfingers,
                                    person_cache,
                                    nickname, domain, port,
                                    post_json_object,
                                    avatar_url, show_avatar_options,
                                    allow_deletion,
                                    http_prefix, project_version,
                                    'search',
                                    yt_replace_domain,
                                    twitter_replacement_domain,
                                    show_published_date_only,
                                    peertube_instances,
                                    allow_local_network_access,
                                    theme_name, system_language,
                                    max_like_count,
                                    show_repeats, show_icons,
                                    manually_approves_followers,
                                    show_public_only,
                                    store_to_cache, False, cw_lists,
                                    lists_enabled, timezone, False,
                                    bold_reading, dogwhistles,
                                    minimize_all_images, None)
        if post_str:
            hashtag_search_form += \
                text_mode_separator + separator_str + post_str
            post_ctr += 1
            if post_ctr >= posts_per_page:
                break

    hashtag_search_form += text_mode_separator

    # a next page is only offered if at least five posts were shown
    if post_ctr >= 5 and hashtag_json.get('next'):
        # next page link
        hashtag_search_form += \
            '  <center>\n' + \
            '    <a href="' + tag_link + \
            ';page=' + str(page_number + 1) + \
            '"><img loading="lazy" decoding="async" ' + \
            'class="pageicon" src="/icons' + \
            '/pagedown.png" title="' + translate['Page down'] + \
            '" alt="' + translate['Page down'] + '"></a>' + \
            '  </center>'
    hashtag_search_form += html_footer()
    return hashtag_search_form
def rss_hashtag_search(nickname: str, domain: str, port: int, def rss_hashtag_search(nickname: str, domain: str, port: int,
recent_posts_cache: {}, max_recent_posts: int, recent_posts_cache: {}, max_recent_posts: int,
translate: {}, translate: {},
@ -1157,3 +1365,83 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
break break
return hashtag_feed + rss2tag_footer() return hashtag_feed + rss2tag_footer()
def hashtag_search_json(nickname: str, domain: str, port: int,
                        base_dir: str, hashtag: str,
                        page_number: int, posts_per_page: int,
                        http_prefix: str) -> {}:
    """Show a json collection for a hashtag

    Reads the index file base_dir/tags/<hashtag>.txt and returns an
    OrderedCollection whose orderedItems are the ids of the public
    posts on the given page.

    nickname: not used when building the collection. It is kept so
              that existing callers continue to work
    hashtag: the hashtag, with or without a leading '#'. If no index
             exists for it then its lower case version is tried
    page_number: page to return, where values below 1 are treated as 1
    posts_per_page: maximum number of post ids within the collection
    Returns the collection, or None if the hashtag index file doesn't
    exist or is empty. totalItems is the number of ids on this page
    """
    if hashtag.startswith('#'):
        hashtag = hashtag[1:]
    hashtag = urllib.parse.unquote(hashtag)
    hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
    if not os.path.isfile(hashtag_index_file):
        if hashtag != hashtag.lower():
            hashtag = hashtag.lower()
            hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
    if not os.path.isfile(hashtag_index_file):
        print('WARN: hashtag file not found ' + hashtag_index_file)
        return None

    # read the index
    lines = []
    with open(hashtag_index_file, 'r', encoding='utf-8') as fp_hash:
        lines = fp_hash.readlines()
    if not lines:
        return None

    # ensure that the page number is in bounds
    if not page_number or page_number < 1:
        page_number = 1

    domain_full = get_full_domain(domain, port)

    url = http_prefix + '://' + domain_full + '/tags/' + \
        hashtag + '?page=' + str(page_number)
    hashtag_json = {
        '@context': 'https://www.w3.org/ns/activitystreams',
        'id': url,
        'orderedItems': [],
        'totalItems': 0,
        'type': 'OrderedCollection'
    }

    # number of public posts belonging to the preceding pages
    skip_posts = posts_per_page * (page_number - 1)
    public_posts = 0
    for line in lines:
        post_id = line.strip('\n').strip('\r')
        if ' ' not in post_id:
            post_nickname = get_nickname_from_actor(post_id)
            if not post_nickname:
                continue
        else:
            post_fields = post_id.split(' ')
            if len(post_fields) != 3:
                continue
            post_nickname = post_fields[1]
            post_id = post_fields[2]
        post_filename = \
            locate_post(base_dir, post_nickname, domain, post_id)
        if not post_filename:
            continue
        post_json_object = load_json(post_filename)
        if not post_json_object:
            continue
        if not has_object_dict(post_json_object):
            continue
        if not is_public_post(post_json_object):
            continue
        if not post_json_object['object'].get('id'):
            continue
        # Skip every post shown on an earlier page. The comparison has
        # to include skip_posts itself, otherwise the last post of the
        # previous page would be repeated as the first post of this one
        public_posts += 1
        if public_posts <= skip_posts:
            continue
        # add to feed
        id_str = remove_id_ending(post_json_object['object']['id'])
        hashtag_json['orderedItems'].append(id_str)
        hashtag_json['totalItems'] += 1
        if hashtag_json['totalItems'] >= posts_per_page:
            break

    return hashtag_json