Bob Mottram 2023-01-06 12:09:21 +00:00
commit 2446108faf
6 changed files with 509 additions and 4 deletions

View File

@ -765,6 +765,48 @@ def _add_hash_tags(word_str: str, http_prefix: str, domain: str,
return True
def replace_remote_hashtags(content: str,
                            nickname: str, domain: str) -> str:
    """Replaces remote hashtag links within content with a local
    /users/{nickname}?remotetag= link, with '/' encoded as '--',
    so that selecting a hashtag from another instance opens the
    local remote-hashtag search page.

    content: html content of a post
    nickname: local account nickname used to build the replacement link
    domain: this instance's domain; links containing it are left alone
    Returns the content with remote hashtag hrefs rewritten.
    """
    if not domain:
        return content
    if ' href="' not in content:
        return content
    replacements = {}
    # the first split section precedes any href, so skip it
    # (this replaces the previous ctr-based bookkeeping, which failed to
    # increment on one branch and only ever distinguished the first section)
    for section in content.split(' href="')[1:]:
        if '"' not in section:
            continue
        link = section.split('"')[0]
        # only rewrite absolute links
        if '://' not in link:
            continue
        # already rewritten to a local remote-tag link
        if '?remotetag=' in link:
            continue
        # only hashtag links
        if '/tags/' not in link:
            continue
        # leave links on this instance alone
        if '/' + domain not in link:
            new_link = '/users/' + nickname + \
                '?remotetag=' + link.replace('/', '--')
            replacements[link] = new_link
    for old_link, new_link in replacements.items():
        content = content.replace('"' + old_link + '"',
                                  '"' + new_link + '"')
    return content
def _add_emoji(base_dir: str, word_str: str,
http_prefix: str, domain: str,
replace_emoji: {}, post_tags: {},

141
daemon.py
View File

@ -14,6 +14,7 @@ import json
import time
import urllib.parse
import datetime
import os
from socket import error as SocketError
import errno
from functools import partial
@ -238,7 +239,9 @@ from webapp_column_right import html_edit_news_post
from webapp_search import html_skills_search
from webapp_search import html_history_search
from webapp_search import html_hashtag_search
from webapp_search import html_hashtag_search_remote
from webapp_search import rss_hashtag_search
from webapp_search import hashtag_search_json
from webapp_search import html_search_emoji
from webapp_search import html_search_shared_items
from webapp_search import html_search_emoji_text_entry
@ -427,8 +430,6 @@ from maps import map_format_from_tagmaps_path
from relationships import get_moved_feed
from relationships import get_inactive_feed
from relationships import update_moved_actors
import os
# maximum number of posts to list in outbox feed
MAX_POSTS_IN_FEED = 12
@ -9175,6 +9176,62 @@ class PubServer(BaseHTTPRequestHandler):
'_GET', '_hashtag_search_rss2',
self.server.debug)
def _hashtag_search_json(self, calling_domain: str,
                         referer_domain: str,
                         path: str, cookie: str,
                         base_dir: str, http_prefix: str,
                         domain: str, domain_full: str, port: int,
                         onion_domain: str, i2p_domain: str,
                         getreq_start_time) -> None:
    """Return a json collection for a hashtag.

    path is expected to contain '/tags/{hashtag}' and optionally
    '?page={n}'. Blocked hashtags get a 400 response. If no json
    collection is available the request is redirected back to the
    path preceding '/tags/', translated to the onion/i2p address
    when the caller arrived via one of those domains.
    """
    page_number = 1
    if '?page=' in path:
        page_number_str = path.split('?page=')[1]
        if page_number_str.isdigit():
            page_number = int(page_number_str)
        path = path.split('?page=')[0]
    hashtag = path.split('/tags/')[1]
    if is_blocked_hashtag(base_dir, hashtag):
        self._400()
        return
    nickname = None
    if '/users/' in path:
        actor = \
            http_prefix + '://' + domain_full + path
        nickname = \
            get_nickname_from_actor(actor)
    hashtag_json = \
        hashtag_search_json(nickname,
                            domain, port,
                            base_dir, hashtag,
                            page_number, MAX_POSTS_IN_FEED,
                            http_prefix)
    if hashtag_json:
        msg_str = json.dumps(hashtag_json)
        msg_str = self._convert_domains(calling_domain, referer_domain,
                                        msg_str)
        msg = msg_str.encode('utf-8')
        msglen = len(msg)
        self._set_headers('application/json', msglen,
                          None, calling_domain, True)
        self._write(msg)
    else:
        # no collection available, redirect back to the origin path
        origin_path_str = path.split('/tags/')[0]
        origin_path_str_absolute = \
            http_prefix + '://' + domain_full + origin_path_str
        if calling_domain.endswith('.onion') and onion_domain:
            origin_path_str_absolute = \
                'http://' + onion_domain + origin_path_str
        elif calling_domain.endswith('.i2p') and i2p_domain:
            # bug fix: this previously tested onion_domain, so i2p
            # callers were redirected incorrectly
            origin_path_str_absolute = \
                'http://' + i2p_domain + origin_path_str
        self._redirect_headers(origin_path_str_absolute,
                               cookie, calling_domain)
    fitness_performance(getreq_start_time, self.server.fitness,
                        '_GET', '_hashtag_search_json',
                        self.server.debug)
def _announce_button(self, calling_domain: str, path: str,
base_dir: str,
cookie: str, proxy_type: str,
@ -18411,6 +18468,73 @@ class PubServer(BaseHTTPRequestHandler):
self.server.getreq_busy = False
return
if '?remotetag=' in self.path and \
'/users/' in self.path and authorized:
actor = self.path.split('?remotetag=')[0]
nickname = get_nickname_from_actor(actor)
hashtag_url = self.path.split('?remotetag=')[1]
if ';' in hashtag_url:
hashtag_url = hashtag_url.split(';')[0]
hashtag_url = hashtag_url.replace('--', '/')
page_number = 1
if ';page=' in self.path:
page_number_str = self.path.split(';page=')[1]
if ';' in page_number_str:
page_number_str = page_number_str.split(';')[0]
if page_number_str.isdigit():
page_number = int(page_number_str)
allow_local_network_access = self.server.allow_local_network_access
show_published_date_only = self.server.show_published_date_only
twitter_replacement_domain = self.server.twitter_replacement_domain
timezone = None
if self.server.account_timezone.get(nickname):
timezone = \
self.server.account_timezone.get(nickname)
msg = \
html_hashtag_search_remote(nickname,
self.server.domain,
self.server.port,
self.server.recent_posts_cache,
self.server.max_recent_posts,
self.server.translate,
self.server.base_dir,
hashtag_url,
page_number, MAX_POSTS_IN_FEED,
self.server.session,
self.server.cached_webfingers,
self.server.person_cache,
self.server.http_prefix,
self.server.project_version,
self.server.yt_replace_domain,
twitter_replacement_domain,
show_published_date_only,
self.server.peertube_instances,
allow_local_network_access,
self.server.theme_name,
self.server.system_language,
self.server.max_like_count,
self.server.signing_priv_key_pem,
self.server.cw_lists,
self.server.lists_enabled,
timezone,
self.server.bold_reading,
self.server.dogwhistles,
self.server.min_images_for_accounts,
self.server.debug)
if msg:
msg = msg.encode('utf-8')
msglen = len(msg)
self._set_headers('text/html', msglen, cookie, calling_domain,
False)
self._write(msg)
self.server.getreq_busy = False
return
self._404()
self.server.getreq_busy = False
return
# hashtag search
if self.path.startswith('/tags/') or \
(authorized and '/tags/' in self.path):
@ -18428,6 +18552,19 @@ class PubServer(BaseHTTPRequestHandler):
curr_session)
self.server.getreq_busy = False
return
if not html_getreq:
self._hashtag_search_json(calling_domain, referer_domain,
self.path, cookie,
self.server.base_dir,
self.server.http_prefix,
self.server.domain,
self.server.domain_full,
self.server.port,
self.server.onion_domain,
self.server.i2p_domain,
getreq_start_time)
self.server.getreq_busy = False
return
self._hashtag_search(calling_domain,
self.path, cookie,
self.server.base_dir,

View File

@ -436,6 +436,12 @@ The location field on a post can be a description, but it can also be a map geol
Selecting the *location* header will open the last known geolocation, so if your current location is near this makes it quicker to find.
## Scientific references
It is possible to have references to scientific papers linked automatically, such that they are readable with one click/press. Supported references are [arXiv](https://arxiv.org) and [Digital object identifier (DOI)](https://en.wikipedia.org/wiki/Digital_object_identifier). For example:
```text
This is a reference to a paper: arxiv:2203.15752
```
# The Timeline
## Layout
![Layout](manual-layout.png)

View File

@ -135,6 +135,7 @@ from inbox import valid_inbox
from inbox import valid_inbox_filenames
from inbox import cache_svg_images
from categories import guess_hashtag_category
from content import replace_remote_hashtags
from content import add_name_emojis_to_tags
from content import combine_textarea_lines
from content import detect_dogwhistles
@ -7719,6 +7720,31 @@ def _test_replace_variable():
assert result == expected
def _test_replace_remote_tags() -> None:
    """Unit test for replace_remote_hashtags"""
    print('replace_remote_tags')
    nickname = 'mynick'
    domain = 'furious.duck'

    # content without any href is returned unchanged
    content = 'This is a test'
    assert replace_remote_hashtags(content, nickname, domain) == content

    # a link which is not a hashtag link is left alone
    link = "https://something/else/mytag"
    content = 'This is href="' + link + '" test'
    assert replace_remote_hashtags(content, nickname, domain) == content

    # a remote hashtag link is rewritten to a local remotetag link
    link = "https://something/tags/mytag"
    content = 'This is href="' + link + '" test'
    expected = \
        'This is href="/users/' + nickname + '?remotetag=' + \
        link.replace('/', '--') + '" test'
    result = replace_remote_hashtags(content, nickname, domain)
    if result != expected:
        print(expected)
        print(result)
    assert result == expected
def run_all_tests():
base_dir = os.getcwd()
print('Running tests...')
@ -7736,6 +7762,7 @@ def run_all_tests():
_test_checkbox_names()
_test_thread_functions()
_test_functions()
_test_replace_remote_tags()
_test_replace_variable()
_test_missing_theme_colors(base_dir)
_test_reply_language(base_dir)

View File

@ -67,6 +67,7 @@ from utils import get_domain_from_actor
from utils import acct_dir
from utils import local_actor_url
from utils import is_unlisted_post
from content import replace_remote_hashtags
from content import detect_dogwhistles
from content import create_edits_html
from content import bold_reading_string
@ -1821,6 +1822,8 @@ def _get_copyright_footer(content_license_url: str,
elif '/fdl' in content_license_url:
icon_filename = 'license_fdl.png'
description = 'Content License'
if translate.get('Content License'):
description = translate['Content License']
copyright_str = \
' ' + \
@ -2445,6 +2448,8 @@ def individual_post_as_html(signing_priv_key_pem: str,
system_language, translate)
if not content_str:
return ''
content_str = \
replace_remote_hashtags(content_str, nickname, domain)
summary_str = ''
if content_str:

View File

@ -11,6 +11,8 @@ import os
from shutil import copyfile
import urllib.parse
from datetime import datetime
from utils import remove_id_ending
from utils import has_object_dict
from utils import acct_handle_dir
from utils import get_base_content_from_post
from utils import is_account_dir
@ -44,6 +46,7 @@ from webapp_utils import html_search_result_share
from webapp_post import individual_post_as_html
from webapp_hashtagswarm import html_hash_tag_swarm
from maps import html_hashtag_maps
from session import get_json
def html_search_emoji(translate: {}, base_dir: str, search_str: str,
@ -961,6 +964,7 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
'" alt="' + translate['Page up'] + \
'"></a>\n </center>\n'
index = start_index
text_mode_separator = '<div class="transparent"><hr></div>'
while index <= end_index:
post_id = lines[index].strip('\n').strip('\r')
if ' ' not in post_id:
@ -1029,9 +1033,12 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
bold_reading, dogwhistles,
minimize_all_images, None)
if post_str:
hashtag_search_form += separator_str + post_str
hashtag_search_form += \
text_mode_separator + separator_str + post_str
index += 1
hashtag_search_form += text_mode_separator
if end_index < no_of_lines - 1:
# next page link
hashtag_search_form += \
@ -1047,6 +1054,207 @@ def html_hashtag_search(nickname: str, domain: str, port: int,
return hashtag_search_form
def html_hashtag_search_remote(nickname: str, domain: str, port: int,
                               recent_posts_cache: {}, max_recent_posts: int,
                               translate: {},
                               base_dir: str, hashtag_url: str,
                               page_number: int, posts_per_page: int,
                               session, cached_webfingers: {},
                               person_cache: {},
                               http_prefix: str, project_version: str,
                               yt_replace_domain: str,
                               twitter_replacement_domain: str,
                               show_published_date_only: bool,
                               peertube_instances: [],
                               allow_local_network_access: bool,
                               theme_name: str, system_language: str,
                               max_like_count: int,
                               signing_priv_key_pem: str,
                               cw_lists: {}, lists_enabled: str,
                               timezone: str, bold_reading: bool,
                               dogwhistles: {},
                               min_images_for_accounts: [],
                               debug: bool) -> str:
    """Show a page containing search results for a remote hashtag.

    hashtag_url is the tag collection url on the remote instance.
    The collection is fetched as ActivityPub json, each listed post
    is fetched and rendered, up to posts_per_page. Returns the html
    page, or None if the account directory for nickname is missing.
    """
    hashtag = hashtag_url.split('/')[-1]
    profile_str = 'https://www.w3.org/ns/activitystreams'
    as_header = {
        'Accept': 'application/activity+json; profile="' + profile_str + '"'
    }
    # fetch the remote tag collection for the requested page
    hashtag_url_with_page = hashtag_url
    if '?page=' not in hashtag_url_with_page:
        hashtag_url_with_page += '?page=' + str(page_number)
    hashtag_json = \
        get_json(signing_priv_key_pem,
                 session, hashtag_url_with_page, as_header, None, debug,
                 __version__, http_prefix, domain)
    lines = []
    if hashtag_json:
        if 'orderedItems' in hashtag_json:
            lines = hashtag_json['orderedItems']
        else:
            print('No orderedItems in hashtag collection ' + str(hashtag_json))
    else:
        print('WARN: no hashtags returned for url ' + hashtag_url)
    separator_str = html_post_separator(base_dir, None)
    # check that the directory for the nickname exists
    if nickname:
        account_dir = acct_dir(base_dir, nickname, domain)
        if not os.path.isdir(account_dir):
            return None
    # read the css
    css_filename = base_dir + '/epicyon-profile.css'
    if os.path.isfile(base_dir + '/epicyon.css'):
        css_filename = base_dir + '/epicyon.css'
    # ensure that the page number is in bounds
    if not page_number:
        page_number = 1
    elif page_number < 1:
        page_number = 1
    instance_title = \
        get_config_param(base_dir, 'instanceTitle')
    hashtag_search_form = \
        html_header_with_external_style(css_filename, instance_title, None)
    # add the page title
    hashtag_search_form += '<center>\n' + \
        '<h1>#' + hashtag
    # RSS link for hashtag feed
    hashtag_rss = hashtag_url
    if '.html' in hashtag_rss:
        hashtag_rss = hashtag_rss.replace('.html', '')
    hashtag_search_form += ' <a href="' + hashtag_rss + '.rss">'
    hashtag_search_form += \
        '<img style="width:3%;min-width:50px" ' + \
        'loading="lazy" decoding="async" ' + \
        'alt="RSS 2.0" title="RSS 2.0" src="/' + \
        'icons/logorss.png" /></a></h1>\n'
    # local pagination link for this remote tag
    tag_link = '/users/' + nickname + '?remotetag=' + \
        hashtag_url.replace('/', '--')
    # bug fix: hashtag_json can be None when the fetch failed, so
    # guard before calling .get() to avoid an AttributeError
    if page_number > 1 and hashtag_json and hashtag_json.get('prev'):
        # previous page link
        hashtag_search_form += \
            ' <center>\n' + \
            ' <a href="' + tag_link + ';page=' + \
            str(page_number - 1) + \
            '"><img loading="lazy" decoding="async" ' + \
            'class="pageicon" src="/' + \
            'icons/pageup.png" title="' + \
            translate['Page up'] + \
            '" alt="' + translate['Page up'] + \
            '"></a>\n </center>\n'
    text_mode_separator = '<div class="transparent"><hr></div>'
    post_ctr = 0
    for post_id in lines:
        print('Hashtag post_id ' + post_id)
        post_json_object = \
            get_json(signing_priv_key_pem,
                     session, post_id, as_header, None, debug,
                     __version__, http_prefix, domain)
        if not post_json_object:
            print('No hashtag post for ' + post_id)
            continue
        if not isinstance(post_json_object, dict):
            print('Hashtag post is not a dict ' + str(post_json_object))
            continue
        if not has_object_dict(post_json_object):
            # bare post object: wrap it in a Create activity so that
            # it can be rendered like any other timeline post
            if post_json_object.get('id') and \
               'to' in post_json_object and \
               'cc' in post_json_object:
                new_url = \
                    remove_id_ending(post_json_object['id'])
                actor = new_url
                if '/statuses/' in actor:
                    actor = actor.split('/statuses/')[0]
                new_post_json_object = {
                    "type": "Create",
                    "id": new_url + '/activity',
                    "to": post_json_object['to'],
                    "cc": post_json_object['cc'],
                    "actor": actor,
                    "object": post_json_object
                }
                post_json_object = new_post_json_object
            else:
                print('Hashtag post does not contain necessary fields ' +
                      str(post_json_object))
                continue
        if not is_public_post(post_json_object):
            print('Hashtag post is not public ' + post_id)
            continue
        show_individual_post_icons = False
        allow_deletion = False
        show_repeats = show_individual_post_icons
        show_icons = show_individual_post_icons
        manually_approves_followers = False
        show_public_only = False
        # typo fix: was previously named store_to_sache
        store_to_cache = False
        allow_downloads = True
        avatar_url = None
        show_avatar_options = True
        minimize_all_images = False
        if nickname in min_images_for_accounts:
            minimize_all_images = True
        post_str = \
            individual_post_as_html(signing_priv_key_pem,
                                    allow_downloads, recent_posts_cache,
                                    max_recent_posts,
                                    translate, None,
                                    base_dir, session, cached_webfingers,
                                    person_cache,
                                    nickname, domain, port,
                                    post_json_object,
                                    avatar_url, show_avatar_options,
                                    allow_deletion,
                                    http_prefix, project_version,
                                    'search',
                                    yt_replace_domain,
                                    twitter_replacement_domain,
                                    show_published_date_only,
                                    peertube_instances,
                                    allow_local_network_access,
                                    theme_name, system_language,
                                    max_like_count,
                                    show_repeats, show_icons,
                                    manually_approves_followers,
                                    show_public_only,
                                    store_to_cache, False, cw_lists,
                                    lists_enabled, timezone, False,
                                    bold_reading, dogwhistles,
                                    minimize_all_images, None)
        if post_str:
            hashtag_search_form += \
                text_mode_separator + separator_str + post_str
            post_ctr += 1
            if post_ctr >= posts_per_page:
                break
    hashtag_search_form += text_mode_separator
    # same None guard as for the previous page link
    if post_ctr >= 5 and hashtag_json and hashtag_json.get('next'):
        # next page link
        hashtag_search_form += \
            ' <center>\n' + \
            ' <a href="' + tag_link + \
            ';page=' + str(page_number + 1) + \
            '"><img loading="lazy" decoding="async" ' + \
            'class="pageicon" src="/icons' + \
            '/pagedown.png" title="' + translate['Page down'] + \
            '" alt="' + translate['Page down'] + '"></a>' + \
            ' </center>'
    hashtag_search_form += html_footer()
    return hashtag_search_form
def rss_hashtag_search(nickname: str, domain: str, port: int,
recent_posts_cache: {}, max_recent_posts: int,
translate: {},
@ -1157,3 +1365,83 @@ def rss_hashtag_search(nickname: str, domain: str, port: int,
break
return hashtag_feed + rss2tag_footer()
def hashtag_search_json(nickname: str, domain: str, port: int,
                        base_dir: str, hashtag: str,
                        page_number: int, posts_per_page: int,
                        http_prefix: str) -> {}:
    """Show a json collection for a hashtag.

    Reads the per-hashtag index file under base_dir/tags and returns
    an ActivityStreams OrderedCollection of public post ids for the
    requested page, or None if the hashtag index does not exist or
    is empty.
    """
    if hashtag.startswith('#'):
        hashtag = hashtag[1:]
    hashtag = urllib.parse.unquote(hashtag)
    hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
    if not os.path.isfile(hashtag_index_file):
        # fall back to the lowercase form of the hashtag
        if hashtag != hashtag.lower():
            hashtag = hashtag.lower()
            hashtag_index_file = base_dir + '/tags/' + hashtag + '.txt'
        if not os.path.isfile(hashtag_index_file):
            print('WARN: hashtag file not found ' + hashtag_index_file)
            return None
    # check that the directory for the nickname exists
    if nickname:
        account_dir = acct_dir(base_dir, nickname, domain)
        if not os.path.isdir(account_dir):
            nickname = None
    # read the index
    lines = []
    with open(hashtag_index_file, 'r', encoding='utf-8') as fp_hash:
        lines = fp_hash.readlines()
    if not lines:
        return None
    domain_full = get_full_domain(domain, port)
    url = http_prefix + '://' + domain_full + '/tags/' + \
        hashtag + '?page=' + str(page_number)
    hashtag_json = {
        '@context': 'https://www.w3.org/ns/activitystreams',
        'id': url,
        'orderedItems': [],
        'totalItems': 0,
        'type': 'OrderedCollection'
    }
    page_items = 0
    for line in lines:
        post_id = line.strip('\n').strip('\r')
        if ' ' not in post_id:
            # old-style index line containing only the post id
            nickname = get_nickname_from_actor(post_id)
            if not nickname:
                continue
        else:
            # new-style index line: "hashtag nickname postid"
            post_fields = post_id.split(' ')
            if len(post_fields) != 3:
                continue
            nickname = post_fields[1]
            post_id = post_fields[2]
        post_filename = locate_post(base_dir, nickname, domain, post_id)
        if not post_filename:
            continue
        post_json_object = load_json(post_filename)
        if not post_json_object:
            continue
        if not has_object_dict(post_json_object):
            continue
        if not is_public_post(post_json_object):
            continue
        if not post_json_object['object'].get('id'):
            continue
        # add to feed
        page_items += 1
        # off-by-one fix: page_items has already been incremented, so
        # use <= when skipping prior pages, otherwise the first item
        # of each page duplicates the last item of the previous page
        if page_items <= posts_per_page * (page_number - 1):
            continue
        id_str = remove_id_ending(post_json_object['object']['id'])
        hashtag_json['orderedItems'].append(id_str)
        hashtag_json['totalItems'] += 1
        if hashtag_json['totalItems'] >= posts_per_page:
            break
    return hashtag_json