epicyon/posts.py

5860 lines
231 KiB
Python
Raw Normal View History

2020-04-04 10:05:27 +00:00
__filename__ = "posts.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2022-02-03 13:58:20 +00:00
__version__ = "1.3.0"
2020-04-04 10:05:27 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2020-04-04 10:05:27 +00:00
__status__ = "Production"
2021-06-15 15:08:12 +00:00
__module_group__ = "ActivityPub"
2020-04-04 10:05:27 +00:00
2019-06-28 18:55:29 +00:00
import json
import html
2019-06-29 10:08:59 +00:00
import datetime
2019-06-30 15:03:26 +00:00
import os
import shutil
import sys
2019-07-01 11:48:54 +00:00
import time
2020-09-25 12:33:28 +00:00
import random
2019-10-10 13:48:05 +00:00
from time import gmtime, strftime
from collections import OrderedDict
2021-12-28 21:36:27 +00:00
from threads import thread_with_trace
2022-07-28 09:59:18 +00:00
from threads import begin_thread
2021-12-29 21:55:09 +00:00
from cache import store_person_in_cache
from cache import get_person_from_cache
from cache import expire_person_cache
2019-06-29 10:08:59 +00:00
from pprint import pprint
2021-12-28 16:56:57 +00:00
from session import create_session
2021-12-29 21:55:09 +00:00
from session import get_json
from session import post_json
from session import post_json_string
from session import post_image
from webfinger import webfinger_handle
from httpsig import create_signed_header
from siteactive import site_is_active
from languages import understood_post_language
2022-08-10 19:54:01 +00:00
from utils import is_dm
2022-06-21 11:58:50 +00:00
from utils import remove_eol
2022-06-10 11:43:33 +00:00
from utils import text_in_file
from utils import get_media_descriptions_from_post
from utils import valid_hash_tag
from utils import get_audio_extensions
from utils import get_summary_from_post
2021-12-26 12:24:40 +00:00
from utils import get_user_paths
2021-12-26 19:15:36 +00:00
from utils import invalid_ciphertext
2022-04-09 15:11:22 +00:00
from utils import has_object_string_type
2021-12-27 11:20:57 +00:00
from utils import remove_id_ending
2021-12-26 17:21:37 +00:00
from utils import replace_users_with_at
2021-12-26 17:53:07 +00:00
from utils import has_group_type
2021-12-26 11:29:40 +00:00
from utils import get_base_content_from_post
2021-12-26 18:17:37 +00:00
from utils import remove_domain_port
2021-12-26 18:14:21 +00:00
from utils import get_port_from_domain
2021-12-26 10:57:03 +00:00
from utils import has_object_dict
2021-12-26 20:20:36 +00:00
from utils import reject_post_id
2021-12-27 19:33:45 +00:00
from utils import remove_invalid_chars
2021-12-28 14:01:37 +00:00
from utils import file_last_modified
2021-12-28 14:41:10 +00:00
from utils import is_public_post
2021-12-26 12:19:00 +00:00
from utils import has_users_path
2021-12-26 12:31:47 +00:00
from utils import valid_post_date
2021-12-26 12:45:03 +00:00
from utils import get_full_domain
2021-12-27 13:58:17 +00:00
from utils import get_followers_list
2021-12-27 17:49:35 +00:00
from utils import is_evil
2021-12-27 17:42:35 +00:00
from utils import get_status_number
2021-12-27 19:26:54 +00:00
from utils import create_person_dir
2021-12-27 20:47:05 +00:00
from utils import url_permitted
2021-12-27 22:19:18 +00:00
from utils import get_nickname_from_actor
2021-12-27 19:05:25 +00:00
from utils import get_domain_from_actor
2021-12-28 14:55:45 +00:00
from utils import delete_post
2021-12-28 14:41:10 +00:00
from utils import valid_nickname
2021-12-26 20:36:08 +00:00
from utils import locate_post
2021-12-26 15:13:34 +00:00
from utils import load_json
2021-12-26 14:47:21 +00:00
from utils import save_json
2021-12-26 14:08:58 +00:00
from utils import get_config_param
2021-12-27 22:38:48 +00:00
from utils import locate_news_votes
2021-12-27 22:46:10 +00:00
from utils import locate_news_arrival
2021-12-27 22:32:59 +00:00
from utils import votes_on_newswire_item
2021-12-27 15:43:22 +00:00
from utils import remove_html
2021-12-27 21:42:08 +00:00
from utils import dangerous_markup
2021-12-26 12:02:29 +00:00
from utils import acct_dir
2021-12-26 10:19:59 +00:00
from utils import local_actor_url
from media import get_music_metadata
2021-12-28 21:36:27 +00:00
from media import attach_media
from media import replace_you_tube
from media import replace_twitter
from content import reject_twitter_summary
2021-12-29 21:55:09 +00:00
from content import words_similarity
from content import limit_repeated_words
from content import post_tag_exists
from content import remove_long_words
from content import add_html_tags
from content import replace_emoji_from_tags
from content import remove_text_formatting
2021-12-28 21:36:27 +00:00
from auth import create_basic_auth_header
2022-05-20 15:55:02 +00:00
from blocking import is_blocked_hashtag
2021-12-29 21:55:09 +00:00
from blocking import is_blocked
2021-12-28 21:55:38 +00:00
from blocking import is_blocked_domain
2021-12-29 21:55:09 +00:00
from filters import is_filtered
from git import convert_post_to_patch
from linked_data_sig import generate_json_signature
from petnames import resolve_petnames
from video import convert_video_to_note
from context import get_individual_post_context
from maps import geocoords_from_map_link
2020-04-04 10:05:27 +00:00
2019-06-28 18:55:29 +00:00
2021-12-28 19:33:29 +00:00
def is_moderator(base_dir: str, nickname: str) -> bool:
    """Returns true if the given nickname is a moderator

    Moderator nicknames are read, one per line, from
    accounts/moderators.txt beneath base_dir. If that file is missing,
    cannot be read or contains no lines then the only moderator is the
    account named by the 'admin' config parameter.
    """
    moderators_file = base_dir + '/accounts/moderators.txt'

    lines = []
    if os.path.isfile(moderators_file):
        try:
            with open(moderators_file, 'r', encoding='utf-8') as fp_mod:
                lines = fp_mod.readlines()
        except OSError as ex:
            # the file can disappear or become unreadable after the
            # isfile check, so fall back to the admin account
            print('EX: is_moderator unable to read ' +
                  moderators_file + ' ' + str(ex))

    if not lines:
        # no moderators are listed, so only the admin qualifies
        admin_name = get_config_param(base_dir, 'admin')
        if not admin_name:
            return False
        return admin_name == nickname

    for moderator in lines:
        moderator = moderator.strip('\n').strip('\r')
        # a blank line must never match an empty nickname
        if moderator and moderator == nickname:
            return True
    return False
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def no_of_followers_on_domain(base_dir: str, handle: str,
                              domain: str, follow_file='followers.txt') -> int:
    """Counts the entries within the follow file of the given handle
    which belong to the given domain

    The follow file is accounts/<handle>/<follow_file> beneath base_dir.
    Only lines containing '@' are considered, and the text after the
    first '@' (up to any further '@') is compared with the domain.
    Returns zero if the file does not exist.
    """
    followers_filename = \
        base_dir + '/accounts/' + handle + '/' + follow_file
    if not os.path.isfile(followers_filename):
        return 0

    with open(followers_filename, 'r', encoding='utf-8') as fp_followers:
        return sum(1 for line in fp_followers
                   if '@' in line and
                   remove_eol(line.split('@')[1]) == domain)
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def _get_local_private_key(base_dir: str, nickname: str, domain: str) -> str:
    """Reads the private key of a local account

    The key is expected at keys/private/<nickname>@<domain>.key beneath
    base_dir, with the handle converted to lower case.
    Returns the contents of that file, or None if the nickname or
    domain is empty or the file does not exist.
    """
    if not (nickname and domain):
        return None
    handle = (nickname + '@' + domain).lower()
    key_filename = base_dir + '/keys/private/' + handle + '.key'
    if os.path.isfile(key_filename):
        with open(key_filename, 'r', encoding='utf-8') as fp_key:
            return fp_key.read()
    return None
2021-12-28 18:13:52 +00:00
def get_instance_actor_key(base_dir: str, domain: str) -> str:
    """Returns the private key for the instance actor used for
    signing GET posts. This is the local private key of the
    'inbox' nickname on the given domain
    """
    instance_nickname = 'inbox'
    return _get_local_private_key(base_dir, instance_nickname, domain)
2021-12-29 21:55:09 +00:00
def _get_local_public_key(base_dir: str, nickname: str, domain: str) -> str:
    """Reads the public key of a local account

    The key is expected at keys/public/<nickname>@<domain>.key beneath
    base_dir, with the handle converted to lower case.
    Returns the contents of that file, or None if the nickname or
    domain is empty or the file does not exist.
    """
    if not (nickname and domain):
        return None
    handle = (nickname + '@' + domain).lower()
    key_filename = base_dir + '/keys/public/' + handle + '.key'
    if os.path.isfile(key_filename):
        with open(key_filename, 'r', encoding='utf-8') as fp_key:
            return fp_key.read()
    return None
2021-12-29 21:55:09 +00:00
def _get_person_key(nickname: str, domain: str, base_dir: str,
                    key_type: str = 'public', debug: bool = False):
    """Returns the public or private key of a person

    key_type 'private' selects the private key and any other value
    selects the public key.
    Returns an empty string if no key was found or if the key is
    fewer than 20 characters long.
    """
    if key_type == 'private':
        key_pem = _get_local_private_key(base_dir, nickname, domain)
    else:
        key_pem = _get_local_public_key(base_dir, nickname, domain)
    if not key_pem:
        if debug:
            print('DEBUG: ' + key_type + ' key file not found')
        return ''
    if len(key_pem) < 20:
        if debug:
            # report the length only: key material must never be
            # written to the log, and the key may not be a private one
            print('DEBUG: ' + key_type + ' key was too short: ' +
                  str(len(key_pem)) + ' characters')
        return ''
    return key_pem
2020-03-22 21:16:02 +00:00
2020-04-04 10:05:27 +00:00
2021-12-31 15:16:53 +00:00
def _clean_html(raw_html: str) -> str:
    """Converts html character references within the given text,
    such as &amp; into the characters which they represent.
    Markup tags are not removed
    """
    return html.unescape(raw_html)
2020-04-04 10:05:27 +00:00
2021-12-31 15:16:53 +00:00
def get_user_url(wf_request: {}, source_id: int, debug: bool) -> str:
    """Gets the actor url from a webfinger request

    The first link having type application/activity+json and a href
    is used. Within that href any '/@' is replaced by '/users/'.
    Returns None if the webfinger has no links or no suitable link.
    source_id only appears within the printed messages.
    """
    links = wf_request.get('links')
    if not links:
        if source_id == 72367:
            print('get_user_url ' + str(source_id) +
                  ' failed to get display name for webfinger ' +
                  str(wf_request))
        else:
            print('get_user_url webfinger activity+json contains no links ' +
                  str(source_id) + ' ' + str(wf_request))
        return None

    for link in links:
        if not link.get('type') or not link.get('href'):
            continue
        if link['type'] != 'application/activity+json':
            continue
        actor_url = link['href']
        if '/@' in actor_url:
            return actor_url.replace('/@', '/users/')
        if debug and not has_users_path(actor_url):
            print('get_user_url webfinger activity+json ' +
                  'contains single user instance actor ' +
                  str(source_id) + ' ' + str(link))
        return actor_url
    return None
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def parse_user_feed(signing_priv_key_pem: str,
                    session, feed_url: str, as_header: {},
                    project_version: str, http_prefix: str,
                    origin_domain: str, debug: bool, depth: int = 0) -> []:
    """Returns the items within the feed at the given url

    The feed is fetched with the given header and, if nothing is
    returned, fetched again with an ld+json Accept header if that
    differs from the one supplied.
    If the feed has no 'orderedItems' or 'items' then its 'first' or
    'next' field is followed. When that field is a url this function
    calls itself, up to a depth of 10, and when it is a dict its items
    are returned directly.
    Returns None if no items could be obtained.
    """
    if depth > 10:
        if debug:
            print('Maximum search depth reached')
        return None

    if debug:
        print('Getting user feed for ' + feed_url)
        print('User feed header ' + str(as_header))
        print('http_prefix ' + str(http_prefix))
        print('origin_domain ' + str(origin_domain))

    feed_json = \
        get_json(signing_priv_key_pem, session, feed_url, as_header, None,
                 debug, project_version, http_prefix, origin_domain)
    if not feed_json:
        # try again with the alternative Accept header
        profile_str = 'https://www.w3.org/ns/activitystreams'
        accept_str = 'application/ld+json; profile="' + profile_str + '"'
        if as_header['Accept'] != accept_str:
            as_header = {
                'Accept': accept_str
            }
            feed_json = get_json(signing_priv_key_pem, session, feed_url,
                                 as_header, None, debug, project_version,
                                 http_prefix, origin_domain)
    if not feed_json:
        if debug:
            print('No user feed was returned')
        return None

    if debug:
        print('User feed:')
        pprint(feed_json)

    # the feed directly contains its items
    for items_field in ('orderedItems', 'items'):
        if items_field in feed_json:
            return feed_json[items_field]

    # otherwise look for a page of items
    next_url = None
    for page_field in ('first', 'next'):
        if page_field in feed_json:
            next_url = feed_json[page_field]
            break

    if debug:
        print('User feed next url: ' + str(next_url))

    if not next_url:
        return None

    if isinstance(next_url, str):
        if '?max_id=0' in next_url:
            return None
        user_feed = \
            parse_user_feed(signing_priv_key_pem,
                            session, next_url, as_header,
                            project_version, http_prefix,
                            origin_domain, debug, depth + 1)
        return user_feed if user_feed else None

    if isinstance(next_url, dict):
        # the page is embedded within the feed
        for items_field in ('orderedItems', 'items'):
            if next_url.get(items_field):
                return next_url[items_field]
    return None
2020-03-22 21:16:02 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def _get_person_box_actor(session, base_dir: str, actor: str,
                          profile_str: str, as_header: {},
                          debug: bool, project_version: str,
                          http_prefix: str, origin_domain: str,
                          person_cache: {},
                          signing_priv_key_pem: str,
                          source_id: int) -> {}:
    """Returns the actor json for the given actor url

    The person cache is consulted first. Otherwise the actor is
    requested with the given header, or with an ld+json header if the
    url contains /channel/ or /accounts/, and if that returns nothing
    it is requested a second time with an ld+json header.
    Returns None if the actor could not be obtained.
    """
    cached_json = get_person_from_cache(base_dir, actor, person_cache)
    if cached_json:
        return cached_json

    if '/channel/' in actor or '/accounts/' in actor:
        as_header = {
            'Accept': 'application/ld+json; profile="' + profile_str + '"'
        }

    for attempt in range(2):
        if attempt == 1:
            # the second attempt always uses ld+json
            as_header = {
                'Accept': 'application/ld+json; profile="' + profile_str + '"'
            }
        actor_json = \
            get_json(signing_priv_key_pem, session, actor, as_header, None,
                     debug, project_version, http_prefix, origin_domain)
        if actor_json:
            return actor_json

    print('Unable to get actor for ' + actor + ' ' + str(source_id))
    if not signing_priv_key_pem:
        print('No signing key provided when getting actor')
    return None
2021-12-31 15:16:53 +00:00
def get_person_box(signing_priv_key_pem: str, origin_domain: str,
                   base_dir: str, session, wf_request: {}, person_cache: {},
                   project_version: str, http_prefix: str,
                   nickname: str, domain: str,
                   box_name: str = 'inbox',
                   source_id=0) -> (str, str, str, str, str, str, str, bool):
    """Looks up an actor from a webfinger result and returns details
    of the actor together with one of its boxes

    Returns a tuple of:
        the box_name field of the actor, or of its endpoints
        the public key id
        the public key pem
        the actor id
        the shared inbox
        the avatar url
        the display name
        whether the actor type is Group
    All eight values are None if there is no webfinger, no actor url,
    no actor json or no such box. On success the actor json is stored
    in the person cache.
    """
    nothing = (None, None, None, None, None, None, None, None)
    debug = False
    profile_str = 'https://www.w3.org/ns/activitystreams'
    as_header = {
        'Accept': 'application/activity+json; profile="' + profile_str + '"'
    }
    if not wf_request:
        print('No webfinger given')
        return nothing

    # get the actor / person_url
    if wf_request.get('errors'):
        if nickname == 'dev':
            # try single user instance
            print('get_person_box: Trying single user instance with ld+json')
            person_url = http_prefix + '://' + domain
            as_header = {
                'Accept': 'application/ld+json; profile="' + profile_str + '"'
            }
        else:
            # the final fallback is a mastodon style url
            person_url = local_actor_url(http_prefix, nickname, domain)
    else:
        # get the actor url from webfinger links
        person_url = get_user_url(wf_request, source_id, debug)
    if not person_url:
        return nothing

    # get the actor json from the url
    person_json = \
        _get_person_box_actor(session, base_dir, person_url,
                              profile_str, as_header,
                              debug, project_version,
                              http_prefix, origin_domain,
                              person_cache, signing_priv_key_pem,
                              source_id)
    if not person_json:
        return nothing

    is_group = False
    if person_json.get('type') and person_json['type'] == 'Group':
        is_group = True

    # get the url for the box/collection, which may be
    # a field of the actor or of its endpoints
    box_json = None
    if person_json.get(box_name):
        box_json = person_json[box_name]
    elif person_json.get('endpoints'):
        if person_json['endpoints'].get(box_name):
            box_json = person_json['endpoints'][box_name]
    if not box_json:
        return nothing

    person_id = person_json['id'] if person_json.get('id') else None

    pub_key_id = None
    pub_key = None
    public_key_json = person_json.get('publicKey')
    if public_key_json:
        if public_key_json.get('id'):
            pub_key_id = public_key_json['id']
        if public_key_json.get('publicKeyPem'):
            pub_key = public_key_json['publicKeyPem']

    shared_inbox = None
    if person_json.get('sharedInbox'):
        shared_inbox = person_json['sharedInbox']
    elif person_json.get('endpoints'):
        if person_json['endpoints'].get('sharedInbox'):
            shared_inbox = person_json['endpoints']['sharedInbox']

    avatar_url = None
    icon_json = person_json.get('icon')
    if icon_json:
        if icon_json.get('url'):
            avatar_url = icon_json['url']

    display_name = None
    if person_json.get('name'):
        display_name = person_json['name']
        # replace names which contain markup or filtered words
        if dangerous_markup(person_json['name'], False):
            display_name = '*ADVERSARY*'
        elif is_filtered(base_dir,
                         nickname, domain,
                         display_name, 'en'):
            display_name = '*FILTERED*'
        # have they moved?
        # NOTE(review): the appended literal is empty as seen here, so
        # this has no effect. A marker character may have been lost from
        # the string - confirm against the repository
        if person_json.get('movedTo'):
            display_name += ''

    store_person_in_cache(base_dir, person_url, person_json,
                          person_cache, True)

    return box_json, pub_key_id, pub_key, person_id, shared_inbox, \
        avatar_url, display_name, is_group
2019-06-30 10:21:07 +00:00
2019-06-30 10:14:02 +00:00
2021-12-31 15:16:53 +00:00
def _has_public_recipient(recipients) -> bool:
    """Returns true if any of the given recipients ends with #Public
    """
    # a single recipient may be given as a string rather than a list,
    # and must not be iterated one character at a time
    if isinstance(recipients, str):
        recipients = [recipients]
    for recipient in recipients:
        # ignore anything which is not a url string
        if isinstance(recipient, str) and recipient.endswith('#Public'):
            return True
    return False


def _is_public_feed_post(item: {}, person_posts: {}, debug: bool) -> bool:
    """Is the given post a public feed post?

    The item must be a dict with an id and a type of Create, Announce,
    Page or Note, and it or its object must have a published field.
    If the id is not already within person_posts and a 'to' field
    exists then #Public must be one of its recipients.
    """
    if not isinstance(item, dict):
        if debug:
            print('item object is not a dict')
            pprint(item)
        return False
    if not item.get('id'):
        if debug:
            print('No id')
        return False
    if not item.get('type'):
        if debug:
            print('No type')
        return False
    if item['type'] not in ('Create', 'Announce', 'Page', 'Note'):
        if debug:
            print('Not a Create/Note/Announce type')
        return False

    item_is_note = item['type'] in ('Note', 'Page')

    if item.get('object'):
        if isinstance(item['object'], dict):
            if not item['object'].get('published'):
                if debug:
                    print('No published attribute')
                return False
        elif isinstance(item['object'], str):
            if not item.get('published'):
                if debug:
                    print('No published attribute')
                return False
        else:
            if debug:
                print('object is not a dict or string')
            return False
    elif item_is_note:
        if not item.get('published'):
            if debug:
                print('No published attribute')
            return False

    if not person_posts.get(item['id']):
        this_item = item
        if item.get('object'):
            this_item = item['object']
        # check that this is a public post
        # #Public should appear in the "to" list
        if isinstance(this_item, dict):
            if this_item.get('to'):
                if not _has_public_recipient(this_item['to']):
                    return False
        elif isinstance(this_item, str) or item_is_note:
            if item.get('to'):
                if not _has_public_recipient(item['to']):
                    return False
    return True
2021-12-29 21:55:09 +00:00
def is_create_inside_announce(item: {}) -> bool:
    """ is this a Create inside of an Announce?
    eg. lemmy feed item

    Returns true only if the item is a dict of type Announce whose
    object is a dict of type Create. An item without any type is
    not an Announce.
    """
    if not isinstance(item, dict):
        return False
    # use get so that an item lacking a type does not raise KeyError
    if item.get('type') != 'Announce':
        return False
    announced = item.get('object')
    if not isinstance(announced, dict):
        return False
    return announced.get('type') == 'Create'
2021-12-31 15:16:53 +00:00
def _get_posts(session, outbox_url: str, max_posts: int,
               max_mentions: int,
               max_emoji: int, max_attachments: int,
               federation_list: [], raw: bool,
               simple: bool, debug: bool,
               project_version: str, http_prefix: str,
               origin_domain: str, system_language: str,
               signing_priv_key_pem: str) -> {}:
    """Gets public posts from an outbox

    If raw is true then up to max_posts feed items are pretty printed
    and None is returned.
    Otherwise each public post with content is examined. Posts having
    more than max_mentions mentions, max_emoji emoji or
    max_attachments attachments, or replying to a url which is not
    permitted by federation_list, are skipped.
    If simple is true the text of each post is printed, and otherwise
    the post is pretty printed and added to the returned dictionary,
    which is indexed by post id.
    At most max_posts posts are handled.
    """
    if debug:
        # str() so that a missing url is reported rather than raising
        print('Getting outbox posts for ' + str(outbox_url))
    person_posts = {}
    if not outbox_url:
        return person_posts
    profile_str = 'https://www.w3.org/ns/activitystreams'
    accept_str = \
        'application/activity+json; ' + \
        'profile="' + profile_str + '"'
    if '/outbox/' in outbox_url:
        accept_str = \
            'application/ld+json; ' + \
            'profile="' + profile_str + '"'
    as_header = {
        'Accept': accept_str
    }

    if raw:
        if debug:
            print('Returning the raw feed')
        user_feed = parse_user_feed(signing_priv_key_pem,
                                    session, outbox_url, as_header,
                                    project_version, http_prefix,
                                    origin_domain, debug)
        result = []
        # parse_user_feed returns None if no feed was obtained
        for item in user_feed or []:
            result.append(item)
            if len(result) == max_posts:
                break
        pprint(result)
        return None

    if debug:
        print('Returning a human readable version of the feed')
    user_feed = parse_user_feed(signing_priv_key_pem,
                                session, outbox_url, as_header,
                                project_version, http_prefix,
                                origin_domain, debug)
    if not user_feed:
        return person_posts

    i = 0
    for item in user_feed:
        if is_create_inside_announce(item):
            item = item['object']

        if not _is_public_feed_post(item, person_posts, debug):
            continue

        this_item = item
        this_item_type = item['type']
        if this_item_type not in ('Note', 'Page'):
            # a Create or Announce may arrive without any object
            this_item = item.get('object')

        content = get_base_content_from_post(item, system_language)
        content = content.replace('&apos;', "'")

        mentions = []
        emoji = {}
        summary = ''
        in_reply_to = ''
        attachment = []
        sensitive = False
        if isinstance(this_item, dict):
            if this_item.get('tag'):
                for tag_item in this_item['tag']:
                    # tags come from a remote server, so skip any
                    # entry which is not a dict
                    if not isinstance(tag_item, dict):
                        continue
                    if not tag_item.get('type'):
                        continue
                    tag_type = tag_item['type'].lower()
                    if tag_type == 'emoji':
                        if tag_item.get('name') and tag_item.get('icon'):
                            if tag_item['icon'].get('url'):
                                # No emoji from non-permitted domains
                                if url_permitted(tag_item['icon']['url'],
                                                 federation_list):
                                    emoji_name = tag_item['name']
                                    emoji_icon = tag_item['icon']['url']
                                    emoji[emoji_name] = emoji_icon
                                else:
                                    if debug:
                                        print('url not permitted ' +
                                              tag_item['icon']['url'])
                    if tag_type == 'mention':
                        if tag_item.get('name'):
                            if tag_item['name'] not in mentions:
                                mentions.append(tag_item['name'])
            if len(mentions) > max_mentions:
                if debug:
                    print('max mentions reached')
                continue
            if len(emoji) > max_emoji:
                if debug:
                    print('max emojis reached')
                continue

            # prefer the summary in the system language
            if this_item.get('summaryMap'):
                if this_item['summaryMap'].get(system_language):
                    summary = this_item['summaryMap'][system_language]
            if not summary and this_item.get('summary'):
                summary = this_item['summary']

            if this_item.get('inReplyTo'):
                if isinstance(this_item['inReplyTo'], str):
                    # No replies to non-permitted domains
                    if not url_permitted(this_item['inReplyTo'],
                                         federation_list):
                        if debug:
                            print('url not permitted ' +
                                  this_item['inReplyTo'])
                        continue
                    in_reply_to = this_item['inReplyTo']

            if this_item.get('attachment'):
                if len(this_item['attachment']) > max_attachments:
                    if debug:
                        print('max attachments reached')
                    continue
                for attach in this_item['attachment']:
                    # skip any attachment which is not a dict
                    if not isinstance(attach, dict):
                        continue
                    if attach.get('name') and attach.get('url'):
                        # no attachments from non-permitted domains
                        if url_permitted(attach['url'],
                                         federation_list):
                            attachment.append([attach['name'],
                                               attach['url']])
                        else:
                            if debug:
                                print('url not permitted ' +
                                      attach['url'])

            if this_item.get('sensitive'):
                sensitive = this_item['sensitive']

        if content:
            if simple:
                print(_clean_html(content) + '\n')
            else:
                pprint(item)
                person_posts[item['id']] = {
                    "sensitive": sensitive,
                    "inreplyto": in_reply_to,
                    "summary": summary,
                    "html": content,
                    "plaintext": _clean_html(content),
                    "attachment": attachment,
                    "mentions": mentions,
                    "emoji": emoji
                }
            i += 1

            if i == max_posts:
                break
    return person_posts
2019-06-29 10:08:59 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def _get_common_words() -> tuple:
    """Returns a tuple of common lower case words
    """
    return (
        'that', 'some', 'about', 'then', 'they', 'were',
        'also', 'from', 'with', 'this', 'have', 'more',
        'need', 'here', 'would', 'these', 'into', 'very',
        'well', 'when', 'what', 'your', 'there', 'which',
        'even', 'there', 'such', 'just', 'those', 'only',
        'will', 'much', 'than', 'them', 'each', 'goes',
        'been', 'over', 'their', 'where', 'could', 'though',
        'like', 'think', 'same', 'maybe', 'really', 'thing',
        'something', 'possible', 'actual', 'actually',
        'because', 'around', 'having', 'especially', 'other',
        'making', 'made', 'make', 'makes', 'including',
        'includes', 'know', 'knowing', 'knows', 'things',
        'say', 'says', 'saying', 'many', 'somewhat',
        'problem', 'problems', 'idea', 'ideas',
        'using', 'uses', 'https', 'still', 'want', 'wants'
    )
2021-07-23 11:57:39 +00:00
2021-07-23 13:50:32 +00:00
2021-12-31 15:16:53 +00:00
def _update_word_frequency(content: str, word_frequency: {}) -> None:
    """Adds the words within the given content to a dictionary
    which maps each word to the number of times it has been seen

    Words of fewer than three characters, three character words
    containing lower case letters, words containing certain
    punctuation and common words are not counted.
    """
    text = remove_html(content)
    for separator in ('.', ';', '?', '\n', ':'):
        text = text.replace(separator, ' ')

    excluded_substrings = ('&', '"', '@', "'", "--", '//')
    common_words = _get_common_words()
    for word in text.split(' '):
        no_of_chars = len(word)
        if no_of_chars < 3:
            continue
        # three character words must not contain lower case letters
        if no_of_chars == 3 and word.upper() != word:
            continue
        if any(substr in word for substr in excluded_substrings):
            continue
        if word.lower() in common_words:
            continue
        word_frequency[word] = (word_frequency.get(word) or 0) + 1
2022-05-31 17:13:28 +00:00
def get_post_domains(session, outbox_url: str, max_posts: int, debug: bool,
                     project_version: str, http_prefix: str, domain: str,
                     word_frequency: {}, domain_list: [],
                     system_language: str, signing_priv_key_pem: str) -> []:
    """Returns a list of domains referenced within the posts of
    the given outbox.
    The domains are taken from the inReplyTo field and from any
    mention tags of up to max_posts feed items. The supplied
    domain_list is extended in place and is also the returned list.
    word_frequency is updated with the words used within the posts
    """
    if not outbox_url:
        return []

    profile_str = 'https://www.w3.org/ns/activitystreams'
    if '/outbox/' in outbox_url:
        mime_type = 'application/ld+json; '
    else:
        mime_type = 'application/activity+json; '
    as_header = {
        'Accept': mime_type + 'profile="' + profile_str + '"'
    }

    # this is the same list object as the one supplied by the caller
    post_domains = domain_list

    def _add_domain(actor_url: str) -> None:
        """Adds the domain of the given url to the list, if not present"""
        post_domain, _ = get_domain_from_actor(actor_url)
        if post_domain not in post_domains:
            post_domains.append(post_domain)

    user_feed = parse_user_feed(signing_priv_key_pem,
                                session, outbox_url, as_header,
                                project_version, http_prefix, domain, debug)
    for post_ctr, item in enumerate(user_feed, start=1):
        if post_ctr > max_posts:
            break
        if not has_object_dict(item):
            continue
        content_str = get_base_content_from_post(item, system_language)
        if content_str:
            _update_word_frequency(content_str, word_frequency)
        post_obj = item['object']
        # domain of the post being replied to
        if post_obj.get('inReplyTo'):
            if isinstance(post_obj['inReplyTo'], str):
                _add_domain(post_obj['inReplyTo'])
        # domains of any mentioned accounts
        if not post_obj.get('tag'):
            continue
        for tag_item in post_obj['tag']:
            if not tag_item.get('type'):
                continue
            if tag_item['type'].lower() != 'mention':
                continue
            if tag_item.get('href'):
                _add_domain(tag_item['href'])
    return post_domains
2020-07-08 10:09:51 +00:00
2021-12-29 21:55:09 +00:00
def _get_posts_for_blocked_domains(base_dir: str,
                                   session, outbox_url: str, max_posts: int,
                                   debug: bool,
                                   project_version: str, http_prefix: str,
                                   domain: str,
                                   signing_priv_key_pem: str) -> {}:
    """Returns a dictionary of posts for blocked domains.
    The dictionary is keyed by blocked domain, with each value being
    a list of urls of posts from the given outbox which reply to, or
    mention an account on, that domain. Up to max_posts feed items
    are examined
    """
    if not outbox_url:
        return {}

    profile_str = 'https://www.w3.org/ns/activitystreams'
    if '/outbox/' in outbox_url:
        mime_type = 'application/ld+json; '
    else:
        mime_type = 'application/activity+json; '
    as_header = {
        'Accept': mime_type + 'profile="' + profile_str + '"'
    }

    blocked_posts = {}

    def _record_if_blocked(actor_url: str, post_obj: {}) -> None:
        """If the domain of actor_url is blocked then store the url
        (or failing that the id) of the post against that domain
        """
        post_domain, _ = get_domain_from_actor(actor_url)
        if not is_blocked_domain(base_dir, post_domain):
            return
        if post_obj.get('url'):
            url = post_obj['url']
        else:
            url = post_obj['id']
        if not blocked_posts.get(post_domain):
            blocked_posts[post_domain] = [url]
        elif url not in blocked_posts[post_domain]:
            blocked_posts[post_domain].append(url)

    user_feed = parse_user_feed(signing_priv_key_pem,
                                session, outbox_url, as_header,
                                project_version, http_prefix, domain, debug)
    for post_ctr, item in enumerate(user_feed, start=1):
        if post_ctr > max_posts:
            break
        if not has_object_dict(item):
            continue
        post_obj = item['object']
        # replies to a blocked domain
        if post_obj.get('inReplyTo'):
            if isinstance(post_obj['inReplyTo'], str):
                _record_if_blocked(post_obj['inReplyTo'], post_obj)
        # mentions of accounts on a blocked domain
        if not post_obj.get('tag'):
            continue
        for tag_item in post_obj['tag']:
            if not tag_item.get('type'):
                continue
            if tag_item['type'].lower() != 'mention':
                continue
            if tag_item.get('href'):
                _record_if_blocked(tag_item['href'], post_obj)
    return blocked_posts
2021-12-29 21:55:09 +00:00
def delete_all_posts(base_dir: str,
                     nickname: str, domain: str, boxname: str) -> None:
    """Deletes every file and subdirectory within one of the boxes
    (inbox, outbox, tlblogs or tlnews) of an account.
    Any other box name is ignored. Failures to delete are printed
    and do not stop the remaining entries from being deleted
    """
    permitted_boxes = ('inbox', 'outbox', 'tlblogs', 'tlnews')
    if boxname not in permitted_boxes:
        return

    box_dir = create_person_dir(nickname, domain, base_dir, boxname)
    for dir_entry in os.scandir(box_dir):
        entry_path = os.path.join(box_dir, dir_entry.name)
        try:
            if os.path.isfile(entry_path):
                os.unlink(entry_path)
                continue
            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path, ignore_errors=False, onerror=None)
        except OSError as ex:
            print('ERROR: delete_all_posts ' + str(ex))
2020-04-04 10:05:27 +00:00
2021-12-28 18:13:52 +00:00
def save_post_to_box(base_dir: str, http_prefix: str, post_id: str,
                     nickname: str, domain: str, post_json_object: {},
                     boxname: str) -> str:
    """Saves the given json to the given box of an account.
    If no post_id is supplied then a new one is created and assigned
    to the post.
    Returns the filename which was saved to, or None if the box name
    is not recognised
    """
    permitted_boxes = ('inbox', 'outbox', 'tlblogs', 'tlnews', 'scheduled')
    if boxname not in permitted_boxes:
        return None

    # the account directory is looked up without any port number,
    # whereas a newly created id uses the domain as it was supplied
    domain_without_port = remove_domain_port(domain)

    if not post_id:
        status_number, _ = get_status_number()
        actor = local_actor_url(http_prefix, nickname, domain)
        post_id = actor + '/statuses/' + status_number
        post_json_object['id'] = post_id + '/activity'
    if has_object_dict(post_json_object):
        post_json_object['object']['id'] = post_id
        post_json_object['object']['atomUri'] = post_id

    box_dir = \
        create_person_dir(nickname, domain_without_port, base_dir, boxname)
    # slashes cannot appear within a filename, so store them as hashes
    filename = box_dir + '/' + post_id.replace('/', '#') + '.json'

    save_json(post_json_object, filename)
    return filename
2020-04-04 10:05:27 +00:00
2022-08-22 20:14:37 +00:00
def _update_hashtags_index(base_dir: str, tag: {}, new_post_id: str,
                           nickname: str) -> None:
    """Adds a post to the index file for a hashtag, so that posts
    for the hashtag can be quickly looked up.
    Each index line contains the number of days since the epoch,
    the nickname and the post id with slashes replaced by hashes.
    New entries are added at the top of the file. Tags which are
    not of type Hashtag are ignored
    """
    if tag['type'] != 'Hashtag':
        return

    # create hashtags directory
    tags_dir = base_dir + '/tags'
    if not os.path.isdir(tags_dir):
        os.mkdir(tags_dir)
    # the filename is the tag name without its first character
    tags_filename = tags_dir + '/' + tag['name'][1:] + '.txt'
    new_post_id = new_post_id.replace('/', '#')

    def _index_line() -> str:
        """Returns the line to be stored within the index file"""
        epoch = datetime.datetime(1970, 1, 1)
        days_since_epoch = (datetime.datetime.utcnow() - epoch).days
        return str(days_since_epoch) + ' ' + nickname + ' ' + \
            new_post_id + '\n'

    if not os.path.isfile(tags_filename):
        # create a new tags index file
        tag_line = _index_line()
        try:
            with open(tags_filename, 'w+', encoding='utf-8') as tags_file:
                tags_file.write(tag_line)
        except OSError:
            print('EX: _update_hashtags_index unable to write tags file ' +
                  tags_filename)
        return

    if text_in_file(new_post_id, tags_filename):
        # this post is already within the index
        return

    # prepend to tags index file
    tag_line = _index_line()
    try:
        with open(tags_filename, 'r+', encoding='utf-8') as tags_file:
            content = tags_file.read()
            if tag_line not in content:
                tags_file.seek(0, 0)
                tags_file.write(tag_line + content)
    except OSError as ex:
        print('EX: Failed to write entry to tags file ' +
              tags_filename + ' ' + str(ex))
2020-04-04 10:05:27 +00:00
2019-08-09 11:12:08 +00:00
2021-12-29 21:55:09 +00:00
def _add_schedule_post(base_dir: str, nickname: str, domain: str,
                       event_date_str: str, post_id: str) -> None:
    """Adds a scheduled post to the schedule index of an account.
    Each index line contains the date followed by the post id with
    slashes replaced by hashes. New entries are added at the top of
    the index and an entry which already exists is not added again
    """
    handle = nickname + '@' + domain
    schedule_index_filename = \
        base_dir + '/accounts/' + handle + '/schedule.index'

    index_str = event_date_str + ' ' + post_id.replace('/', '#')
    index_line = index_str + '\n'

    if not os.path.isfile(schedule_index_filename):
        # there is no index yet, so create one
        try:
            with open(schedule_index_filename, 'w+',
                      encoding='utf-8') as schedule_file:
                schedule_file.write(index_line)
        except OSError as ex:
            print('EX: Failed to write entry to scheduled posts index2 ' +
                  schedule_index_filename + ' ' + str(ex))
        return

    if text_in_file(index_str, schedule_index_filename):
        return

    # prepend to the existing index
    try:
        with open(schedule_index_filename, 'r+',
                  encoding='utf-8') as schedule_file:
            content = schedule_file.read()
            if index_line not in content:
                schedule_file.seek(0, 0)
                schedule_file.write(index_line + content)
                print('DEBUG: scheduled post added to index')
    except OSError as ex:
        print('EX: Failed to write entry to scheduled posts index ' +
              schedule_index_filename + ' ' + str(ex))
2020-01-13 10:49:03 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def valid_content_warning(cw: str) -> str:
    """Returns a validated content warning.
    Any html is removed, hash characters are removed and the result
    is passed through remove_invalid_chars
    """
    cw = remove_html(cw)
    # hashtags within content warnings apparently cause a lot of trouble
    # so remove them
    if '#' in cw:
        # removing a standalone hash can leave two adjacent spaces
        # behind, so collapse those into a single space
        cw = cw.replace('#', '').replace('  ', ' ')
    return remove_invalid_chars(cw)
2020-08-25 19:35:55 +00:00
2021-12-29 21:55:09 +00:00
def _load_auto_cw(base_dir: str, nickname: str, domain: str) -> []:
    """Returns the lines of the automatic content warnings file
    (autocw.txt) of an account.
    An empty list is returned if the file does not exist or
    cannot be read
    """
    auto_cw_filename = acct_dir(base_dir, nickname, domain) + '/autocw.txt'
    if os.path.isfile(auto_cw_filename):
        try:
            with open(auto_cw_filename, 'r', encoding='utf-8') as fp_auto:
                return fp_auto.readlines()
        except OSError:
            print('EX: unable to load auto cw file ' + auto_cw_filename)
    return []
2021-12-29 21:55:09 +00:00
def _add_auto_cw(base_dir: str, nickname: str, domain: str,
                 subject: str, content: str) -> str:
    """Applies the automatic content warning rules of an account
    to a post and returns the resulting subject line.
    Each rule has the form "text -> warning". If the text appears
    within the content then the warning is appended to the subject,
    unless the subject already contains it
    """
    new_subject = subject
    for cw_rule in _load_auto_cw(base_dir, nickname, domain):
        if '->' not in cw_rule:
            continue
        rule_sections = cw_rule.split('->')
        if rule_sections[0].strip() not in content:
            continue
        cw_str = rule_sections[1].strip()
        if not new_subject:
            new_subject = cw_str
        elif cw_str not in new_subject:
            new_subject += ', ' + cw_str
    return new_subject
2020-09-13 18:56:41 +00:00
2021-12-29 21:55:09 +00:00
def _create_post_cw_from_reply(base_dir: str, nickname: str, domain: str,
                               in_reply_to: str,
                               sensitive: bool, summary: str,
                               system_language: str,
                               languages_understood: []) -> (bool, str):
    """If this is a reply to a post which is marked as sensitive
    then the reply is also marked as sensitive, and takes its
    summary from the post being replied to if that post has one.
    Returns the sensitive flag and the summary. These are returned
    unchanged if this is not a reply, if it is already sensitive or
    if the post being replied to cannot be located and loaded
    """
    if not in_reply_to or sensitive:
        return sensitive, summary

    # locate the post which this is a reply to
    reply_post_filename = \
        locate_post(base_dir, nickname, domain, in_reply_to)
    if not reply_post_filename:
        return sensitive, summary

    reply_to_json = load_json(reply_post_filename)
    if not reply_to_json:
        return sensitive, summary
    if not reply_to_json.get('object'):
        return sensitive, summary

    # if the original post has a content warning
    # then reproduce the same warning
    reply_obj = reply_to_json['object']
    if reply_obj.get('sensitive'):
        sensitive = True
        if reply_obj.get('summary'):
            summary = \
                get_summary_from_post(reply_to_json,
                                      system_language,
                                      languages_understood)
    return sensitive, summary
2021-12-29 21:55:09 +00:00
def _create_post_s2s(base_dir: str, nickname: str, domain: str, port: int,
                     http_prefix: str, content: str, status_number: str,
                     published: str, new_post_id: str, post_context: {},
                     to_recipients: [], to_cc: [], in_reply_to: str,
                     sensitive: bool, comments_enabled: bool,
                     tags: [], attach_image_filename: str,
                     media_type: str, image_description: str, city: str,
                     post_object_type: str, summary: str,
                     in_reply_to_atom_uri: str, system_language: str,
                     conversation_id: str, low_bandwidth: bool,
                     content_license_url: str) -> {}:
    """Creates a new server-to-server post.
    This is a Create activity which wraps the post object.
    If no conversation id is given then the id of the new post
    is used as the conversation id
    """
    actor_url = local_actor_url(http_prefix, nickname, domain)
    replies_id = actor_url + '/statuses/' + status_number + '/replies'
    if not conversation_id:
        conversation_id = new_post_id

    post_object = {
        'id': new_post_id,
        'conversation': conversation_id,
        'type': post_object_type,
        'summary': summary,
        'inReplyTo': in_reply_to,
        'published': published,
        'url': http_prefix + '://' + domain + '/@' + nickname + '/' +
        status_number,
        'attributedTo': actor_url,
        'to': to_recipients,
        'cc': to_cc,
        'sensitive': sensitive,
        'atomUri': new_post_id,
        'inReplyToAtomUri': in_reply_to_atom_uri,
        'commentsEnabled': comments_enabled,
        'rejectReplies': not comments_enabled,
        'mediaType': 'text/html',
        'content': content,
        'contentMap': {
            system_language: content
        },
        'attachment': [],
        'tag': tags,
        'replies': {
            'id': replies_id,
            'type': 'Collection',
            'first': {
                'type': 'CollectionPage',
                'next': replies_id + '?only_other_accounts=true&page=true',
                'partOf': replies_id,
                'items': []
            }
        }
    }
    if attach_image_filename:
        # any attachment is added to the post object
        # rather than to the wrapping activity
        post_object = \
            attach_media(base_dir, http_prefix, nickname, domain, port,
                         post_object, attach_image_filename,
                         media_type, image_description, city, low_bandwidth,
                         content_license_url)

    return {
        '@context': post_context,
        'id': new_post_id + '/activity',
        'type': 'Create',
        'actor': actor_url,
        'published': published,
        'to': to_recipients,
        'cc': to_cc,
        'object': post_object
    }
2021-12-29 21:55:09 +00:00
def _create_post_c2s(base_dir: str, nickname: str, domain: str, port: int,
                     http_prefix: str, content: str, status_number: str,
                     published: str, new_post_id: str, post_context: {},
                     to_recipients: [], to_cc: [], in_reply_to: str,
                     sensitive: bool, comments_enabled: bool,
                     tags: [], attach_image_filename: str,
                     media_type: str, image_description: str, city: str,
                     post_object_type: str, summary: str,
                     in_reply_to_atom_uri: str, system_language: str,
                     conversation_id: str, low_bandwidth: bool,
                     content_license_url: str) -> {}:
    """Creates a new client-to-server post.
    This is the post object on its own, without a wrapping
    Create activity.
    If no conversation id is given then the id of the new post
    is used as the conversation id
    """
    domain_full = get_full_domain(domain, port)
    actor_url = local_actor_url(http_prefix, nickname, domain_full)
    replies_id = actor_url + '/statuses/' + status_number + '/replies'
    # the url uses the same full domain as attributedTo and the
    # replies collection, so that all of them refer to the same host
    new_post_url = \
        http_prefix + '://' + domain_full + '/@' + nickname + '/' + \
        status_number
    if not conversation_id:
        conversation_id = new_post_id
    new_post = {
        "@context": post_context,
        'id': new_post_id,
        'conversation': conversation_id,
        'type': post_object_type,
        'summary': summary,
        'inReplyTo': in_reply_to,
        'published': published,
        'url': new_post_url,
        'attributedTo': actor_url,
        'to': to_recipients,
        'cc': to_cc,
        'sensitive': sensitive,
        'atomUri': new_post_id,
        'inReplyToAtomUri': in_reply_to_atom_uri,
        'commentsEnabled': comments_enabled,
        'rejectReplies': not comments_enabled,
        'mediaType': 'text/html',
        'content': content,
        'contentMap': {
            system_language: content
        },
        'attachment': [],
        'tag': tags,
        'replies': {
            'id': replies_id,
            'type': 'Collection',
            'first': {
                'type': 'CollectionPage',
                'next': replies_id + '?only_other_accounts=true&page=true',
                'partOf': replies_id,
                'items': []
            }
        }
    }
    if attach_image_filename:
        new_post = \
            attach_media(base_dir, http_prefix, nickname, domain, port,
                         new_post, attach_image_filename,
                         media_type, image_description, city, low_bandwidth,
                         content_license_url)
    return new_post
2021-12-31 15:16:53 +00:00
def _create_post_place_and_time(event_date: str, end_date: str,
                                event_time: str, end_time: str,
                                summary: str, content: str,
                                schedule_post: bool,
                                event_uuid: str,
                                location: str,
                                tags: []) -> str:
    """Adds a place and time to the tags on a new post.
    An Event tag is added if there is an event date and this is
    neither a scheduled post nor has an event uuid. A Place tag is
    added if there is a location and no event uuid.
    Returns the starting date and time of the event as a string,
    or None if there is no event date
    """
    def _date_with_time(date_str: str, time_str: str) -> str:
        """Joins a date and an optional time into a single string.
        Without a time midday is used, and a time not ending
        in Z has seconds and a utc offset appended
        """
        if not time_str:
            return date_str + 'T12:00:00Z'
        if time_str.endswith('Z'):
            return date_str + 'T' + time_str
        return date_str + 'T' + time_str + \
            ':00' + strftime("%z", gmtime())

    # get the ending date and time
    end_date_str = None
    if end_date:
        end_date_str = _date_with_time(end_date, end_time)

    # get the starting date and time
    event_date_str = None
    if event_date:
        event_date_str = _date_with_time(event_date, event_time)
        # without an end date the event ends when it starts
        if not end_date_str:
            end_date_str = event_date_str
        if not schedule_post and not event_uuid:
            # the event is named after the summary,
            # or failing that the content
            tags.append({
                "@context": "https://www.w3.org/ns/activitystreams",
                "type": "Event",
                "name": summary if summary else content,
                "startTime": event_date_str,
                "endTime": end_date_str
            })

    if location and not event_uuid:
        place_tag = {
            "@context": "https://www.w3.org/ns/activitystreams",
            "type": "Place",
            "name": location
        }
        # a location given as a link may supply coordinates
        latitude = longitude = None
        if '://' in location:
            _, latitude, longitude = \
                geocoords_from_map_link(location)
        if latitude and longitude:
            place_tag["latitude"] = latitude
            place_tag["longitude"] = longitude
        tags.append(place_tag)
    return event_date_str
2021-12-31 15:16:53 +00:00
def _consolidate_actors_list(actors_list: []) -> None:
    """Removes duplicated actors from the given list, in place.
    An actor of the form https://domain/@nick is removed if the list
    also contains the same actor using one of the user paths,
    such as https://domain/users/nick
    """
    # actors having the short /@nick form
    candidates = []
    for actor in actors_list:
        if '/@' in actor and actor not in candidates:
            candidates.append(actor)
    if not candidates:
        return

    user_paths = get_user_paths()
    # short form actors whose full form is also within the list
    duplicates = [
        actor for actor in candidates
        if any(actor.replace('/@', usr_path) in actors_list
               for usr_path in user_paths)
    ]
    for actor in duplicates:
        actors_list.remove(actor)
2021-12-31 15:16:53 +00:00
def _create_post_mentions(cc_url: str, new_post: {},
                          to_recipients: [], tags: []) -> None:
    """Updates mentions for a new post.
    The cc url is added to the front of the cc list of the post.
    If the post has a single recipient ending with #Public and the
    cc url ends with /followers then the href of each Mention tag
    is also added to the cc list. Duplicated actors are then removed.
    Nothing is changed if there is no cc url
    """
    if not cc_url:
        return

    if not new_post.get('object'):
        # the post has no 'object' field, so its cc list is at the top level
        if cc_url not in new_post['cc']:
            new_post['cc'] = [cc_url] + new_post['cc']
        # consolidate the cc list of the post itself
        _consolidate_actors_list(new_post['cc'])
        return

    if cc_url not in new_post['object']['cc']:
        new_post['object']['cc'] = [cc_url] + new_post['object']['cc']

    to_cc = new_post['object']['cc']
    # NOTE(review): on this early return the top level cc list is not
    # synchronised with the cc list of the object - confirm this is intended
    if len(to_recipients) != 1:
        return

    # if this is a public post then include any mentions in cc
    if to_recipients[0].endswith('#Public') and \
       cc_url.endswith('/followers'):
        for tag in tags:
            if tag['type'] != 'Mention':
                continue
            if tag['href'] not in to_cc:
                new_post['object']['cc'].append(tag['href'])

    _consolidate_actors_list(new_post['object']['cc'])
    # the activity has the same cc list as the object which it wraps
    new_post['cc'] = new_post['object']['cc']
2021-06-27 16:12:10 +00:00
2021-12-29 21:55:09 +00:00
def _create_post_mod_report(base_dir: str,
                            is_moderation_report: bool, new_post: {},
                            new_post_id: str) -> None:
    """If this is a moderation report then give the post a
    moderation status of pending and append its id to the
    moderation index file
    """
    if not is_moderation_report:
        return

    # add status to the object if there is one, otherwise to the post
    status_holder = new_post
    if new_post.get('object'):
        status_holder = new_post['object']
    status_holder['moderationStatus'] = 'pending'

    # save to index file
    moderation_index_file = base_dir + '/accounts/moderation.txt'
    try:
        with open(moderation_index_file, 'a+', encoding='utf-8') as mod_file:
            mod_file.write(new_post_id + '\n')
    except OSError:
        print('EX: unable to write moderation index file ' +
              moderation_index_file)
2021-06-27 16:12:10 +00:00
2021-12-31 15:16:53 +00:00
def get_actor_from_in_reply_to(in_reply_to: str) -> str:
    """Tries to get the replied to actor from the inReplyTo post id.
    The id may use either slashes or hashes as its separator.
    Returns None if no actor could be obtained
    Note: this will not always be successful for some instance types
    """
    reply_nickname = get_nickname_from_actor(in_reply_to)
    if not reply_nickname:
        return None

    slash_marker = '/' + reply_nickname + '/'
    hash_marker = '#' + reply_nickname + '#'
    if slash_marker in in_reply_to:
        # the actor is everything up to and including the nickname
        reply_actor = \
            in_reply_to.split(slash_marker)[0] + '/' + reply_nickname
    elif hash_marker in in_reply_to:
        reply_actor = \
            in_reply_to.split(hash_marker)[0] + '#' + reply_nickname
        # convert the hash separators back into slashes
        reply_actor = reply_actor.replace('#', '/')
    else:
        return None

    if '://' not in reply_actor:
        return None
    return reply_actor
2021-12-29 21:55:09 +00:00
def _create_post_base(base_dir: str,
                      nickname: str, domain: str, port: int,
                      to_url: str, cc_url: str, http_prefix: str, content: str,
                      save_to_file: bool,
                      client_to_server: bool, comments_enabled: bool,
                      attach_image_filename: str,
                      media_type: str, image_description: str, city: str,
                      is_moderation_report: bool,
                      is_article: bool,
                      in_reply_to: str,
                      in_reply_to_atom_uri: str,
                      subject: str, schedule_post: bool,
                      event_date: str, event_time: str,
                      location: str,
                      event_uuid: str, category: str,
                      join_mode: str,
                      end_date: str, end_time: str,
                      maximum_attendee_capacity: int,
                      replies_moderation_option: str,
                      anonymous_participation_enabled: bool,
                      event_status: str, ticket_url: str,
                      system_language: str,
                      conversation_id: str, low_bandwidth: bool,
                      content_license_url: str,
                      languages_understood: [], translate: {}) -> {}:
    """Creates a message
    Builds the json for a new post from the given content, as either a
    server-to-server or a client-to-server post depending upon
    client_to_server.
    If schedule_post is set then the post is saved to the scheduled box,
    which requires both event_date and event_time. Otherwise, if
    save_to_file is set, the post is saved to tlblogs if it is an
    article or to the outbox if it is not.
    Returns the new post, or None if to_url is supplied but is
    not a string.
    Note that category, join_mode, maximum_attendee_capacity,
    replies_moderation_option, anonymous_participation_enabled,
    event_status and ticket_url are not referenced within this function
    """
    content = remove_invalid_chars(content)

    # append any automatic content warnings to the subject
    subject = _add_auto_cw(base_dir, nickname, domain, subject, content)

    # mentions are not looked up for the news account
    if nickname != 'news':
        mentioned_recipients = \
            get_mentioned_people(base_dir, http_prefix, content, domain, False)
    else:
        mentioned_recipients = ''

    # add hashtags from audio file ID3 tags, such as Artist, Album, etc
    if attach_image_filename and media_type:
        audio_types = get_audio_extensions()
        music_metadata = None
        for ext in audio_types:
            if ext in media_type:
                music_metadata = get_music_metadata(attach_image_filename)
                break
        if music_metadata:
            for audio_tag, audio_value in music_metadata.items():
                if audio_tag in ('title', 'track'):
                    continue
                # capitalize and remove any spaces
                audio_value = audio_value.title().replace(' ', '')
                # check that the tag is valid
                if valid_hash_tag(audio_value) and \
                   '#' + audio_value not in content:
                    # check that it hasn't been blocked
                    if not is_blocked_hashtag(base_dir, audio_value):
                        content += ' #' + audio_value

    tags = []
    hashtags_dict = {}

    # from here onwards domain includes the port, where applicable
    domain = get_full_domain(domain, port)

    # add tags
    if nickname != 'news':
        content = \
            add_html_tags(base_dir, http_prefix,
                          nickname, domain, content,
                          mentioned_recipients,
                          hashtags_dict, translate, True)

    # replace emoji with unicode
    tags = []
    for tag_name, tag in hashtags_dict.items():
        tags.append(tag)

    # get list of tags
    if nickname != 'news':
        content = \
            replace_emoji_from_tags(None, base_dir, content, tags, 'content',
                                    False, True)
    # remove replaced emoji
    # (iterate over a copy, because entries are deleted from the original)
    hashtags_dict_copy = hashtags_dict.copy()
    for tag_name, tag in hashtags_dict_copy.items():
        if tag.get('name'):
            if tag['name'].startswith(':'):
                if tag['name'] not in content:
                    del hashtags_dict[tag_name]

    status_number, published = get_status_number()
    new_post_id = \
        local_actor_url(http_prefix, nickname, domain) + \
        '/statuses/' + status_number

    # a post having a subject is sensitive,
    # with the subject as its content warning
    sensitive = False
    summary = None
    if subject:
        summary = remove_invalid_chars(valid_content_warning(subject))
        sensitive = True

    to_recipients = []
    to_cc = []
    if to_url:
        if not isinstance(to_url, str):
            print('ERROR: to_url is not a string')
            return None
        to_recipients = [to_url]

    # who to send to
    if mentioned_recipients:
        for mention in mentioned_recipients:
            if mention not in to_cc:
                to_cc.append(mention)

    # the post is public if any recipient ends with #Public
    is_public = False
    for recipient in to_recipients:
        if recipient.endswith('#Public'):
            is_public = True
            break

    # create a list of hashtags
    # Only posts which are #Public are searchable by hashtag
    if hashtags_dict:
        for tag_name, tag in hashtags_dict.items():
            if not post_tag_exists(tag['type'], tag['name'], tags):
                tags.append(tag)
            if is_public:
                _update_hashtags_index(base_dir, tag, new_post_id, nickname)
        # print('Content tags: ' + str(tags))

    # a reply can take on the content warning of the post replied to
    sensitive, summary = \
        _create_post_cw_from_reply(base_dir, nickname, domain,
                                   in_reply_to, sensitive, summary,
                                   system_language, languages_understood)

    # adds any Event and Place tags, returning the event start time
    event_date_str = \
        _create_post_place_and_time(event_date, end_date,
                                    event_time, end_time,
                                    summary, content, schedule_post,
                                    event_uuid, location, tags)

    post_context = get_individual_post_context()

    if not is_public:
        # make sure that CC doesn't also contain a To address
        # eg. To: [ "https://mydomain/users/foo/followers" ]
        #     CC: [ "X", "Y", "https://mydomain/users/foo", "Z" ]
        remove_from_cc = []
        for cc_recipient in to_cc:
            for send_to_actor in to_recipients:
                # this is a substring match, so that an actor is
                # matched by its followers collection
                if cc_recipient in send_to_actor and \
                   cc_recipient not in remove_from_cc:
                    remove_from_cc.append(cc_recipient)
                    break
        for cc_removal in remove_from_cc:
            to_cc.remove(cc_removal)
    else:
        if in_reply_to:
            # If this is a public post then get the actor being
            # replied to end ensure that it is within the CC list
            reply_actor = get_actor_from_in_reply_to(in_reply_to)
            if reply_actor:
                if reply_actor not in to_cc:
                    to_cc.append(reply_actor)

    # the type of post to be made
    post_object_type = 'Note'
    if is_article:
        post_object_type = 'Article'

    if not client_to_server:
        new_post = \
            _create_post_s2s(base_dir, nickname, domain, port,
                             http_prefix, content, status_number,
                             published, new_post_id, post_context,
                             to_recipients, to_cc, in_reply_to,
                             sensitive, comments_enabled,
                             tags, attach_image_filename,
                             media_type, image_description, city,
                             post_object_type, summary,
                             in_reply_to_atom_uri, system_language,
                             conversation_id, low_bandwidth,
                             content_license_url)
    else:
        new_post = \
            _create_post_c2s(base_dir, nickname, domain, port,
                             http_prefix, content, status_number,
                             published, new_post_id, post_context,
                             to_recipients, to_cc, in_reply_to,
                             sensitive, comments_enabled,
                             tags, attach_image_filename,
                             media_type, image_description, city,
                             post_object_type, summary,
                             in_reply_to_atom_uri, system_language,
                             conversation_id, low_bandwidth,
                             content_license_url)

    # add the cc url and any mentions to the cc list of the post
    _create_post_mentions(cc_url, new_post, to_recipients, tags)

    _create_post_mod_report(base_dir, is_moderation_report,
                            new_post, new_post_id)

    # If a patch has been posted - i.e. the output from
    # git format-patch - then convert the activitypub type
    convert_post_to_patch(base_dir, nickname, domain, new_post)

    if schedule_post:
        if event_date and event_time:
            # add an item to the scheduled post index file
            _add_schedule_post(base_dir, nickname, domain,
                               event_date_str, new_post_id)
            save_post_to_box(base_dir, http_prefix, new_post_id,
                             nickname, domain, new_post, 'scheduled')
        else:
            # the post is returned without having been saved
            print('Unable to create scheduled post without ' +
                  'date and time values')
            return new_post
    elif save_to_file:
        if is_article:
            save_post_to_box(base_dir, http_prefix, new_post_id,
                             nickname, domain, new_post, 'tlblogs')
        else:
            save_post_to_box(base_dir, http_prefix, new_post_id,
                             nickname, domain, new_post, 'outbox')
    return new_post
2019-06-29 10:08:59 +00:00
2020-04-04 10:05:27 +00:00
2021-12-28 19:33:29 +00:00
def outbox_message_create_wrap(http_prefix: str,
                               nickname: str, domain: str, port: int,
                               message_json: {}) -> {}:
    """Wraps a received object within a Create activity
    https://www.w3.org/TR/activitypub/#object-without-create
    The given message_json becomes the 'object' of the returned
    activity, and has its id, url and atomUri fields set in place
    """
    domain = get_full_domain(domain, port)
    status_number, published = get_status_number()
    # a published time supplied with the message takes precedence
    published = message_json.get('published') or published
    actor_url = local_actor_url(http_prefix, nickname, domain)
    status_url = actor_url + '/statuses/' + status_number
    wrapped = {
        "@context": "https://www.w3.org/ns/activitystreams",
        'id': status_url + '/activity',
        'type': 'Create',
        'actor': actor_url,
        'published': published,
        'to': message_json['to'],
        'cc': message_json.get('cc') or [],
        'object': message_json
    }
    wrapped_object = wrapped['object']
    # the object takes the same id as the activity which wraps it
    wrapped_object['id'] = wrapped['id']
    wrapped_object['url'] = \
        http_prefix + '://' + domain + '/@' + nickname + '/' + status_number
    wrapped_object['atomUri'] = status_url
    return wrapped
2019-07-03 21:37:46 +00:00
2020-04-04 10:05:27 +00:00
2022-05-31 12:38:18 +00:00
def _post_is_addressed_to_followers(nickname: str, domain: str, port: int,
                                    http_prefix: str,
                                    post_json_object: {}) -> bool:
    """Returns true if the followers collection of the given nickname
    appears within the 'to' or 'cc' fields of the given post
    """
    domain_full = get_full_domain(domain, port)

    if not post_json_object.get('object'):
        return False

    # Recipients are read from the object, unless this is an Update
    # or the object is not a dict, in which case the activity is used
    if post_json_object['type'] != 'Update' and \
       has_object_dict(post_json_object):
        addressed = post_json_object['object']
    else:
        addressed = post_json_object
    to_list = addressed.get('to') or []
    cc_list = addressed.get('cc') or []

    followers_url = \
        local_actor_url(http_prefix, nickname, domain_full) + '/followers'

    # does the followers url exist in 'to' or 'cc' lists?
    return followers_url in to_list or followers_url in cc_list
2019-07-08 13:30:04 +00:00
2020-04-04 10:05:27 +00:00
2021-12-28 19:33:29 +00:00
def pin_post(base_dir: str, nickname: str, domain: str,
             pinned_content: str) -> None:
    """Pins the given content to the profile of the given account,
    by writing it to pinToProfile.txt within the account directory
    """
    pinned_filename = \
        acct_dir(base_dir, nickname, domain) + '/pinToProfile.txt'
    try:
        with open(pinned_filename, 'w+', encoding='utf-8') as fp_pin:
            fp_pin.write(pinned_content)
    except OSError:
        print('EX: unable to write ' + pinned_filename)
2021-01-24 18:09:21 +00:00
2021-12-28 19:33:29 +00:00
def undo_pinned_post(base_dir: str, nickname: str, domain: str) -> None:
    """Removes any pinned content for the given account,
    by deleting pinToProfile.txt from the account directory
    """
    pinned_filename = \
        acct_dir(base_dir, nickname, domain) + '/pinToProfile.txt'
    if os.path.isfile(pinned_filename):
        try:
            os.remove(pinned_filename)
        except OSError:
            print('EX: undo_pinned_post unable to delete ' +
                  pinned_filename)
2021-11-09 18:03:17 +00:00
2021-01-24 18:35:42 +00:00
2021-12-28 19:33:29 +00:00
def get_pinned_post_as_json(base_dir: str, http_prefix: str,
                            nickname: str, domain: str,
                            domain_full: str, system_language: str) -> {}:
    """Returns the pinned profile post as json
    The content is read from pinToProfile.txt within the account
    directory. An empty dict is returned if that file does not exist,
    is empty, or could not be read
    """
    account_dir = acct_dir(base_dir, nickname, domain)
    pinned_filename = account_dir + '/pinToProfile.txt'
    actor = local_actor_url(http_prefix, nickname, domain_full)
    if not os.path.isfile(pinned_filename):
        return {}

    pinned_content = None
    try:
        with open(pinned_filename, 'r', encoding='utf-8') as pin_file:
            pinned_content = pin_file.read()
    except OSError:
        # the file may have been removed or become unreadable after
        # the isfile check, so treat this as there being no pinned post
        print('EX: get_pinned_post_as_json unable to read ' +
              pinned_filename)
    if not pinned_content:
        return {}

    return {
        'atomUri': actor + '/pinned',
        'attachment': [],
        'attributedTo': actor,
        'cc': [
            actor + '/followers'
        ],
        'content': pinned_content,
        'contentMap': {
            system_language: pinned_content
        },
        'id': actor + '/pinned',
        'inReplyTo': None,
        'inReplyToAtomUri': None,
        # the modification time of the file is used as the publish time
        'published': file_last_modified(pinned_filename),
        'replies': {},
        'sensitive': False,
        'summary': None,
        'tag': [],
        'to': ['https://www.w3.org/ns/activitystreams#Public'],
        'type': 'Note',
        'url': replace_users_with_at(actor) + '/pinned'
    }
2021-01-25 14:31:37 +00:00
2021-12-28 19:33:29 +00:00
def json_pin_post(base_dir: str, http_prefix: str,
                  nickname: str, domain: str,
                  domain_full: str, system_language: str) -> {}:
    """Returns the featured collection for the given account as json.
    It contains the pinned post, if there is one, and is otherwise empty
    """
    pinned_post_json = \
        get_pinned_post_as_json(base_dir, http_prefix,
                                nickname, domain,
                                domain_full, system_language)
    featured_items = [pinned_post_json] if pinned_post_json else []

    actor = local_actor_url(http_prefix, nickname, domain_full)
    return {
        '@context': get_individual_post_context(),
        'id': actor + '/collections/featured',
        'orderedItems': featured_items,
        'totalItems': len(featured_items),
        'type': 'OrderedCollection'
    }
2021-12-29 21:55:09 +00:00
def regenerate_index_for_box(base_dir: str,
                             nickname: str, domain: str,
                             box_name: str) -> None:
    """Generates an index for the given box if it doesn't exist
    Used by unit tests to artificially create an index
    The index is the list of post filenames directly within the box
    directory, in reverse sorted order, one per line. Nothing is done
    if the box directory is missing or the index file already exists
    """
    box_dir = acct_dir(base_dir, nickname, domain) + '/' + box_name
    box_index_filename = box_dir + '.index'

    if not os.path.isdir(box_dir):
        return
    if os.path.isfile(box_index_filename):
        return

    index_lines = []
    for _, _, files in os.walk(box_dir):
        # only filenames containing ':##' are indexed
        index_lines = [fname for fname in files if ':##' in fname]
        # only the top level of the box directory is indexed
        break

    index_lines.sort(reverse=True)

    result = ''.join(line + '\n' for line in index_lines)
    try:
        with open(box_index_filename, 'w+', encoding='utf-8') as fp_box:
            fp_box.write(result)
    except OSError:
        print('EX: unable to generate index for ' + box_name + ' ' + result)
        # don't claim that an index was generated when the write failed
        return
    print('Index generated for ' + box_name + '\n' + result)
2021-08-01 19:19:45 +00:00
2021-12-28 19:33:29 +00:00
def create_public_post(base_dir: str,
                       nickname: str, domain: str, port: int, http_prefix: str,
                       content: str, save_to_file: bool,
                       client_to_server: bool, comments_enabled: bool,
                       attach_image_filename: str, media_type: str,
                       image_description: str, city: str,
                       in_reply_to: str,
                       in_reply_to_atom_uri: str, subject: str,
                       schedule_post: bool,
                       event_date: str, event_time: str, event_end_time: str,
                       location: str, is_article: bool, system_language: str,
                       conversation_id: str, low_bandwidth: bool,
                       content_license_url: str,
                       languages_understood: [], translate: {}) -> {}:
    """Public post
    Addressed to #Public, with the followers of the account in cc
    """
    domain_full = get_full_domain(domain, port)
    public_url = 'https://www.w3.org/ns/activitystreams#Public'
    followers_url = \
        local_actor_url(http_prefix, nickname, domain_full) + '/followers'
    return _create_post_base(base_dir, nickname, domain, port,
                             public_url, followers_url,
                             http_prefix, content, save_to_file,
                             client_to_server, comments_enabled,
                             attach_image_filename, media_type,
                             image_description, city,
                             # not a moderation report
                             False, is_article,
                             in_reply_to, in_reply_to_atom_uri, subject,
                             schedule_post, event_date, event_time, location,
                             # event uuid, category and join mode
                             None, None, None,
                             # the event ends on the day that it starts
                             event_date, event_end_time,
                             # maximum attendee capacity,
                             # replies moderation option,
                             # anonymous participation enabled,
                             # event status and ticket url
                             None, None, None, None, None,
                             system_language,
                             conversation_id, low_bandwidth,
                             content_license_url,
                             languages_understood, translate)
2021-12-29 21:55:09 +00:00
def _append_citations_to_blog_post(base_dir: str,
                                   nickname: str, domain: str,
                                   blog_json: {}) -> None:
    """Appends any citations to a new blog post
    Citations are read from .citations.txt within the account
    directory, one per line, in the form date#####title#####link.
    Each becomes an Article tag on the object of blog_json, which
    is modified in place. Lines not matching that form are ignored
    """
    # append citations tags, stored in a file
    citations_filename = \
        acct_dir(base_dir, nickname, domain) + '/.citations.txt'
    if not os.path.isfile(citations_filename):
        return
    citations_separator = '#####'
    try:
        with open(citations_filename, 'r', encoding='utf-8') as fp_cit:
            citations = fp_cit.readlines()
    except OSError:
        # an unreadable citations file should not prevent posting
        print('EX: _append_citations_to_blog_post unable to read ' +
              citations_filename)
        return
    for line in citations:
        if citations_separator not in line:
            continue
        sections = line.strip().split(citations_separator)
        if len(sections) != 3:
            continue
        # sections[0] is the date, which is not used within the tag
        tag_json = {
            "type": "Article",
            "name": sections[1],
            "url": sections[2]
        }
        blog_json['object']['tag'].append(tag_json)
2021-02-11 10:01:27 +00:00
2021-12-28 19:33:29 +00:00
def create_blog_post(base_dir: str,
                     nickname: str, domain: str, port: int, http_prefix: str,
                     content: str, save_to_file: bool,
                     client_to_server: bool, comments_enabled: bool,
                     attach_image_filename: str, media_type: str,
                     image_description: str, city: str,
                     in_reply_to: str, in_reply_to_atom_uri: str,
                     subject: str, schedule_post: bool,
                     event_date: str, event_time: str, event_end_time: str,
                     location: str, system_language: str,
                     conversation_id: str, low_bandwidth: bool,
                     content_license_url: str,
                     languages_understood: [], translate: {}) -> {}:
    """Blog post
    This is a public post created as an article, whose url uses
    /users/ rather than /@ and which has any citations appended
    """
    is_article = True
    blog_json = \
        create_public_post(base_dir,
                           nickname, domain, port, http_prefix,
                           content, save_to_file,
                           client_to_server, comments_enabled,
                           attach_image_filename, media_type,
                           image_description, city,
                           in_reply_to, in_reply_to_atom_uri, subject,
                           schedule_post,
                           event_date, event_time, event_end_time, location,
                           is_article, system_language, conversation_id,
                           low_bandwidth, content_license_url,
                           languages_understood, translate)
    blog_object = blog_json['object']
    blog_object['url'] = blog_object['url'].replace('/@', '/users/')
    _append_citations_to_blog_post(base_dir, nickname, domain, blog_json)

    return blog_json
2020-10-07 21:26:03 +00:00
2021-12-29 21:55:09 +00:00
def create_news_post(base_dir: str,
                     domain: str, port: int, http_prefix: str,
                     content: str, save_to_file: bool,
                     attach_image_filename: str, media_type: str,
                     image_description: str, city: str,
                     subject: str, system_language: str,
                     conversation_id: str, low_bandwidth: bool,
                     content_license_url: str,
                     languages_understood: [], translate: {}) -> {}:
    """News post
    This is a public article created by the 'news' account, with
    comments disabled. It is not a reply, is not scheduled and has
    no event details or location
    """
    news_nickname = 'news'
    client_to_server = False
    comments_enabled = False
    schedule_post = False
    is_article = True
    blog = \
        create_public_post(base_dir,
                           news_nickname, domain, port, http_prefix,
                           content, save_to_file,
                           client_to_server, comments_enabled,
                           attach_image_filename, media_type,
                           image_description, city,
                           # in reply to and in reply to atom uri
                           None, None, subject,
                           schedule_post,
                           # event date, time, end time and location
                           None, None, None, None,
                           is_article, system_language, conversation_id,
                           low_bandwidth, content_license_url,
                           languages_understood, translate)
    blog['object']['type'] = 'Article'
    return blog
2021-12-28 19:33:29 +00:00
def create_question_post(base_dir: str,
                         nickname: str, domain: str, port: int,
                         http_prefix: str,
                         content: str, q_options: [],
                         save_to_file: bool,
                         client_to_server: bool, comments_enabled: bool,
                         attach_image_filename: str, media_type: str,
                         image_description: str, city: str,
                         subject: str, duration_days: int,
                         system_language: str, low_bandwidth: bool,
                         content_license_url: str,
                         languages_understood: [], translate: {}) -> {}:
    """Question post with multiple choice options
    A public post is created and then converted into a Question,
    having one oneOf entry for each of the given options and an
    end time which is duration_days after the current day
    """
    domain_full = get_full_domain(domain, port)
    public_url = 'https://www.w3.org/ns/activitystreams#Public'
    followers_url = \
        local_actor_url(http_prefix, nickname, domain_full) + '/followers'
    message_json = \
        _create_post_base(base_dir, nickname, domain, port,
                          public_url, followers_url,
                          http_prefix, content, save_to_file,
                          client_to_server, comments_enabled,
                          attach_image_filename, media_type,
                          image_description, city,
                          False, False, None, None, subject,
                          False, None, None, None, None, None,
                          None, None, None,
                          None, None, None, None, None, system_language,
                          None, low_bandwidth, content_license_url,
                          languages_understood, translate)
    question = message_json['object']
    question['type'] = 'Question'
    question['oneOf'] = []
    question['votersCount'] = 0

    # voting ends at the start of the day which is
    # duration_days after the current day
    epoch = datetime.datetime(1970, 1, 1)
    days_since_epoch = \
        int((datetime.datetime.utcnow() - epoch).days + duration_days)
    end_time = epoch + datetime.timedelta(days_since_epoch)
    question['endTime'] = end_time.strftime("%Y-%m-%dT%H:%M:%SZ")

    # each option begins with no replies
    question['oneOf'].extend({
        "type": "Note",
        "name": option_name,
        "replies": {
            "type": "Collection",
            "totalItems": 0
        }
    } for option_name in q_options)
    return message_json
2019-11-25 22:34:26 +00:00
2020-02-24 13:32:19 +00:00
2021-12-28 19:33:29 +00:00
def create_unlisted_post(base_dir: str,
                         nickname: str, domain: str, port: int,
                         http_prefix: str,
                         content: str, save_to_file: bool,
                         client_to_server: bool, comments_enabled: bool,
                         attach_image_filename: str, media_type: str,
                         image_description: str, city: str,
                         in_reply_to: str, in_reply_to_atom_uri: str,
                         subject: str, schedule_post: bool,
                         event_date: str, event_time: str, event_end_time: str,
                         location: str, system_language: str,
                         conversation_id: str, low_bandwidth: bool,
                         content_license_url: str,
                         languages_understood: [], translate: {}) -> {}:
    """Unlisted post. This has the #Public and followers links inverted,
    being addressed to the followers of the account with #Public in cc
    """
    domain_full = get_full_domain(domain, port)
    followers_url = \
        local_actor_url(http_prefix, nickname, domain_full) + '/followers'
    public_url = 'https://www.w3.org/ns/activitystreams#Public'
    is_moderation_report = False
    is_article = False
    return _create_post_base(base_dir, nickname, domain, port,
                             followers_url, public_url,
                             http_prefix, content, save_to_file,
                             client_to_server, comments_enabled,
                             attach_image_filename, media_type,
                             image_description, city,
                             is_moderation_report, is_article,
                             in_reply_to, in_reply_to_atom_uri, subject,
                             schedule_post, event_date,
                             event_time, location,
                             # event uuid, category and join mode,
                             # then the end date and end time
                             None, None, None, event_date, event_end_time,
                             None, None, None, None, None, system_language,
                             conversation_id, low_bandwidth,
                             content_license_url, languages_understood,
                             translate)
2020-04-04 10:05:27 +00:00
2019-07-28 11:08:14 +00:00
2021-12-28 19:33:29 +00:00
def create_followers_only_post(base_dir: str,
                               nickname: str, domain: str, port: int,
                               http_prefix: str, content: str,
                               save_to_file: bool,
                               client_to_server: bool, comments_enabled: bool,
                               attach_image_filename: str, media_type: str,
                               image_description: str, city: str,
                               in_reply_to: str,
                               in_reply_to_atom_uri: str,
                               subject: str, schedule_post: bool,
                               event_date: str,
                               event_time: str, event_end_time: str,
                               location: str, system_language: str,
                               conversation_id: str, low_bandwidth: bool,
                               content_license_url: str,
                               languages_understood: [],
                               translate: {}) -> {}:
    """Followers only post
    Addressed to the followers of the account, with nothing in cc
    """
    domain_full = get_full_domain(domain, port)
    followers_url = \
        local_actor_url(http_prefix, nickname, domain_full) + '/followers'
    post_cc = None
    is_moderation_report = False
    is_article = False
    return _create_post_base(base_dir, nickname, domain, port,
                             followers_url, post_cc,
                             http_prefix, content, save_to_file,
                             client_to_server, comments_enabled,
                             attach_image_filename, media_type,
                             image_description, city,
                             is_moderation_report, is_article,
                             in_reply_to, in_reply_to_atom_uri, subject,
                             schedule_post, event_date, event_time, location,
                             # event uuid, category and join mode,
                             # then the end date and end time
                             None, None, None, event_date, event_end_time,
                             None, None, None, None, None, system_language,
                             conversation_id, low_bandwidth,
                             content_license_url, languages_understood,
                             translate)
2021-12-29 21:55:09 +00:00
def get_mentioned_people(base_dir: str, http_prefix: str,
                         content: str, domain: str, debug: bool) -> []:
    """Extracts a list of mentioned actors from the given message content
    Mentions are space separated words beginning with '@'. A mention
    without a domain refers to an account on this instance, which
    must exist. None is returned if the content contains no '@'
    """
    if '@' not in content:
        return None
    mentions = []
    for word in content.split(' '):
        if not word.startswith('@'):
            continue
        handle = word[1:]
        if debug:
            print('DEBUG: mentioned handle ' + handle)
        if '@' in handle:
            # the domain must look like a hostname
            external_domain = handle.split('@')[1]
            if '.' not in external_domain and \
               external_domain != 'localhost':
                continue
        else:
            # no domain was given, so this must be a local account
            handle = handle + '@' + domain
            if not os.path.isdir(base_dir + '/accounts/' + handle):
                continue
        handle_sections = handle.split('@')
        mentioned_nickname = handle_sections[0]
        handle_domain = handle_sections[1]
        mentioned_domain = handle_domain.strip('\n').strip('\r')
        if ':' in mentioned_domain:
            mentioned_domain = remove_domain_port(mentioned_domain)
        if not valid_nickname(mentioned_domain, mentioned_nickname):
            continue
        # the actor url uses the domain exactly as it appears
        # within the handle
        mentions.append(local_actor_url(http_prefix, mentioned_nickname,
                                        handle_domain))
    return mentions
2019-07-27 22:48:34 +00:00
2020-04-04 10:05:27 +00:00
2021-12-28 19:33:29 +00:00
def create_direct_message_post(base_dir: str,
                               nickname: str, domain: str, port: int,
                               http_prefix: str, content: str,
                               save_to_file: bool, client_to_server: bool,
                               comments_enabled: bool,
                               attach_image_filename: str, media_type: str,
                               image_description: str, city: str,
                               in_reply_to: str,
                               in_reply_to_atom_uri: str,
                               subject: str, debug: bool,
                               schedule_post: bool,
                               event_date: str, event_time: str,
                               event_end_time: str,
                               location: str, system_language: str,
                               conversation_id: str, low_bandwidth: bool,
                               content_license_url: str,
                               languages_understood: [],
                               dm_is_chat: bool, translate: {}) -> {}:
    """Direct Message post
    The recipients are the people mentioned within the content.
    None is returned if nobody is mentioned
    """
    content = resolve_petnames(base_dir, nickname, domain, content)
    mentioned_people = \
        get_mentioned_people(base_dir, http_prefix, content, domain, debug)
    if debug:
        print('mentioned_people: ' + str(mentioned_people))
    if not mentioned_people:
        return None

    # no recipients are given when the post is created
    post_to = None
    post_cc = None
    is_moderation_report = False
    is_article = False
    message_json = \
        _create_post_base(base_dir, nickname, domain, port,
                          post_to, post_cc,
                          http_prefix, content, save_to_file,
                          client_to_server, comments_enabled,
                          attach_image_filename, media_type,
                          image_description, city,
                          is_moderation_report, is_article,
                          in_reply_to, in_reply_to_atom_uri, subject,
                          schedule_post, event_date, event_time, location,
                          None, None, None, event_date, event_end_time,
                          None, None, None, None, None, system_language,
                          conversation_id, low_bandwidth,
                          content_license_url, languages_understood,
                          translate)

    # mentioned recipients go into To rather than Cc
    message_object = message_json['object']
    recipients = message_object['cc']
    message_json['to'] = recipients
    message_object['to'] = recipients
    message_json['cc'] = []
    message_object['cc'] = []

    if dm_is_chat:
        message_object['type'] = 'ChatMessage'

    if schedule_post:
        # save the scheduled post with its altered recipients
        post_id = remove_id_ending(message_object['id'])
        save_post_to_box(base_dir, http_prefix, post_id,
                         nickname, domain, message_json, 'scheduled')
    return message_json
2021-12-28 19:33:29 +00:00
def create_report_post(base_dir: str,
                       nickname: str, domain: str, port: int, http_prefix: str,
                       content: str, save_to_file: bool,
                       client_to_server: bool, comments_enabled: bool,
                       attach_image_filename: str, media_type: str,
                       image_description: str, city: str,
                       debug: bool, subject: str, system_language: str,
                       low_bandwidth: bool,
                       content_license_url: str,
                       languages_understood: [], translate: {}) -> {}:
    """Send a report to moderators
    A separate post is created for each moderator listed within
    accounts/moderators.txt, or for the admin if no moderators are
    listed, and a .newReport notification file is written into the
    account directory of each of them.
    Returns the last post created, or None if there was nobody to
    send the report to
    """
    domain_full = get_full_domain(domain, port)

    # add a title to distinguish moderation reports from other posts
    report_title = 'Moderation Report'
    if not subject:
        subject = report_title
    elif not subject.startswith(report_title):
        subject = report_title + ': ' + subject

    # create the list of moderators from the moderators file
    moderator_lines = []
    moderators_file = base_dir + '/accounts/moderators.txt'
    if os.path.isfile(moderators_file):
        try:
            with open(moderators_file, 'r', encoding='utf-8') as fp_mod:
                moderator_lines = fp_mod.readlines()
        except OSError:
            # fall through to using the admin as the moderator
            print('EX: create_report_post unable to read ' +
                  moderators_file)

    moderators_list = []
    url_prefixes = ('http', 'ipfs', 'ipns', 'hyper')
    for line in moderator_lines:
        line = line.strip('\n').strip('\r')
        if line.startswith('#'):
            continue
        if line.startswith('/users/'):
            # reduce /users/nickname to the nickname
            line = line[len('/users/'):]
        if line.startswith('@'):
            line = line[1:]
        if not line:
            # a blank line doesn't refer to any moderator
            continue
        moderator_actor = None
        if '@' in line:
            # a handle, for which only the nickname is used
            nick = line.split('@')[0]
            moderator_actor = \
                local_actor_url(http_prefix, nick, domain_full)
        elif line.startswith(url_prefixes):
            # must be a local address - no remote moderators
            if '://' + domain_full + '/' in line:
                moderator_actor = line
        elif '/' not in line:
            # a nickname
            moderator_actor = \
                local_actor_url(http_prefix, line, domain_full)
        if moderator_actor and moderator_actor not in moderators_list:
            moderators_list.append(moderator_actor)

    if not moderators_list:
        # if there are no moderators then the admin becomes the moderator
        admin_nickname = get_config_param(base_dir, 'admin')
        if admin_nickname:
            local_actor = \
                local_actor_url(http_prefix, admin_nickname, domain_full)
            moderators_list.append(local_actor)
    if not moderators_list:
        return None
    if debug:
        print('DEBUG: Sending report to moderators')
        print(str(moderators_list))

    post_cc = None
    post_json_object = None
    for to_url in moderators_list:
        # who is this report going to?
        if '/users/' not in to_url:
            # the account directory can't be found from this url
            print('EX: create_report_post no nickname within ' + to_url)
            continue
        to_nickname = to_url.split('/users/')[1]
        handle = to_nickname + '@' + domain

        post_json_object = \
            _create_post_base(base_dir, nickname, domain, port,
                              to_url, post_cc,
                              http_prefix, content, save_to_file,
                              client_to_server, comments_enabled,
                              attach_image_filename, media_type,
                              image_description, city,
                              True, False, None, None, subject,
                              False, None, None, None, None, None,
                              None, None, None,
                              None, None, None, None, None, system_language,
                              None, low_bandwidth, content_license_url,
                              languages_understood, translate)
        if not post_json_object:
            continue

        # save a notification file so that the moderator
        # knows something new has appeared
        new_report_file = base_dir + '/accounts/' + handle + '/.newReport'
        if os.path.isfile(new_report_file):
            continue
        try:
            with open(new_report_file, 'w+', encoding='utf-8') as fp_report:
                fp_report.write(to_url + '/moderation')
        except OSError:
            print('EX: create_report_post unable to write ' + new_report_file)

    return post_json_object
2019-08-11 11:25:27 +00:00
2020-04-04 10:05:27 +00:00
2021-12-31 15:40:01 +00:00
def thread_send_post(session, post_json_str: str, federation_list: [],
                     inbox_url: str, base_dir: str,
                     signature_header_json: {},
                     signature_header_json_ld: {},
                     post_log: [], debug: bool,
                     http_prefix: str, domain_full: str) -> None:
    """Sends a post to an inbox url, with retries.

    Up to 20 attempts are made, sleeping 30 seconds after each failed
    attempt. Every attempt first posts using signature_header_json and,
    if that is reported as unauthorized, posts again using
    signature_header_json_ld.

    The loop ends early when:
      - the post succeeds
      - a return code in the 5xx range is received
      - the ld+json attempt is also reported as unauthorized

    A 'Success on try'/'Retry' entry is appended to post_log for each
    attempt (the list is trimmed to its 16 most recent entries) and,
    when debug is set, also appended to base_dir/post.log
    """
    send_interval_sec = 30
    for tries in range(20):
        post_result = None
        unauthorized = False
        if debug:
            print('Getting post_json_string for ' + inbox_url)
        try:
            post_result, unauthorized, return_code = \
                post_json_string(session, post_json_str, federation_list,
                                 inbox_url, signature_header_json,
                                 debug, http_prefix, domain_full)
            if return_code in range(500, 600):
                # if an instance is returning a code which indicates that
                # it might have a runtime error, like 503, then don't
                # continue to post to it
                break
            if debug:
                print('Obtained post_json_string for ' + inbox_url +
                      ' unauthorized: ' + str(unauthorized))
        except Exception as ex:
            # a failed attempt is not fatal: fall through and retry
            print('ERROR: post_json_string failed ' + str(ex))

        if unauthorized:
            # try again with application/ld+json header
            post_result = None
            unauthorized = False
            if debug:
                print('Getting ld post_json_string for ' + inbox_url)
            try:
                post_result, unauthorized, return_code = \
                    post_json_string(session, post_json_str,
                                     federation_list,
                                     inbox_url, signature_header_json_ld,
                                     debug, http_prefix, domain_full)
                if return_code in range(500, 600):
                    # if an instance is returning a code which indicates
                    # that it might have a runtime error, like 503, then
                    # don't continue to post to it
                    break
                if debug:
                    print('Obtained ld post_json_string for ' + inbox_url +
                          ' unauthorized: ' + str(unauthorized))
            except Exception as ex:
                print('ERROR: ld post_json_string failed ' + str(ex))

        if unauthorized:
            # both header types were rejected, so retrying won't help
            print('WARN: thread_send_post: Post is unauthorized ' +
                  inbox_url + ' ' + post_json_str)
            break

        if post_result:
            log_str = 'Success on try ' + str(tries) + ': ' + post_json_str
        else:
            log_str = 'Retry ' + str(tries) + ': ' + post_json_str
        post_log.append(log_str)
        # keep the length of the log finite
        # Don't accumulate massive files on systems with limited resources
        while len(post_log) > 16:
            post_log.pop(0)

        if debug:
            # save the log file. Append mode creates the file if needed.
            # A failure to write the log should not stop the sending
            # thread, so it is only reported.
            post_log_filename = base_dir + '/post.log'
            try:
                with open(post_log_filename, 'a+',
                          encoding='utf-8') as log_file:
                    log_file.write(log_str + '\n')
            except OSError:
                print('EX: thread_send_post unable to write ' +
                      post_log_filename)

        if post_result:
            if debug:
                print('DEBUG: successful json post to ' + inbox_url)
            # our work here is done
            break
        if debug:
            print(post_json_str)
            print('DEBUG: json post to ' + inbox_url +
                  ' failed. Waiting for ' +
                  str(send_interval_sec) + ' seconds.')
        time.sleep(send_interval_sec)
2021-12-29 21:55:09 +00:00
def send_post(signing_priv_key_pem: str, project_version: str,
              session, base_dir: str, nickname: str, domain: str, port: int,
              to_nickname: str, to_domain: str, to_port: int, cc_str: str,
              http_prefix: str, content: str,
              save_to_file: bool, client_to_server: bool,
              comments_enabled: bool,
              attach_image_filename: str, media_type: str,
              image_description: str, city: str,
              federation_list: [], send_threads: [], post_log: [],
              cached_webfingers: {}, person_cache: {},
              is_article: bool, system_language: str,
              languages_understood: [],
              shared_items_federated_domains: [],
              shared_item_federation_tokens: {},
              low_bandwidth: bool, content_license_url: str,
              translate: {},
              debug: bool = False, in_reply_to: str = None,
              in_reply_to_atom_uri: str = None, subject: str = None) -> int:
    """Post to another inbox. Used by unit tests.

    Looks up the recipient via webfinger, creates the post, signs it
    and queues/starts a thread_send_post thread to deliver it.

    Returns 0 if the sending thread was started, otherwise:
      1 webfinger lookup failed or did not return a dict
      3 no inbox url was found for the recipient
      4 no public key was found for the recipient
      5 no actor id was found for the recipient
      6 the private key of the sender was not found
      7 the recipient domain does not appear within the inbox url
      8 the post could not be created
    """
    with_digest = True
    conversation_id = None

    if to_nickname == 'inbox':
        # shared inbox actor on @domain@domain
        to_nickname = to_domain

    to_domain = get_full_domain(to_domain, to_port)

    handle = http_prefix + '://' + to_domain + '/@' + to_nickname

    # lookup the inbox for the To handle
    wf_request = webfinger_handle(session, handle, http_prefix,
                                  cached_webfingers,
                                  domain, project_version, debug, False,
                                  signing_priv_key_pem)
    if not wf_request:
        return 1
    if not isinstance(wf_request, dict):
        print('WARN: Webfinger for ' + handle + ' did not return a dict. ' +
              str(wf_request))
        return 1

    if not client_to_server:
        post_to_box = 'inbox'
    else:
        post_to_box = 'outbox'
        if is_article:
            post_to_box = 'tlblogs'

    # get the actor inbox for the To handle
    # NOTE(review): the trailing integer looks like an identifier for
    # this call site - confirm against get_person_box
    origin_domain = domain
    (inbox_url, _, pub_key, to_person_id, _, _,
     _, _) = get_person_box(signing_priv_key_pem,
                            origin_domain,
                            base_dir, session, wf_request,
                            person_cache,
                            project_version, http_prefix,
                            nickname, domain, post_to_box,
                            72533)

    if not inbox_url:
        return 3
    if not pub_key:
        return 4
    if not to_person_id:
        return 5
    # shared_inbox is optional

    post_json_object = \
        _create_post_base(base_dir, nickname, domain, port,
                          to_person_id, cc_str, http_prefix, content,
                          save_to_file, client_to_server,
                          comments_enabled,
                          attach_image_filename, media_type,
                          image_description, city,
                          False, is_article, in_reply_to,
                          in_reply_to_atom_uri, subject,
                          False, None, None, None, None, None,
                          None, None, None,
                          None, None, None, None, None, system_language,
                          conversation_id, low_bandwidth,
                          content_license_url, languages_understood,
                          translate)
    if not post_json_object:
        # other callers of _create_post_base check for an empty result,
        # so don't try to sign and send a post which doesn't exist
        print('WARN: send_post unable to create post')
        return 8

    # get the senders private key
    private_key_pem = _get_person_key(nickname, domain, base_dir, 'private')
    if not private_key_pem:
        return 6

    if to_domain not in inbox_url:
        return 7
    # the path on the receiving server, used when signing the header
    post_path = inbox_url.split(to_domain, 1)[1]

    if not post_json_object.get('signature'):
        try:
            # sign a copy so that a failure part way through signing
            # leaves the original post unchanged
            signed_post_json_object = post_json_object.copy()
            generate_json_signature(signed_post_json_object, private_key_pem)
            post_json_object = signed_post_json_object
        except Exception as ex:
            print('WARN: send_post failed to JSON-LD sign post, ' + str(ex))
            pprint(signed_post_json_object)

    # convert json to string so that there are no
    # subsequent conversions after creating message body digest
    post_json_str = json.dumps(post_json_object)

    # construct the http header, including the message body digest
    signature_header_json = \
        create_signed_header(None, private_key_pem, nickname, domain, port,
                             to_domain, to_port,
                             post_path, http_prefix, with_digest,
                             post_json_str, 'application/activity+json')
    signature_header_json_ld = \
        create_signed_header(None, private_key_pem, nickname, domain, port,
                             to_domain, to_port,
                             post_path, http_prefix, with_digest,
                             post_json_str, 'application/ld+json')

    # if the "to" domain is within the shared items
    # federation list then send the token for this domain
    # so that it can request a catalog
    domain_full = get_full_domain(domain, port)
    if to_domain in shared_items_federated_domains:
        if shared_item_federation_tokens.get(domain_full):
            signature_header_json['Origin'] = domain_full
            signature_header_json_ld['Origin'] = domain_full
            signature_header_json['SharesCatalog'] = \
                shared_item_federation_tokens[domain_full]
            signature_header_json_ld['SharesCatalog'] = \
                shared_item_federation_tokens[domain_full]
            if debug:
                print('SharesCatalog added to header')
        elif debug:
            print(domain_full + ' not in shared_item_federation_tokens')
    elif debug:
        print(to_domain + ' not in shared_items_federated_domains ' +
              str(shared_items_federated_domains))

    if debug:
        print('signature_header_json: ' + str(signature_header_json))

    # Keep the number of threads being used small
    while len(send_threads) > 1000:
        print('WARN: Maximum threads reached - killing send thread')
        send_threads[0].kill()
        send_threads.pop(0)
        print('WARN: thread killed')

    print('THREAD: thread_send_post')
    # the thread receives copies of the headers so that it does not
    # share the dictionaries created here
    thr = \
        thread_with_trace(target=thread_send_post,
                          args=(session,
                                post_json_str,
                                federation_list,
                                inbox_url, base_dir,
                                signature_header_json.copy(),
                                signature_header_json_ld.copy(),
                                post_log, debug, http_prefix,
                                domain_full), daemon=True)
    send_threads.append(thr)
    begin_thread(thr, 'send_post')
    return 0
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def send_post_via_server(signing_priv_key_pem: str, project_version: str,
                         base_dir: str, session,
                         from_nickname: str, password: str,
                         from_domain: str, from_port: int,
                         to_nickname: str, to_domain: str, to_port: int,
                         cc_str: str,
                         http_prefix: str, content: str,
                         comments_enabled: bool,
                         attach_image_filename: str, media_type: str,
                         image_description: str, city: str,
                         cached_webfingers: {}, person_cache: {},
                         is_article: bool, system_language: str,
                         languages_understood: [],
                         low_bandwidth: bool,
                         content_license_url: str,
                         event_date: str, event_time: str, event_end_time: str,
                         location: str, translate: {},
                         debug: bool = False,
                         in_reply_to: str = None,
                         in_reply_to_atom_uri: str = None,
                         conversation_id: str = None,
                         subject: str = None) -> int:
    """Send a post via a proxy (c2s)

    The post is created locally and then POSTed, using basic auth,
    to the outbox (or tlblogs for articles) of the sending account.

    The recipient is selected from to_domain:
      - ending with 'public': addressed to the public collection,
        with the followers of the sender as cc (cc_str is replaced)
      - ending with 'followers' or 'followersonly': addressed to the
        followers of the sender
      - otherwise: addressed to to_nickname on to_domain

    Returns 0 on success, otherwise:
      1 webfinger lookup failed or did not return a dict
      3 no outbox url was found for the sender
      4 no actor id was found for the sender
      5 the POST failed
      6 no session was supplied
      7 the post could not be created
    """
    if not session:
        print('WARN: No session for send_post_via_server')
        return 6

    from_domain_full = get_full_domain(from_domain, from_port)

    handle = http_prefix + '://' + from_domain_full + '/@' + from_nickname

    # lookup the sending account, since its own outbox receives the post
    wf_request = \
        webfinger_handle(session, handle, http_prefix, cached_webfingers,
                         from_domain_full, project_version, debug, False,
                         signing_priv_key_pem)
    if not wf_request:
        if debug:
            print('DEBUG: post webfinger failed for ' + handle)
        return 1
    if not isinstance(wf_request, dict):
        print('WARN: post webfinger for ' + handle +
              ' did not return a dict. ' + str(wf_request))
        return 1

    post_to_box = 'outbox'
    if is_article:
        post_to_box = 'tlblogs'

    # get the url of the box to post to for the sending account
    # NOTE(review): the trailing integer looks like an identifier for
    # this call site - confirm against get_person_box
    origin_domain = from_domain
    (inbox_url, _, _, from_person_id, _, _,
     _, _) = get_person_box(signing_priv_key_pem,
                            origin_domain,
                            base_dir, session, wf_request,
                            person_cache,
                            project_version, http_prefix,
                            from_nickname,
                            from_domain_full, post_to_box,
                            82796)
    if not inbox_url:
        if debug:
            print('DEBUG: post no ' + post_to_box +
                  ' was found for ' + handle)
        return 3
    if not from_person_id:
        if debug:
            print('DEBUG: post no actor was found for ' + handle)
        return 4

    # Get the json for the c2s post, not saving anything to file
    save_to_file = False
    client_to_server = True

    # work out who the post is addressed to
    to_domain_lower = to_domain.lower()
    if to_domain_lower.endswith('public'):
        to_person_id = 'https://www.w3.org/ns/activitystreams#Public'
        cc_str = \
            local_actor_url(http_prefix, from_nickname, from_domain_full) + \
            '/followers'
    elif to_domain_lower.endswith(('followers', 'followersonly')):
        to_person_id = \
            local_actor_url(http_prefix,
                            from_nickname, from_domain_full) + \
            '/followers'
    else:
        to_domain_full = get_full_domain(to_domain, to_port)
        to_person_id = \
            local_actor_url(http_prefix, to_nickname, to_domain_full)

    post_json_object = \
        _create_post_base(base_dir,
                          from_nickname, from_domain, from_port,
                          to_person_id, cc_str, http_prefix, content,
                          save_to_file, client_to_server,
                          comments_enabled,
                          attach_image_filename, media_type,
                          image_description, city,
                          False, is_article, in_reply_to,
                          in_reply_to_atom_uri, subject,
                          False,
                          event_date, event_time, location,
                          None, None, None, event_date, event_end_time,
                          None, None, None, None, None, system_language,
                          conversation_id, low_bandwidth,
                          content_license_url, languages_understood,
                          translate)
    if not post_json_object:
        # other callers of _create_post_base check for an empty result.
        # Without this check 'null' would be sent to the server
        if debug:
            print('DEBUG: post could not be created for ' + handle)
        return 7

    auth_header = create_basic_auth_header(from_nickname, password)

    if attach_image_filename:
        headers = {
            'host': from_domain_full,
            'Authorization': auth_header
        }
        post_result = \
            post_image(session, attach_image_filename, [],
                       inbox_url, headers, http_prefix, from_domain_full)
        if not post_result:
            # a failed image upload is reported but the post itself
            # is still sent
            if debug:
                print('DEBUG: post failed to upload image')

    headers = {
        'host': from_domain_full,
        'Content-type': 'application/json',
        'Authorization': auth_header
    }
    post_dumps = json.dumps(post_json_object)
    post_result, unauthorized, return_code = \
        post_json_string(session, post_dumps, [],
                         inbox_url, headers, debug,
                         http_prefix, from_domain_full,
                         5, True)
    if not post_result:
        if debug:
            if unauthorized:
                print('DEBUG: POST failed for c2s to ' +
                      inbox_url + ' unathorized')
            else:
                print('DEBUG: POST failed for c2s to ' +
                      inbox_url + ' return code ' + str(return_code))
        return 5

    if debug:
        print('DEBUG: c2s POST success')
    return 0
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def group_followers_by_domain(base_dir: str, nickname: str, domain: str) -> {}:
    """Returns a dictionary with followers grouped by domain

    The followers.txt file of the given account is read one handle
    per line. Lines without an '@' are skipped. Each dictionary key is
    the text after the first '@' of a handle and its value is the list
    of handles having that domain, in file order.
    Returns None if the account has no followers file.
    """
    followers_filename = \
        base_dir + '/accounts/' + nickname + '@' + domain + '/followers.txt'
    if not os.path.isfile(followers_filename):
        return None

    by_domain = {}
    with open(followers_filename, 'r', encoding='utf-8') as foll_file:
        for line in foll_file:
            if '@' not in line:
                continue
            fhandle = remove_eol(line.strip())
            by_domain.setdefault(fhandle.split('@')[1], []).append(fhandle)
    return by_domain
2019-10-16 10:58:31 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def _add_followers_to_public_post(post_json_object: {}) -> None:
2019-10-16 10:58:31 +00:00
"""Adds followers entry to cc if it doesn't exist
"""
2021-12-25 22:09:19 +00:00
if not post_json_object.get('actor'):
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
if isinstance(post_json_object['object'], str):
if not post_json_object.get('to'):
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
if len(post_json_object['to']) > 1:
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
if len(post_json_object['to']) == 0:
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
if not post_json_object['to'][0].endswith('#Public'):
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
if post_json_object.get('cc'):
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
post_json_object['cc'] = post_json_object['actor'] + '/followers'
2021-12-26 10:57:03 +00:00
elif has_object_dict(post_json_object):
2021-12-25 22:09:19 +00:00
if not post_json_object['object'].get('to'):
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
if len(post_json_object['object']['to']) > 1:
2019-10-16 10:58:31 +00:00
return
2021-12-31 17:38:22 +00:00
if len(post_json_object['object']['to']) == 0:
2019-10-16 10:58:31 +00:00
return
2021-12-31 17:38:22 +00:00
if not post_json_object['object']['to'][0].endswith('#Public'):
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
if post_json_object['object'].get('cc'):
2019-10-16 10:58:31 +00:00
return
2021-12-25 22:09:19 +00:00
post_json_object['object']['cc'] = \
post_json_object['actor'] + '/followers'
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def send_signed_json(post_json_object: {}, session, base_dir: str,
                     nickname: str, domain: str, port: int,
                     to_nickname: str, to_domain: str,
                     to_port: int, http_prefix: str,
                     client_to_server: bool, federation_list: [],
                     send_threads: [], post_log: [], cached_webfingers: {},
                     person_cache: {}, debug: bool, project_version: str,
                     shared_items_token: str, group_account: bool,
                     signing_priv_key_pem: str,
                     source_id: int, curr_domain: str,
                     onion_domain: str, i2p_domain: str) -> int:
    """Sends a signed json object to an inbox/outbox

    Looks up the recipient via webfinger, signs the post if it has no
    signature, creates signed http headers for both
    application/activity+json and application/ld+json and appends a
    thread_send_post thread to send_threads.

    Returns 0 if the sending thread was queued, otherwise:
      1 webfinger lookup failed or did not return a dict
      3 no inbox url was found for the recipient
      4 no public key was found for the recipient
      5 no actor id was found for the recipient
      6 the private key of the sender was not found
      7 the recipient domain does not appear within the inbox url
      8 no session was supplied
      9 the recipient domain is not active
    """
    if debug:
        print('DEBUG: send_signed_json start')
    if not session:
        print('WARN: No session specified for send_signed_json')
        return 8
    with_digest = True

    if to_domain.endswith('.onion') or to_domain.endswith('.i2p'):
        http_prefix = 'http'

    if to_nickname == 'inbox':
        # shared inbox actor on @domain@domain
        to_nickname = to_domain

    to_domain = get_full_domain(to_domain, to_port)

    to_domain_url = http_prefix + '://' + to_domain
    if not site_is_active(to_domain_url, 10):
        print('Domain is inactive: ' + to_domain_url)
        return 9
    print('Domain is active: ' + to_domain_url)
    handle_base = to_domain_url + '/@'
    if to_nickname:
        handle = handle_base + to_nickname
    else:
        # no nickname was given, so fall back to a fixed one
        single_user_instance_nickname = 'dev'
        handle = handle_base + single_user_instance_nickname

    if debug:
        print('DEBUG: handle - ' + handle + ' to_port ' + str(to_port))

    # domain shown in the user agent
    # NOTE(review): to_domain is the result of get_full_domain at this
    # point. If that can include a port then these suffix checks
    # would not match - confirm
    ua_domain = curr_domain
    if to_domain.endswith('.onion'):
        ua_domain = onion_domain
    elif to_domain.endswith('.i2p'):
        ua_domain = i2p_domain

    # lookup the inbox for the To handle
    wf_request = webfinger_handle(session, handle, http_prefix,
                                  cached_webfingers,
                                  ua_domain, project_version, debug,
                                  group_account, signing_priv_key_pem)
    if not wf_request:
        if debug:
            print('DEBUG: webfinger for ' + handle + ' failed')
        return 1
    if not isinstance(wf_request, dict):
        print('WARN: Webfinger for ' + handle + ' did not return a dict. ' +
              str(wf_request))
        return 1

    # errors within the webfinger result are only reported
    if wf_request.get('errors'):
        if debug:
            print('DEBUG: webfinger for ' + handle +
                  ' failed with errors ' + str(wf_request['errors']))

    if not client_to_server:
        post_to_box = 'inbox'
    else:
        post_to_box = 'outbox'

    # get the actor inbox/outbox for the To handle
    origin_domain = domain
    (inbox_url, _, pub_key, to_person_id, shared_inbox_url, _,
     _, _) = get_person_box(signing_priv_key_pem,
                            origin_domain,
                            base_dir, session, wf_request,
                            person_cache,
                            project_version, http_prefix,
                            nickname, domain, post_to_box,
                            source_id)

    print("inbox_url: " + str(inbox_url))
    print("to_person_id: " + str(to_person_id))
    print("shared_inbox_url: " + str(shared_inbox_url))

    if inbox_url:
        # for instance actors use the shared inbox
        if inbox_url.endswith('/actor/inbox') or \
           inbox_url.endswith('/instance.actor/inbox'):
            inbox_url = shared_inbox_url

    if not inbox_url:
        if debug:
            print('DEBUG: missing inbox_url')
        return 3

    if debug:
        print('DEBUG: Sending to endpoint ' + inbox_url)

    if not pub_key:
        if debug:
            print('DEBUG: missing pubkey')
        return 4
    if not to_person_id:
        if debug:
            print('DEBUG: missing person_id')
        return 5
    # shared_inbox is optional

    # get the senders private key
    # if the origin is the onion or i2p domain then the key is looked
    # up under the current domain instead
    account_domain = origin_domain
    if onion_domain:
        if account_domain == onion_domain:
            account_domain = curr_domain
    if i2p_domain:
        if account_domain == i2p_domain:
            account_domain = curr_domain
    private_key_pem = \
        _get_person_key(nickname, account_domain, base_dir, 'private', debug)
    if not private_key_pem:
        if debug:
            print('DEBUG: Private key not found for ' +
                  nickname + '@' + account_domain +
                  ' in ' + base_dir + '/keys/private')
        return 6

    if to_domain not in inbox_url:
        if debug:
            print('DEBUG: ' + to_domain + ' is not in ' + inbox_url)
        return 7
    # the path on the receiving server, used when signing the header
    post_path = inbox_url.split(to_domain, 1)[1]

    _add_followers_to_public_post(post_json_object)

    if not post_json_object.get('signature'):
        try:
            # sign a copy so that a failure part way through signing
            # leaves the original post unchanged
            signed_post_json_object = post_json_object.copy()
            generate_json_signature(signed_post_json_object, private_key_pem)
            post_json_object = signed_post_json_object
        except Exception as ex:
            # Exception rather than BaseException, so that thread
            # termination and keyboard interrupts are not swallowed
            print('WARN: send_signed_json failed to JSON-LD sign post, ' +
                  str(ex))
            pprint(signed_post_json_object)

    # convert json to string so that there are no
    # subsequent conversions after creating message body digest
    post_json_str = json.dumps(post_json_object)

    # if the sender domain has changed from clearnet to onion or i2p
    # then change the content of the post accordingly
    if debug:
        print('Checking for changed origin domain: ' +
              domain + ' ' + curr_domain)
    if domain != curr_domain:
        if not curr_domain.endswith('.onion') and \
           not curr_domain.endswith('.i2p'):
            if debug:
                print('Changing post content sender domain from ' +
                      curr_domain + ' to ' + domain)
            post_json_str = \
                post_json_str.replace(curr_domain, domain)

    # construct the http header, including the message body digest
    signature_header_json = \
        create_signed_header(None, private_key_pem, nickname, domain, port,
                             to_domain, to_port,
                             post_path, http_prefix, with_digest,
                             post_json_str,
                             'application/activity+json')
    signature_header_json_ld = \
        create_signed_header(None, private_key_pem, nickname, domain, port,
                             to_domain, to_port,
                             post_path, http_prefix, with_digest,
                             post_json_str,
                             'application/ld+json')

    domain_full = get_full_domain(domain, port)

    # optionally add a token so that the receiving instance may access
    # your shared items catalog. This is added to both sets of headers
    # so that it is also present if the ld+json headers get used
    if shared_items_token:
        signature_header_json['Origin'] = domain_full
        signature_header_json_ld['Origin'] = domain_full
        signature_header_json['SharesCatalog'] = shared_items_token
        signature_header_json_ld['SharesCatalog'] = shared_items_token
    elif debug:
        print('Not sending shared items federation token')

    # Keep the number of threads being used small
    while len(send_threads) > 1000:
        print('WARN: Maximum threads reached - killing send thread')
        send_threads[0].kill()
        send_threads.pop(0)
        print('WARN: thread killed')

    if debug:
        print('DEBUG: starting thread to send post')
        pprint(post_json_object)

    print('THREAD: thread_send_post 2')
    # the thread receives copies of the headers so that it does not
    # share the dictionaries created here
    thr = \
        thread_with_trace(target=thread_send_post,
                          args=(session,
                                post_json_str,
                                federation_list,
                                inbox_url, base_dir,
                                signature_header_json.copy(),
                                signature_header_json_ld.copy(),
                                post_log, debug,
                                http_prefix, domain_full), daemon=True)
    # NOTE(review): the thread is only appended to send_threads and is
    # not started here (unlike send_post, the begin_thread call is
    # disabled). Presumably whatever owns send_threads starts it -
    # confirm before enabling:
    # begin_thread(thr, 'send_signed_json')
    send_threads.append(thr)
    return 0
2020-04-04 10:05:27 +00:00
2021-12-31 15:16:53 +00:00
def add_to_field(activity_type: str, post_json_object: {},
                 debug: bool) -> ({}, bool):
    """The Follow/Add/Remove activity doesn't have a 'to' field and so one
    needs to be added so that activity distribution happens in a consistent way

    activity_type is the activity type (eg. 'Follow' or 'Like') for which
    a 'to' field should be derived from the 'object' field.
    post_json_object is the activity, which is modified in place.

    Returns the activity and a boolean which is True if a 'to' field
    exists, was added, or if the activity is not of the given type.
    It is False only when the activity is of the given type and no 'to'
    address could be derived.
    """
    if post_json_object.get('to'):
        return post_json_object, True

    if debug:
        pprint(post_json_object)
        print('DEBUG: no "to" field when sending to named addresses 2')

    is_same_type = False
    to_field_added = False
    if post_json_object.get('object'):
        if isinstance(post_json_object['object'], str):
            # the object is the url of the thing being acted upon
            if post_json_object.get('type'):
                if post_json_object['type'] == activity_type:
                    is_same_type = True
                    if debug:
                        print('DEBUG: ' +
                              'add_to_field1 "to" field assigned to ' +
                              activity_type)
                    to_address = post_json_object['object']
                    # address the actor rather than an individual post
                    if '/statuses/' in to_address:
                        to_address = to_address.split('/statuses/')[0]
                    post_json_object['to'] = [to_address]
                    if debug:
                        print('DEBUG: "to" address added: ' + to_address)
                    to_field_added = True
        elif has_object_dict(post_json_object):
            # add a to field to bookmark add or remove
            if post_json_object.get('type') and \
               post_json_object.get('actor') and \
               post_json_object['object'].get('type'):
                if post_json_object['type'] in ('Add', 'Remove'):
                    if post_json_object['object']['type'] == 'Document':
                        post_json_object['to'] = \
                            [post_json_object['actor']]
                        post_json_object['object']['to'] = \
                            [post_json_object['actor']]
                        to_field_added = True

            if not to_field_added and \
               post_json_object['object'].get('type'):
                if post_json_object['object']['type'] == activity_type:
                    is_same_type = True
                    # the embedded activity might not contain an
                    # 'object' field, so avoid indexing it directly
                    inner_object = post_json_object['object'].get('object')
                    if isinstance(inner_object, str):
                        if debug:
                            print('DEBUG: add_to_field2 ' +
                                  '"to" field assigned to ' +
                                  activity_type)
                        to_address = inner_object
                        if '/statuses/' in to_address:
                            to_address = to_address.split('/statuses/')[0]
                        post_json_object['object']['to'] = [to_address]
                        post_json_object['to'] = [to_address]
                        if debug:
                            print('DEBUG: "to" address added: ' + to_address)
                        to_field_added = True

    if not is_same_type:
        return post_json_object, True
    if to_field_added:
        return post_json_object, True
    return post_json_object, False
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def _is_profile_update(post_json_object: {}) -> bool:
2021-07-04 12:50:42 +00:00
"""Is the given post a profile update?
for actor updates there is no 'to' within the object
"""
2021-12-25 22:09:19 +00:00
if post_json_object.get('type'):
2022-04-09 15:11:22 +00:00
if has_object_string_type(post_json_object, False):
2021-12-25 22:09:19 +00:00
if (post_json_object['type'] == 'Update' and
(post_json_object['object']['type'] == 'Person' or
post_json_object['object']['type'] == 'Application' or
post_json_object['object']['type'] == 'Group' or
post_json_object['object']['type'] == 'Service')):
2021-10-13 10:11:02 +00:00
return True
2021-07-04 12:50:42 +00:00
return False
2022-03-14 13:29:41 +00:00
def _send_to_named_addresses(server, session, session_onion, session_i2p,
base_dir: str,
2021-12-29 21:55:09 +00:00
nickname: str, domain: str,
onion_domain: str, i2p_domain: str, port: int,
http_prefix: str, federation_list: [],
2021-12-31 15:16:53 +00:00
send_threads: [], post_log: [],
2021-12-29 21:55:09 +00:00
cached_webfingers: {}, person_cache: {},
post_json_object: {}, debug: bool,
project_version: str,
shared_items_federated_domains: [],
2021-12-31 15:16:53 +00:00
shared_item_federation_tokens: {},
signing_priv_key_pem: str,
proxy_type: str) -> None:
2019-07-15 18:20:52 +00:00
"""sends a post to the specific named addresses in to/cc
"""
2019-07-16 10:19:04 +00:00
if not session:
print('WARN: No session for sendToNamedAddresses')
return
2021-12-25 22:09:19 +00:00
if not post_json_object.get('object'):
2019-07-16 10:19:04 +00:00
return
2021-12-31 15:16:53 +00:00
is_profile_update = False
2021-12-26 10:57:03 +00:00
if has_object_dict(post_json_object):
2021-12-29 21:55:09 +00:00
if _is_profile_update(post_json_object):
2021-07-04 12:50:42 +00:00
# use the original object, which has a 'to'
2021-12-31 15:16:53 +00:00
recipients_object = post_json_object
is_profile_update = True
2020-03-22 21:16:02 +00:00
2021-12-31 15:16:53 +00:00
if not is_profile_update:
2021-12-25 22:09:19 +00:00
if not post_json_object['object'].get('to'):
2019-08-20 20:35:15 +00:00
if debug:
2021-12-25 22:09:19 +00:00
pprint(post_json_object)
2020-04-04 10:05:27 +00:00
print('DEBUG: ' +
'no "to" field when sending to named addresses')
2022-04-09 15:11:22 +00:00
if has_object_string_type(post_json_object, debug):
2021-12-25 22:09:19 +00:00
if post_json_object['object']['type'] == 'Follow' or \
post_json_object['object']['type'] == 'Join':
post_json_obj2 = post_json_object['object']['object']
if isinstance(post_json_obj2, str):
2019-08-20 20:35:15 +00:00
if debug:
2022-03-03 15:28:10 +00:00
print('DEBUG: _send_to_named_addresses ' +
'"to" field assigned to Follow')
2021-12-25 22:09:19 +00:00
post_json_object['object']['to'] = \
[post_json_object['object']['object']]
if not post_json_object['object'].get('to'):
2019-08-20 20:35:15 +00:00
return
2021-12-31 15:16:53 +00:00
recipients_object = post_json_object['object']
2020-03-22 21:16:02 +00:00
else:
2021-12-31 15:16:53 +00:00
post_json_object, field_added = \
2021-12-28 19:33:29 +00:00
add_to_field('Follow', post_json_object, debug)
2021-12-31 15:16:53 +00:00
if not field_added:
2019-08-18 16:49:35 +00:00
return
2021-12-31 15:16:53 +00:00
post_json_object, field_added = \
2021-12-28 19:33:29 +00:00
add_to_field('Like', post_json_object, debug)
2021-12-31 15:16:53 +00:00
if not field_added:
2019-07-16 19:07:45 +00:00
return
2021-12-31 15:16:53 +00:00
recipients_object = post_json_object
2019-07-15 18:20:52 +00:00
2020-04-04 10:05:27 +00:00
recipients = []
2021-12-31 15:16:53 +00:00
recipient_type = ('to', 'cc')
for rtype in recipient_type:
if not recipients_object.get(rtype):
2019-08-18 09:39:12 +00:00
continue
2021-12-31 15:16:53 +00:00
if isinstance(recipients_object[rtype], list):
2019-08-18 21:08:38 +00:00
if debug:
2021-12-31 15:16:53 +00:00
pprint(recipients_object)
print('recipients_object: ' + str(recipients_object[rtype]))
for address in recipients_object[rtype]:
2019-08-18 21:15:09 +00:00
if not address:
continue
if '/' not in address:
continue
2019-08-18 20:54:33 +00:00
if address.endswith('#Public'):
continue
if address.endswith('/followers'):
continue
recipients.append(address)
2021-12-31 15:16:53 +00:00
elif isinstance(recipients_object[rtype], str):
address = recipients_object[rtype]
2019-08-18 21:15:09 +00:00
if address:
if '/' in address:
if address.endswith('#Public'):
continue
if address.endswith('/followers'):
continue
recipients.append(address)
2019-07-15 18:20:52 +00:00
if not recipients:
2019-08-18 20:54:33 +00:00
if debug:
print('DEBUG: no individual recipients')
2019-07-15 18:20:52 +00:00
return
2019-07-15 18:29:30 +00:00
if debug:
2020-04-04 10:05:27 +00:00
print('DEBUG: Sending individually addressed posts: ' +
str(recipients))
# randomize the recipients list order, so that we are not favoring
# any particular account in terms of delivery time
random.shuffle(recipients)
2019-07-15 18:29:30 +00:00
# this is after the message has arrived at the server
2021-12-25 20:39:35 +00:00
client_to_server = False
2019-07-15 18:20:52 +00:00
for address in recipients:
2021-12-31 15:16:53 +00:00
to_nickname = get_nickname_from_actor(address)
if not to_nickname:
2019-07-15 18:20:52 +00:00
continue
2021-12-31 15:16:53 +00:00
to_domain, to_port = get_domain_from_actor(address)
if not to_domain:
2019-07-15 18:20:52 +00:00
continue
2021-03-18 23:25:27 +00:00
# Don't send profile/actor updates to yourself
2021-12-31 15:16:53 +00:00
if is_profile_update:
2021-12-26 12:45:03 +00:00
domain_full = get_full_domain(domain, port)
2021-12-31 17:38:22 +00:00
to_domain_full = get_full_domain(to_domain, to_port)
2021-12-31 15:16:53 +00:00
if nickname == to_nickname and \
2021-12-31 17:38:22 +00:00
domain_full == to_domain_full:
2021-03-18 23:25:27 +00:00
if debug:
print('Not sending profile update to self. ' +
2021-12-26 10:00:46 +00:00
nickname + '@' + domain_full)
2021-03-18 23:25:27 +00:00
continue
2020-03-02 16:11:34 +00:00
# if we have an alt onion domain and we are sending to
# another onion domain then switch the clearnet
# domain for the onion one
2021-12-31 15:16:53 +00:00
from_domain = domain
from_domain_full = get_full_domain(domain, port)
from_http_prefix = http_prefix
curr_session = session
curr_proxy_type = proxy_type
2022-03-14 13:29:41 +00:00
session_type = 'default'
2021-12-25 20:43:43 +00:00
if onion_domain:
if not from_domain.endswith('.onion') and \
to_domain.endswith('.onion'):
2021-12-31 15:16:53 +00:00
from_domain = onion_domain
from_domain_full = onion_domain
from_http_prefix = 'http'
curr_session = session_onion
port = 80
to_port = 80
curr_proxy_type = 'tor'
2022-03-14 13:29:41 +00:00
session_type = 'tor'
if i2p_domain:
if not from_domain.endswith('.i2p') and \
to_domain.endswith('.i2p'):
2021-12-31 15:16:53 +00:00
from_domain = i2p_domain
from_domain_full = i2p_domain
from_http_prefix = 'http'
curr_session = session_i2p
port = 80
to_port = 80
curr_proxy_type = 'i2p'
2022-03-14 13:29:41 +00:00
session_type = 'i2p'
2022-03-14 11:22:28 +00:00
if debug:
to_domain_full = get_full_domain(to_domain, to_port)
print('DEBUG: Post sending s2s: ' +
nickname + '@' + from_domain_full +
' to ' + to_nickname + '@' + to_domain_full)
# if the "to" domain is within the shared items
# federation list then send the token for this domain
# so that it can request a catalog
2021-12-31 15:16:53 +00:00
shared_items_token = None
if to_domain in shared_items_federated_domains:
if shared_item_federation_tokens.get(from_domain_full):
shared_items_token = \
shared_item_federation_tokens[from_domain_full]
2021-12-26 17:53:07 +00:00
group_account = has_group_type(base_dir, address, person_cache)
if not curr_session:
curr_session = create_session(curr_proxy_type)
2022-03-14 13:29:41 +00:00
if server:
if session_type == 'tor':
server.session_onion = curr_session
elif session_type == 'i2p':
server.session_i2p = curr_session
else:
server.session = curr_session
send_signed_json(post_json_object, curr_session, base_dir,
2021-12-31 15:16:53 +00:00
nickname, from_domain, port,
to_nickname, to_domain, to_port,
2022-05-31 13:34:29 +00:00
from_http_prefix, client_to_server,
2021-12-29 21:55:09 +00:00
federation_list,
2021-12-31 15:16:53 +00:00
send_threads, post_log, cached_webfingers,
2021-12-29 21:55:09 +00:00
person_cache, debug, project_version,
2021-12-31 15:16:53 +00:00
shared_items_token, group_account,
signing_priv_key_pem, 34436782,
domain, onion_domain, i2p_domain)
2021-12-29 21:55:09 +00:00
2022-03-14 13:29:41 +00:00
def send_to_named_addresses_thread(server, session, session_onion, session_i2p,
                                   base_dir: str, nickname: str, domain: str,
                                   onion_domain: str,
                                   i2p_domain: str, port: int,
                                   http_prefix: str, federation_list: [],
                                   send_threads: [], post_log: [],
                                   cached_webfingers: {}, person_cache: {},
                                   post_json_object: {}, debug: bool,
                                   project_version: str,
                                   shared_items_federated_domains: [],
                                   shared_item_federation_tokens: {},
                                   signing_priv_key_pem: str,
                                   proxy_type: str):
    """Returns a thread used to send a post to named addresses

    The thread runs _send_to_named_addresses with the given arguments.
    None is returned if begin_thread reports that the thread could not
    be started.
    """
    print('THREAD: _send_to_named_addresses')
    send_thread = \
        thread_with_trace(target=_send_to_named_addresses,
                          args=(server, session, session_onion, session_i2p,
                                base_dir, nickname, domain,
                                onion_domain, i2p_domain, port,
                                http_prefix, federation_list,
                                send_threads, post_log,
                                cached_webfingers, person_cache,
                                post_json_object, debug,
                                project_version,
                                shared_items_federated_domains,
                                shared_item_federation_tokens,
                                signing_priv_key_pem,
                                proxy_type), daemon=True)
    if not begin_thread(send_thread, 'send_to_named_addresses_thread'):
        print('WARN: socket error while starting ' +
              'thread to send to named addresses.')
        return None
    return send_thread
2021-10-20 20:00:09 +00:00
2021-12-29 21:55:09 +00:00
def _has_shared_inbox(session, http_prefix: str, domain: str,
                      debug: bool, signing_priv_key_pem: str,
                      ua_domain: str) -> bool:
    """Returns true if the given domain has a shared inbox
    This tries the new and the old way of webfingering the shared inbox

    The handles tried are domain@domain (only when the domain has no
    port number) followed by inbox@domain. The first webfinger result
    which is a dict without an 'errors' entry counts as success.
    """
    try_handles = []
    if ':' not in domain:
        try_handles.append(domain + '@' + domain)
    try_handles.append('inbox@' + domain)
    for handle in try_handles:
        wf_request = webfinger_handle(session, handle, http_prefix, {},
                                      ua_domain, __version__, debug, False,
                                      signing_priv_key_pem)
        if not wf_request:
            continue
        if not isinstance(wf_request, dict):
            continue
        if not wf_request.get('errors'):
            return True
    return False
2019-11-04 10:43:19 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def _sending_profile_update(post_json_object: {}) -> bool:
2020-12-18 10:43:19 +00:00
"""Returns true if the given json is a profile update
"""
2021-12-25 22:09:19 +00:00
if post_json_object['type'] != 'Update':
2020-12-18 10:43:19 +00:00
return False
2022-04-09 15:11:22 +00:00
if not has_object_string_type(post_json_object, False):
2020-12-18 10:43:19 +00:00
return False
2021-12-31 15:16:53 +00:00
activity_type = post_json_object['object']['type']
2021-12-31 17:38:22 +00:00
if activity_type in ('Person', 'Application', 'Group', 'Service'):
2020-12-18 10:43:19 +00:00
return True
return False
2022-03-14 13:29:41 +00:00
def send_to_followers(server, session, session_onion, session_i2p,
                      base_dir: str, nickname: str, domain: str,
                      onion_domain: str, i2p_domain: str, port: int,
                      http_prefix: str, federation_list: [],
                      send_threads: [], post_log: [],
                      cached_webfingers: {}, person_cache: {},
                      post_json_object: {}, debug: bool,
                      project_version: str,
                      shared_items_federated_domains: [],
                      shared_item_federation_tokens: {},
                      signing_priv_key_pem: str) -> None:
    """sends a post to the followers of the given nickname

    Followers are grouped by domain and the domains are visited in a
    random order. For each active domain the post is sent either once
    to a shared inbox, or individually to each follower handle when no
    shared inbox was found. There is a pause between domains.
    If a session needs to be created then it is also stored on the given
    server, if there is one.
    """
    print('send_to_followers')
    if not _post_is_addressed_to_followers(nickname, domain,
                                           port, http_prefix,
                                           post_json_object):
        if debug:
            print('Post is not addressed to followers')
        return
    print('Post is addressed to followers')

    grouped = group_followers_by_domain(base_dir, nickname, domain)
    if not grouped:
        if debug:
            print('Post to followers did not resolve any domains')
        return
    print('Post to followers resolved domains')
    # print(str(grouped))

    # this is after the message has arrived at the server
    client_to_server = False

    # the proxy type used when sending from this instance's own domain.
    # It is re-applied for every follower domain, so that a switch to
    # tor or i2p for one domain doesn't leak into the ones which follow
    default_proxy_type = None
    if domain.endswith('.onion'):
        default_proxy_type = 'tor'
    elif domain.endswith('.i2p'):
        default_proxy_type = 'i2p'

    # calculated once from the port supplied by the caller
    domain_full = get_full_domain(domain, port)

    sending_start_time = datetime.datetime.utcnow()
    print('Sending post to followers begins ' +
          sending_start_time.strftime("%Y-%m-%dT%H:%M:%SZ"))
    sending_ctr = 0

    # randomize the order of sending to instances
    randomized_instances = [
        [follower_domain, follower_handles]
        for follower_domain, follower_handles in grouped.items()
    ]
    random.shuffle(randomized_instances)
    no_of_instances = len(randomized_instances)

    # send out to each instance
    for follower_domain, follower_handles in randomized_instances:
        print('Sending post to followers progress ' +
              str(int(sending_ctr * 100 / no_of_instances)) + '% ' +
              follower_domain)
        sending_ctr += 1

        if debug:
            pprint(follower_handles)

        # nothing can be sent without a well formed follower handle
        if not follower_handles:
            continue
        if '@' not in follower_handles[0]:
            continue

        # if the followers domain is within the shared items
        # federation list then send the token for this domain
        # so that it can request a catalog
        shared_items_token = None
        if follower_domain in shared_items_federated_domains:
            if shared_item_federation_tokens.get(domain_full):
                shared_items_token = shared_item_federation_tokens[domain_full]

        # check that the follower's domain is active
        follower_domain_url = http_prefix + '://' + follower_domain
        if not site_is_active(follower_domain_url, 10):
            print('Sending post to followers domain is inactive: ' +
                  follower_domain_url)
            continue
        print('Sending post to followers domain is active: ' +
              follower_domain_url)

        # select the appropriate session
        curr_session = session
        curr_http_prefix = http_prefix
        if onion_domain:
            if follower_domain.endswith('.onion'):
                curr_session = session_onion
                curr_http_prefix = 'http'
        if i2p_domain:
            if follower_domain.endswith('.i2p'):
                curr_session = session_i2p
                curr_http_prefix = 'http'

        # get the domain shown by the user agent, falling back to the
        # main domain if no onion or i2p domain has been configured
        ua_domain = domain
        if follower_domain.endswith('.onion'):
            if onion_domain:
                ua_domain = onion_domain
        elif follower_domain.endswith('.i2p'):
            if i2p_domain:
                ua_domain = i2p_domain

        with_shared_inbox = \
            _has_shared_inbox(curr_session, curr_http_prefix, follower_domain,
                              debug, signing_priv_key_pem, ua_domain)
        if debug:
            if with_shared_inbox:
                print(follower_domain + ' has shared inbox')
        if not with_shared_inbox:
            print('Sending post to followers, ' + follower_domain +
                  ' does not have a shared inbox')

        # the destination domain and port come from the first handle
        to_port = port
        to_domain = follower_handles[0].split('@')[1]
        if ':' in to_domain:
            to_port = get_port_from_domain(to_domain)
            to_domain = remove_domain_port(to_domain)

        # if we are sending to an onion domain and we
        # have an alt onion domain then use the alt
        from_domain = domain
        from_http_prefix = http_prefix
        from_port = port
        curr_proxy_type = default_proxy_type
        session_type = 'default'
        if onion_domain:
            if to_domain.endswith('.onion'):
                from_domain = onion_domain
                from_http_prefix = 'http'
                from_port = 80
                to_port = 80
                curr_proxy_type = 'tor'
                session_type = 'tor'
        if i2p_domain:
            if to_domain.endswith('.i2p'):
                from_domain = i2p_domain
                from_http_prefix = 'http'
                from_port = 80
                to_port = 80
                curr_proxy_type = 'i2p'
                session_type = 'i2p'

        if not curr_session:
            curr_session = create_session(curr_proxy_type)
            if server:
                # keep the new session so that it can be reused
                if session_type == 'tor':
                    server.session_onion = curr_session
                elif session_type == 'i2p':
                    server.session_i2p = curr_session
                else:
                    server.session = curr_session

        if with_shared_inbox:
            to_nickname = follower_handles[0].split('@')[0]

            # handles beginning with ! are treated as group accounts
            group_account = False
            if to_nickname.startswith('!'):
                group_account = True
                to_nickname = to_nickname[1:]

            # if there are more than one followers on the domain
            # then send the post to the shared inbox
            if len(follower_handles) > 1:
                to_nickname = 'inbox'

            if to_nickname != 'inbox' and post_json_object.get('type'):
                if _sending_profile_update(post_json_object):
                    print('Sending post to followers ' +
                          'shared inbox of ' + to_domain)
                    to_nickname = 'inbox'

            print('Sending post to followers from ' +
                  nickname + '@' + domain +
                  ' to ' + to_nickname + '@' + to_domain)
            send_signed_json(post_json_object, curr_session, base_dir,
                             nickname, from_domain, from_port,
                             to_nickname, to_domain, to_port,
                             from_http_prefix,
                             client_to_server, federation_list,
                             send_threads, post_log, cached_webfingers,
                             person_cache, debug, project_version,
                             shared_items_token, group_account,
                             signing_priv_key_pem, 639342,
                             domain, onion_domain, i2p_domain)
        else:
            # randomize the order of handles, so that we are not
            # favoring any particular account in terms of its delivery time
            random.shuffle(follower_handles)
            # send to individual followers without using a shared inbox
            for handle in follower_handles:
                print('Sending post to followers ' + handle)
                to_nickname = handle.split('@')[0]

                group_account = False
                if to_nickname.startswith('!'):
                    group_account = True
                    to_nickname = to_nickname[1:]

                if post_json_object.get('type') != 'Update':
                    print('Sending post to followers from ' +
                          nickname + '@' + domain + ' to ' +
                          to_nickname + '@' + to_domain)
                else:
                    print('Sending post to followers profile update from ' +
                          nickname + '@' + domain + ' to ' +
                          to_nickname + '@' + to_domain)

                send_signed_json(post_json_object, curr_session, base_dir,
                                 nickname, from_domain, from_port,
                                 to_nickname, to_domain, to_port,
                                 from_http_prefix,
                                 client_to_server, federation_list,
                                 send_threads, post_log, cached_webfingers,
                                 person_cache, debug, project_version,
                                 shared_items_token, group_account,
                                 signing_priv_key_pem, 634219,
                                 domain, onion_domain, i2p_domain)

        # pause before moving on to the next domain
        time.sleep(4)

    if debug:
        print('DEBUG: End of send_to_followers')

    sending_end_time = datetime.datetime.utcnow()
    sending_mins = \
        int((sending_end_time - sending_start_time).total_seconds() / 60)
    print('Sending post to followers ends ' + str(sending_mins) + ' mins')
2020-12-18 15:09:41 +00:00
2020-04-04 10:05:27 +00:00
2022-03-14 13:29:41 +00:00
def send_to_followers_thread(server, session, session_onion, session_i2p,
                             base_dir: str, nickname: str, domain: str,
                             onion_domain: str, i2p_domain: str, port: int,
                             http_prefix: str, federation_list: [],
                             send_threads: [], post_log: [],
                             cached_webfingers: {}, person_cache: {},
                             post_json_object: {}, debug: bool,
                             project_version: str,
                             shared_items_federated_domains: [],
                             shared_item_federation_tokens: {},
                             signing_priv_key_pem: str):
    """Returns a thread used to send a post to followers

    The thread runs send_to_followers on a shallow copy of the post.
    None is returned if begin_thread reports that the thread could not
    be started.
    """
    print('THREAD: send_to_followers')
    send_thread = \
        thread_with_trace(target=send_to_followers,
                          args=(server, session, session_onion, session_i2p,
                                base_dir, nickname, domain,
                                onion_domain, i2p_domain, port,
                                http_prefix, federation_list,
                                send_threads, post_log,
                                cached_webfingers, person_cache,
                                post_json_object.copy(), debug,
                                project_version,
                                shared_items_federated_domains,
                                shared_item_federation_tokens,
                                signing_priv_key_pem), daemon=True)
    if not begin_thread(send_thread, 'send_to_followers_thread'):
        print('WARN: error while starting ' +
              'thread to send to followers.')
        return None
    return send_thread
2019-07-08 13:30:04 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def create_inbox(recent_posts_cache: {},
                 base_dir: str, nickname: str, domain: str, port: int,
                 http_prefix: str, items_per_page: int, header_only: bool,
                 page_number: int) -> {}:
    """Returns the 'inbox' timeline for the given account,
    as generated by _create_box_indexed
    """
    return _create_box_indexed(recent_posts_cache,
                               base_dir, 'inbox',
                               nickname, domain, port, http_prefix,
                               items_per_page, header_only, True,
                               0, False, 0, page_number)
2020-04-04 10:05:27 +00:00
2022-05-31 15:16:55 +00:00
def create_bookmarks_timeline(base_dir: str,
                              nickname: str, domain: str,
                              port: int, http_prefix: str, items_per_page: int,
                              header_only: bool, page_number: int) -> {}:
    """Returns the 'tlbookmarks' timeline for the given account,
    as generated by _create_box_indexed with an empty recent posts cache
    """
    return _create_box_indexed({}, base_dir, 'tlbookmarks',
                               nickname, domain,
                               port, http_prefix, items_per_page, header_only,
                               True, 0, False, 0, page_number)
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def create_dm_timeline(recent_posts_cache: {},
                       base_dir: str, nickname: str, domain: str,
                       port: int, http_prefix: str, items_per_page: int,
                       header_only: bool, page_number: int) -> {}:
    """Returns the 'dm' timeline for the given account,
    as generated by _create_box_indexed
    """
    return _create_box_indexed(recent_posts_cache,
                               base_dir, 'dm', nickname,
                               domain, port, http_prefix, items_per_page,
                               header_only, True, 0, False, 0, page_number)
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def create_replies_timeline(recent_posts_cache: {},
                            base_dir: str, nickname: str, domain: str,
                            port: int, http_prefix: str, items_per_page: int,
                            header_only: bool, page_number: int) -> {}:
    """Returns the 'tlreplies' timeline for the given account,
    as generated by _create_box_indexed
    """
    return _create_box_indexed(recent_posts_cache,
                               base_dir, 'tlreplies',
                               nickname, domain, port, http_prefix,
                               items_per_page, header_only, True,
                               0, False, 0, page_number)
2020-11-27 12:29:20 +00:00
2022-05-31 15:16:55 +00:00
def create_blogs_timeline(base_dir: str, nickname: str, domain: str,
                          port: int, http_prefix: str, items_per_page: int,
                          header_only: bool, page_number: int) -> {}:
    """Returns the 'tlblogs' timeline for the given account,
    as generated by _create_box_indexed with an empty recent posts cache
    """
    return _create_box_indexed({}, base_dir, 'tlblogs', nickname,
                               domain, port, http_prefix,
                               items_per_page, header_only, True,
                               0, False, 0, page_number)
2020-04-04 10:05:27 +00:00
2022-05-31 15:16:55 +00:00
def create_features_timeline(base_dir: str,
                             nickname: str, domain: str,
                             port: int, http_prefix: str, items_per_page: int,
                             header_only: bool, page_number: int) -> {}:
    """Returns the 'tlfeatures' timeline for the given account,
    as generated by _create_box_indexed with an empty recent posts cache
    """
    return _create_box_indexed({}, base_dir, 'tlfeatures', nickname,
                               domain, port, http_prefix,
                               items_per_page, header_only, True,
                               0, False, 0, page_number)
2020-04-04 10:05:27 +00:00
2022-05-31 15:16:55 +00:00
def create_media_timeline(base_dir: str, nickname: str, domain: str,
                          port: int, http_prefix: str, items_per_page: int,
                          header_only: bool, page_number: int) -> {}:
    """Returns the 'tlmedia' timeline for the given account,
    as generated by _create_box_indexed with an empty recent posts cache
    """
    return _create_box_indexed({}, base_dir, 'tlmedia', nickname,
                               domain, port, http_prefix,
                               items_per_page, header_only, True,
                               0, False, 0, page_number)
2021-12-29 21:55:09 +00:00
2022-05-31 15:16:55 +00:00
def create_news_timeline(base_dir: str, domain: str,
                         port: int, http_prefix: str, items_per_page: int,
                         header_only: bool, newswire_votes_threshold: int,
                         positive_voting: bool, voting_time_mins: int,
                         page_number: int) -> {}:
    """Returns the news timeline, which is the 'outbox' of the 'news'
    account, as generated by _create_box_indexed with an empty recent
    posts cache. The newswire voting parameters are passed through.
    """
    return _create_box_indexed({}, base_dir, 'outbox', 'news',
                               domain, port, http_prefix,
                               items_per_page, header_only, True,
                               newswire_votes_threshold, positive_voting,
                               voting_time_mins, page_number)
2021-12-29 21:55:09 +00:00
2022-05-31 15:16:55 +00:00
def create_outbox(base_dir: str, nickname: str, domain: str,
                  port: int, http_prefix: str,
                  items_per_page: int, header_only: bool, authorized: bool,
                  page_number: int) -> {}:
    """Returns the 'outbox' timeline for the given account,
    as generated by _create_box_indexed with an empty recent posts cache.
    Unlike the other timelines the authorized flag is supplied by
    the caller rather than always being True.
    """
    return _create_box_indexed({}, base_dir, 'outbox',
                               nickname, domain, port, http_prefix,
                               items_per_page, header_only, authorized,
                               0, False, 0, page_number)
2021-12-29 21:55:09 +00:00
def create_moderation(base_dir: str, nickname: str, domain: str, port: int,
                      http_prefix: str, items_per_page: int, header_only: bool,
                      page_number: int) -> {}:
    """Returns the moderation timeline for the given account

    The post ids are read from accounts/moderation.txt and the
    corresponding posts are loaded from the inbox of the account.
    Entries are only added if is_moderator returns true for the nickname.

    If header_only is True then an OrderedCollection header is returned,
    with totalItems set to the number of lines within the moderation
    index when that index could be read. Otherwise an
    OrderedCollectionPage is returned.

    The index is read from its end backwards, so page 1 contains the
    last lines of the file. Each page contains at most items_per_page
    entries and consecutive pages do not overlap.
    """
    box_dir = create_person_dir(nickname, domain, base_dir, 'inbox')
    boxname = 'moderation'

    domain = get_full_domain(domain, port)

    if not page_number:
        page_number = 1
    # a negative page would otherwise index beyond the list of lines
    page_number = max(page_number, 1)

    page_str = '?page=' + str(page_number)
    box_url = local_actor_url(http_prefix, nickname, domain) + '/' + boxname
    box_header = {
        '@context': 'https://www.w3.org/ns/activitystreams',
        'first': box_url + '?page=true',
        'id': box_url,
        'last': box_url + '?page=true',
        'totalItems': 0,
        'type': 'OrderedCollection'
    }
    box_items = {
        '@context': 'https://www.w3.org/ns/activitystreams',
        'id': box_url + page_str,
        'orderedItems': [
        ],
        'partOf': box_url,
        'type': 'OrderedCollectionPage'
    }

    if is_moderator(base_dir, nickname):
        moderation_index_file = base_dir + '/accounts/moderation.txt'
        if os.path.isfile(moderation_index_file):
            with open(moderation_index_file, 'r',
                      encoding='utf-8') as index_file:
                lines = index_file.readlines()
            box_header['totalItems'] = len(lines)
            if header_only:
                return box_header

            # Walk the index backwards and take exactly one page of it.
            # A slice yields no entries for pages beyond the end of the
            # index, and never returns more than items_per_page entries
            first_idx = int(items_per_page * (page_number - 1))
            last_idx = first_idx + int(items_per_page)
            page_lines = [
                line.strip('\n').strip('\r')
                for line in lines[::-1][first_idx:last_idx]
            ]

            for post_url in page_lines:
                post_filename = \
                    box_dir + '/' + post_url.replace('/', '#') + '.json'
                if not os.path.isfile(post_filename):
                    continue
                post_json_object = load_json(post_filename)
                if post_json_object:
                    box_items['orderedItems'].append(post_json_object)

    if header_only:
        return box_header
    return box_items
2019-08-12 13:22:17 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def is_image_media(session, base_dir: str, http_prefix: str,
                   nickname: str, domain: str,
                   post_json_object: {},
                   yt_replace_domain: str,
                   twitter_replacement_domain: str,
                   allow_local_network_access: bool,
                   recent_posts_cache: {}, debug: bool,
                   system_language: str,
                   domain_full: str, person_cache: {},
                   signing_priv_key_pem: str,
                   bold_reading: bool) -> bool:
    """Returns true if the given post has attached media

    Despite the name of the function, an attachment whose mediaType
    begins with image/, audio/ or video/ counts as media, provided that
    it also has a url.

    If the post is an Announce then download_announce is called and, if
    it returns something, the result is examined instead of the
    announce itself. Only Create activities whose object is a Note,
    Page, Event, ChatMessage or Article are considered, and objects
    having a moderationStatus are excluded.
    """
    if post_json_object.get('type') == 'Announce':
        blocked_cache = {}
        post_json_announce = \
            download_announce(session, base_dir, http_prefix,
                              nickname, domain, post_json_object,
                              __version__,
                              yt_replace_domain,
                              twitter_replacement_domain,
                              allow_local_network_access,
                              recent_posts_cache, debug,
                              system_language,
                              domain_full, person_cache,
                              signing_priv_key_pem,
                              blocked_cache, bold_reading)
        if post_json_announce:
            post_json_object = post_json_announce
    # a missing type is treated the same as a non-Create activity
    if post_json_object.get('type') != 'Create':
        return False
    # presumably this checks that 'object' exists and is a dict
    if not has_object_dict(post_json_object):
        return False
    post_obj = post_json_object['object']
    if post_obj.get('moderationStatus'):
        return False
    if post_obj.get('type') not in ('Note', 'Page', 'Event',
                                    'ChatMessage', 'Article'):
        return False
    attachments = post_obj.get('attachment')
    if not attachments:
        return False
    if not isinstance(attachments, list):
        return False
    for attach in attachments:
        # skip malformed attachments rather than raising an exception
        if not isinstance(attach, dict):
            continue
        media_type = attach.get('mediaType')
        if not media_type or not attach.get('url'):
            continue
        if not isinstance(media_type, str):
            continue
        if media_type.startswith(('image/', 'audio/', 'video/')):
            return True
    return False
2020-04-04 10:05:27 +00:00
2021-12-31 15:16:53 +00:00
def _add_post_string_to_timeline(post_str: str, boxname: str,
posts_in_box: [], box_actor: str) -> bool:
2019-11-18 11:28:17 +00:00
""" is this a valid timeline post?
"""
2020-05-03 13:18:35 +00:00
# must be a recognized ActivityPub type
2021-12-31 15:16:53 +00:00
if ('"Note"' in post_str or
'"EncryptedMessage"' in post_str or
'"ChatMessage"' in post_str or
2021-12-31 15:16:53 +00:00
'"Event"' in post_str or
'"Article"' in post_str or
'"Patch"' in post_str or
'"Announce"' in post_str or
('"Question"' in post_str and
('"Create"' in post_str or '"Update"' in post_str))):
2019-11-18 11:28:17 +00:00
2020-04-04 10:05:27 +00:00
if boxname == 'dm':
2021-12-31 15:16:53 +00:00
if '#Public' in post_str or '/followers' in post_str:
return False
2020-04-04 10:05:27 +00:00
elif boxname == 'tlreplies':
2021-12-31 15:16:53 +00:00
if box_actor not in post_str:
return False
2021-12-31 17:38:22 +00:00
elif boxname in ('tlblogs', 'tlnews', 'tlfeatures'):
2021-12-31 15:16:53 +00:00
if '"Create"' not in post_str:
2020-02-24 14:39:25 +00:00
return False
2021-12-31 15:16:53 +00:00
if '"Article"' not in post_str:
2020-02-24 14:39:25 +00:00
return False
2020-04-04 10:05:27 +00:00
elif boxname == 'tlmedia':
2021-12-31 15:16:53 +00:00
if '"Create"' in post_str:
if ('mediaType' not in post_str or
('image/' not in post_str and
'video/' not in post_str and
'audio/' not in post_str)):
2019-11-18 11:28:17 +00:00
return False
# add the post to the dictionary
2021-12-31 15:16:53 +00:00
posts_in_box.append(post_str)
return True
return False
2020-04-04 10:05:27 +00:00
2021-12-31 15:16:53 +00:00
def _add_post_to_timeline(file_path: str, boxname: str,
posts_in_box: [], box_actor: str) -> bool:
""" Reads a post from file and decides whether it is valid
"""
2022-06-09 14:46:30 +00:00
with open(file_path, 'r', encoding='utf-8') as post_file:
2021-12-31 15:16:53 +00:00
post_str = post_file.read()
2021-12-31 15:16:53 +00:00
if file_path.endswith('.json'):
2021-12-31 17:38:22 +00:00
replies_filename = file_path.replace('.json', '.replies')
if os.path.isfile(replies_filename):
2020-11-28 19:39:37 +00:00
# append a replies identifier, which will later be removed
2021-12-31 15:16:53 +00:00
post_str += '<hasReplies>'
2020-11-28 19:39:37 +00:00
mitm_filename = file_path.replace('.json', '.mitm')
if os.path.isfile(mitm_filename):
# append a mitm identifier, which will later be removed
post_str += '<postmitm>'
2021-12-31 15:16:53 +00:00
return _add_post_string_to_timeline(post_str, boxname, posts_in_box,
box_actor)
2019-11-18 11:28:17 +00:00
return False
2020-04-04 10:05:27 +00:00
2021-12-28 19:33:29 +00:00
def remove_post_interactions(post_json_object: {}, force: bool) -> bool:
    """ Don't show likes, replies, bookmarks, DMs or shares (announces) to
    unauthorized viewers. This makes the timeline less useful to
    marketers and other surveillance-oriented organizations.

    The post is altered in place. Non-empty likes and reactions are
    replaced by collections having no items, and non-empty replies,
    shares, bookmarks and ignores are replaced by empty dicts.

    Returns False if this is a private post, in which case nothing is
    altered. The privacy check is only made when the post has an object
    dict and force is False.
    """
    if has_object_dict(post_json_object):
        post_obj = post_json_object['object']
        if not force:
            # If not authorized and it's a private post
            # then just don't show it within timelines
            if not is_public_post(post_json_object):
                return False
    else:
        post_obj = post_json_object

    # clear the likes and the reactions
    for items_name in ('likes', 'reactions'):
        if post_obj.get(items_name):
            post_obj[items_name] = {
                'items': []
            }

    # remove other collections
    remove_collections = (
        'replies', 'shares', 'bookmarks', 'ignores'
    )
    for remove_name in remove_collections:
        if post_obj.get(remove_name):
            post_obj[remove_name] = {}
    return True
2021-12-29 21:55:09 +00:00
def _passed_newswire_voting(newswire_votes_threshold: int,
base_dir: str, domain: str,
post_filename: str,
positive_voting: bool,
voting_time_mins: int) -> bool:
2021-04-30 13:24:33 +00:00
"""Returns true if the post has passed through newswire voting
"""
# apply votes within this timeline
2021-12-25 20:17:35 +00:00
if newswire_votes_threshold <= 0:
2021-04-30 13:24:33 +00:00
return True
# note that the presence of an arrival file also indicates
# that this post is moderated
2021-12-31 15:16:53 +00:00
arrival_date = \
2021-12-27 22:46:10 +00:00
locate_news_arrival(base_dir, domain, post_filename)
2021-12-31 15:16:53 +00:00
if not arrival_date:
2021-04-30 13:24:33 +00:00
return True
# how long has elapsed since this post arrived?
2021-12-31 15:16:53 +00:00
curr_date = datetime.datetime.utcnow()
time_diff_mins = \
int((curr_date - arrival_date).total_seconds() / 60)
2021-04-30 13:24:33 +00:00
# has the voting time elapsed?
2021-12-31 15:16:53 +00:00
if time_diff_mins < voting_time_mins:
2021-04-30 13:24:33 +00:00
# voting is still happening, so don't add this
# post to the timeline
return False
# if there a votes file for this post?
2021-12-31 15:16:53 +00:00
votes_filename = \
2021-12-27 22:38:48 +00:00
locate_news_votes(base_dir, domain, post_filename)
2021-12-31 15:16:53 +00:00
if not votes_filename:
2021-04-30 13:24:33 +00:00
return True
# load the votes file and count the votes
2021-12-31 15:16:53 +00:00
votes_json = load_json(votes_filename, 0, 2)
if not votes_json:
2021-04-30 13:24:33 +00:00
return True
2021-12-25 20:14:45 +00:00
if not positive_voting:
2021-12-31 15:16:53 +00:00
if votes_on_newswire_item(votes_json) >= \
2021-12-25 20:17:35 +00:00
newswire_votes_threshold:
2021-04-30 13:24:33 +00:00
# Too many veto votes.
# Continue without incrementing
# the posts counter
return False
else:
2021-12-27 22:32:59 +00:00
if votes_on_newswire_item < \
2021-12-25 20:17:35 +00:00
newswire_votes_threshold:
2021-04-30 13:24:33 +00:00
# Not enough votes.
# Continue without incrementing
# the posts counter
return False
return True
2021-12-29 21:55:09 +00:00
def _create_box_indexed(recent_posts_cache: {},
2022-05-31 14:08:50 +00:00
base_dir: str, boxname: str,
2021-12-29 21:55:09 +00:00
nickname: str, domain: str, port: int,
http_prefix: str,
2021-12-31 15:16:53 +00:00
items_per_page: int, header_only: bool,
authorized: bool,
2021-12-29 21:55:09 +00:00
newswire_votes_threshold: int, positive_voting: bool,
2021-12-31 15:16:53 +00:00
voting_time_mins: int, page_number: int) -> {}:
2019-11-18 11:28:17 +00:00
"""Constructs the box feed for a person with the given nickname
"""
2021-12-31 15:16:53 +00:00
if not authorized or not page_number:
page_number = 1
2019-11-18 11:28:17 +00:00
2021-12-31 17:38:22 +00:00
if boxname not in ('inbox', 'dm', 'tlreplies', 'tlmedia',
'tlblogs', 'tlnews', 'tlfeatures', 'outbox',
'tlbookmarks', 'bookmarks'):
2021-08-01 16:17:36 +00:00
print('ERROR: invalid boxname ' + boxname)
2019-11-18 11:28:17 +00:00
return None
2020-08-23 11:13:35 +00:00
# bookmarks and events timelines are like the inbox
# but have their own separate index
2021-12-31 15:16:53 +00:00
index_box_name = boxname
timeline_nickname = nickname
2020-05-21 22:02:27 +00:00
if boxname == "tlbookmarks":
boxname = "bookmarks"
2021-12-31 15:16:53 +00:00
index_box_name = boxname
2020-11-27 14:17:00 +00:00
elif boxname == "tlfeatures":
boxname = "tlblogs"
2021-12-31 15:16:53 +00:00
index_box_name = boxname
timeline_nickname = 'news'
2019-11-18 11:28:17 +00:00
2021-12-31 15:16:53 +00:00
original_domain = domain
2021-12-26 12:45:03 +00:00
domain = get_full_domain(domain, port)
2019-11-18 11:28:17 +00:00
2021-12-31 15:16:53 +00:00
box_actor = local_actor_url(http_prefix, nickname, domain)
2020-01-19 20:19:56 +00:00
2021-12-31 15:16:53 +00:00
page_str = '?page=true'
if page_number:
2022-05-31 15:16:55 +00:00
page_number = max(page_number, 1)
2019-11-18 11:28:17 +00:00
try:
2021-12-31 15:16:53 +00:00
page_str = '?page=' + str(page_number)
2020-04-04 10:05:27 +00:00
except BaseException:
2021-12-29 21:55:09 +00:00
print('EX: _create_box_indexed ' +
2021-10-29 18:48:15 +00:00
'unable to convert page number to string')
2021-12-31 15:16:53 +00:00
box_url = local_actor_url(http_prefix, nickname, domain) + '/' + boxname
box_header = {
2020-03-22 20:36:19 +00:00
'@context': 'https://www.w3.org/ns/activitystreams',
2021-12-31 15:16:53 +00:00
'first': box_url + '?page=true',
'id': box_url,
'last': box_url + '?page=true',
2020-03-22 20:36:19 +00:00
'totalItems': 0,
'type': 'OrderedCollection'
}
2021-12-31 17:38:22 +00:00
box_items = {
2020-03-22 20:36:19 +00:00
'@context': 'https://www.w3.org/ns/activitystreams',
2021-12-31 15:16:53 +00:00
'id': box_url + page_str,
2020-03-22 20:36:19 +00:00
'orderedItems': [
],
2021-12-31 15:16:53 +00:00
'partOf': box_url,
2020-03-22 20:36:19 +00:00
'type': 'OrderedCollectionPage'
}
2019-11-18 11:28:17 +00:00
2021-12-31 15:16:53 +00:00
posts_in_box = []
post_urls_in_box = []
2019-11-18 11:28:17 +00:00
2021-12-31 15:16:53 +00:00
index_filename = \
acct_dir(base_dir, timeline_nickname, original_domain) + \
'/' + index_box_name + '.index'
total_posts_count = 0
posts_added_to_timeline = 0
if os.path.isfile(index_filename):
2022-06-09 14:46:30 +00:00
with open(index_filename, 'r', encoding='utf-8') as index_file:
2021-12-31 15:16:53 +00:00
posts_added_to_timeline = 0
while posts_added_to_timeline < items_per_page:
post_filename = index_file.readline()
2021-12-26 23:41:34 +00:00
if not post_filename:
2020-05-21 21:53:12 +00:00
break
2019-11-18 15:04:08 +00:00
2021-04-30 13:24:33 +00:00
# Has this post passed through the newswire voting stage?
2021-12-29 21:55:09 +00:00
if not _passed_newswire_voting(newswire_votes_threshold,
base_dir, domain,
post_filename,
positive_voting,
voting_time_mins):
2021-04-30 13:24:33 +00:00
continue
2020-10-08 19:47:23 +00:00
# Skip through any posts previous to the current page
2021-12-31 15:16:53 +00:00
if total_posts_count < int((page_number - 1) * items_per_page):
total_posts_count += 1
2019-11-18 11:28:17 +00:00
continue
2019-11-18 12:54:41 +00:00
# if this is a full path then remove the directories
2021-12-26 23:41:34 +00:00
if '/' in post_filename:
post_filename = post_filename.split('/')[-1]
2019-11-18 12:54:41 +00:00
2019-11-18 11:28:17 +00:00
# filename of the post without any extension or path
2020-04-04 10:05:27 +00:00
# This should also correspond to any index entry in
# the posts cache
2022-06-21 11:58:50 +00:00
post_url = remove_eol(post_filename)
2021-12-31 15:16:53 +00:00
post_url = post_url.replace('.json', '').strip()
2021-12-31 15:16:53 +00:00
if post_url in post_urls_in_box:
2021-09-06 08:48:58 +00:00
continue
2019-11-25 10:10:59 +00:00
# is the post cached in memory?
2021-12-26 20:01:37 +00:00
if recent_posts_cache.get('index'):
2021-12-31 15:16:53 +00:00
if post_url in recent_posts_cache['index']:
if recent_posts_cache['json'].get(post_url):
url = recent_posts_cache['json'][post_url]
2021-12-29 21:55:09 +00:00
if _add_post_string_to_timeline(url,
boxname,
2021-12-31 15:16:53 +00:00
posts_in_box,
box_actor):
total_posts_count += 1
posts_added_to_timeline += 1
post_urls_in_box.append(post_url)
2021-09-06 08:50:44 +00:00
continue
2021-12-31 17:38:22 +00:00
print('Post not added to timeline')
2020-05-21 19:28:09 +00:00
# read the post from file
2021-12-31 15:16:53 +00:00
full_post_filename = \
2021-12-26 20:36:08 +00:00
locate_post(base_dir, nickname,
2021-12-31 15:16:53 +00:00
original_domain, post_url, False)
if full_post_filename:
2021-03-05 19:29:09 +00:00
# has the post been rejected?
2021-12-31 15:16:53 +00:00
if os.path.isfile(full_post_filename + '.reject'):
2021-03-05 19:29:09 +00:00
continue
2021-03-05 19:37:58 +00:00
2021-12-31 15:16:53 +00:00
if _add_post_to_timeline(full_post_filename, boxname,
posts_in_box, box_actor):
posts_added_to_timeline += 1
total_posts_count += 1
post_urls_in_box.append(post_url)
2021-04-30 09:24:56 +00:00
else:
2021-12-31 15:16:53 +00:00
print('WARN: Unable to add post ' + post_url +
2021-04-30 09:24:56 +00:00
' nickname ' + nickname +
' timeline ' + boxname)
2020-05-21 19:28:09 +00:00
else:
2021-12-31 15:16:53 +00:00
if timeline_nickname != nickname:
2021-02-17 14:01:45 +00:00
# if this is the features timeline
2021-12-31 15:16:53 +00:00
full_post_filename = \
locate_post(base_dir, timeline_nickname,
original_domain, post_url, False)
if full_post_filename:
if _add_post_to_timeline(full_post_filename,
boxname,
posts_in_box, box_actor):
posts_added_to_timeline += 1
total_posts_count += 1
post_urls_in_box.append(post_url)
2021-04-30 09:24:56 +00:00
else:
print('WARN: Unable to add features post ' +
2021-12-31 15:16:53 +00:00
post_url + ' nickname ' + nickname +
2021-04-30 09:24:56 +00:00
' timeline ' + boxname)
2020-11-28 13:04:30 +00:00
else:
2021-02-17 14:01:45 +00:00
print('WARN: features timeline. ' +
2021-12-31 15:16:53 +00:00
'Unable to locate post ' + post_url)
2020-11-28 13:04:30 +00:00
else:
2022-02-25 13:42:36 +00:00
if timeline_nickname == 'news':
print('WARN: Unable to locate news post ' +
post_url + ' nickname ' + nickname)
else:
print('WARN: Unable to locate post ' + post_url +
' nickname ' + nickname)
2021-12-31 15:16:53 +00:00
if total_posts_count < 3:
print('Posts added to json timeline ' + boxname + ': ' +
2021-12-31 15:16:53 +00:00
str(posts_added_to_timeline))
2021-03-05 15:59:39 +00:00
2019-11-18 11:28:17 +00:00
# Generate first and last entries within header
2021-12-31 15:16:53 +00:00
if total_posts_count > 0:
last_page = int(total_posts_count / items_per_page)
2022-05-31 15:16:55 +00:00
last_page = max(last_page, 1)
2021-12-31 15:16:53 +00:00
box_header['last'] = \
2021-12-26 10:19:59 +00:00
local_actor_url(http_prefix, nickname, domain) + \
2021-12-31 15:16:53 +00:00
'/' + boxname + '?page=' + str(last_page)
if header_only:
box_header['totalItems'] = len(posts_in_box)
prev_page_str = 'true'
if page_number > 1:
prev_page_str = str(page_number - 1)
box_header['prev'] = \
2021-12-26 10:19:59 +00:00
local_actor_url(http_prefix, nickname, domain) + \
2021-12-31 15:16:53 +00:00
'/' + boxname + '?page=' + prev_page_str
2020-04-04 10:05:27 +00:00
2021-12-31 15:16:53 +00:00
next_page_str = str(page_number + 1)
box_header['next'] = \
2021-12-26 10:19:59 +00:00
local_actor_url(http_prefix, nickname, domain) + \
2021-12-31 15:16:53 +00:00
'/' + boxname + '?page=' + next_page_str
return box_header
2019-11-18 11:28:17 +00:00
2021-12-31 15:16:53 +00:00
for post_str in posts_in_box:
2020-11-28 19:39:37 +00:00
# Check if the post has replies
2021-12-31 15:16:53 +00:00
has_replies = False
if post_str.endswith('<hasReplies>'):
has_replies = True
2020-11-28 19:39:37 +00:00
# remove the replies identifier
2021-12-31 15:16:53 +00:00
post_str = post_str.replace('<hasReplies>', '')
2020-11-28 19:39:37 +00:00
# Check if the post was delivered via a third party
mitm = False
if post_str.endswith('<postmitm>'):
mitm = True
# remove the mitm identifier
post_str = post_str.replace('<postmitm>', '')
2021-12-31 17:38:22 +00:00
pst = None
2019-11-18 11:28:17 +00:00
try:
2021-12-31 17:38:22 +00:00
pst = json.loads(post_str)
2020-04-04 10:05:27 +00:00
except BaseException:
2021-12-31 15:16:53 +00:00
print('EX: _create_box_indexed unable to load json ' + post_str)
2019-11-18 11:28:17 +00:00
continue
2020-11-28 19:39:37 +00:00
# Does this post have replies?
# This will be used to indicate that replies exist within the html
2021-12-29 21:55:09 +00:00
# created by individual_post_as_html
2021-12-31 17:38:22 +00:00
pst['hasReplies'] = has_replies
2020-11-28 19:39:37 +00:00
# was the post delivered via a third party?
pst['mitm'] = mitm
2021-04-30 11:45:46 +00:00
if not authorized:
2021-12-31 17:38:22 +00:00
if not remove_post_interactions(pst, False):
2021-04-30 11:45:46 +00:00
continue
2019-11-18 11:28:17 +00:00
2021-12-31 17:38:22 +00:00
box_items['orderedItems'].append(pst)
2019-11-18 11:28:17 +00:00
2021-12-31 17:38:22 +00:00
return box_items
2019-11-18 11:28:17 +00:00
2020-04-04 10:05:27 +00:00
2021-12-28 19:33:29 +00:00
def expire_cache(base_dir: str, person_cache: {},
                 http_prefix: str, archive_dir: str,
                 recent_posts_cache: {},
                 max_posts_in_box: int):
    """Thread used to expire actors from the cache and archive old posts

    This function never returns. It sleeps for a day, then calls
    expire_person_cache and archive_posts, and repeats.
    """
    seconds_per_day = 60 * 60 * 24
    while True:
        # once per day
        time.sleep(seconds_per_day)
        expire_person_cache(person_cache)
        archive_posts(base_dir, http_prefix, archive_dir, recent_posts_cache,
                      max_posts_in_box)
2020-04-04 10:05:27 +00:00
2019-08-20 11:51:29 +00:00
2021-12-29 21:55:09 +00:00
def archive_posts(base_dir: str, http_prefix: str, archive_dir: str,
                  recent_posts_cache: {},
                  max_posts_in_box=32000) -> None:
    """Archives posts for all accounts

    For every directory directly under base_dir/accounts whose name
    contains '@', archive_posts_for_person is called for the inbox and
    then for the outbox.

    If archive_dir is set then the matching accounts/<handle>/<box>
    directories are created beneath it, including any missing parent
    directories, and are passed as the archive location. Otherwise None
    is passed as the archive location.

    Nothing is done if max_posts_in_box is zero.
    """
    if max_posts_in_box == 0:
        return

    if archive_dir:
        os.makedirs(archive_dir + '/accounts', exist_ok=True)

    for _, dirs, _ in os.walk(base_dir + '/accounts'):
        for handle in dirs:
            if '@' not in handle:
                continue
            nickname = handle.split('@')[0]
            domain = handle.split('@')[1]
            for boxname in ('inbox', 'outbox'):
                archive_subdir = None
                if archive_dir:
                    archive_subdir = \
                        archive_dir + '/accounts/' + handle + '/' + boxname
                    os.makedirs(archive_subdir, exist_ok=True)
                archive_posts_for_person(http_prefix,
                                         nickname, domain, base_dir,
                                         boxname, archive_subdir,
                                         recent_posts_cache,
                                         max_posts_in_box)
        # only the top level of the accounts directory is needed
        break
2019-07-12 20:43:55 +00:00
2020-04-04 10:05:27 +00:00
2022-08-10 15:05:28 +00:00
def _expire_posts_for_person(http_prefix: str, nickname: str, domain: str,
base_dir: str, recent_posts_cache: {},
2022-08-10 19:54:01 +00:00
max_age_days: int, debug: bool,
keep_dms: bool) -> int:
2022-08-10 15:05:28 +00:00
"""Removes posts older than some number of days
"""
expired_post_count = 0
if max_age_days <= 0:
return expired_post_count
boxname = 'outbox'
box_dir = create_person_dir(nickname, domain, base_dir, boxname)
posts_in_box = os.scandir(box_dir)
for post_filename in posts_in_box:
post_filename = post_filename.name
if not post_filename.endswith('.json'):
continue
# Time of file creation
full_filename = os.path.join(box_dir, post_filename)
if not os.path.isfile(full_filename):
continue
content = ''
try:
with open(full_filename, 'r', encoding='utf-8') as fp_content:
content = fp_content.read()
except OSError:
print('EX: expire_posts_for_person unable to open content ' +
full_filename)
if '"published":' not in content:
continue
published_str = content.split('"published":')[1]
if '"' not in published_str:
continue
published_str = published_str.split('"')[1]
if not published_str.endswith('Z'):
continue
# get time difference
if not valid_post_date(published_str, max_age_days, debug):
2022-08-10 19:54:01 +00:00
if keep_dms:
post_json_object = load_json(full_filename)
if not post_json_object:
continue
if is_dm(post_json_object):
continue
2022-08-10 15:05:28 +00:00
delete_post(base_dir, http_prefix, nickname, domain,
full_filename, debug, recent_posts_cache, True)
expired_post_count += 1
return expired_post_count
def get_post_expiry_keep_dms(base_dir: str, nickname: str, domain: str) -> bool:
    """Returns true if DMs should be kept when posts expire

    DMs are kept unless a .expire_posts_dms file exists within the
    account directory.
    """
    handle = nickname + '@' + domain
    expire_dms_filename = \
        base_dir + '/accounts/' + handle + '/.expire_posts_dms'
    return not os.path.isfile(expire_dms_filename)
def set_post_expiry_keep_dms(base_dir: str, nickname: str, domain: str,
                             keep_dms: bool) -> None:
    """Sets whether to keep DMs during post expiry for an account

    The setting is stored as the presence of a .expire_posts_dms file
    within the account directory. If keep_dms is True then the file is
    removed if it exists, otherwise the file is written. Failures are
    reported but are not raised.
    """
    handle = nickname + '@' + domain
    expire_dms_filename = \
        base_dir + '/accounts/' + handle + '/.expire_posts_dms'
    if keep_dms:
        if os.path.isfile(expire_dms_filename):
            try:
                os.remove(expire_dms_filename)
            except OSError:
                print('EX: set_post_expiry_keep_dms unable to remove ' +
                      expire_dms_filename)
        return
    try:
        with open(expire_dms_filename, 'w+', encoding='utf-8') as fp_expire:
            fp_expire.write('\n')
    except OSError:
        print('EX: set_post_expiry_keep_dms unable to write ' +
              expire_dms_filename)
2022-08-10 15:05:28 +00:00
def expire_posts(base_dir: str, http_prefix: str,
                 recent_posts_cache: {}, debug: bool) -> int:
    """Expires posts for instance accounts

    Every directory directly under base_dir/accounts whose name contains
    '@' is checked for a .expire_posts_days file. If that file contains
    a number of days greater than zero then _expire_posts_for_person is
    called for the account. Whitespace around the number, such as a
    trailing newline, is ignored.

    Returns the total number of posts which were expired.
    """
    expired_post_count = 0
    for _, dirs, _ in os.walk(base_dir + '/accounts'):
        for handle in dirs:
            if '@' not in handle:
                continue
            nickname = handle.split('@')[0]
            domain = handle.split('@')[1]
            expire_posts_filename = \
                base_dir + '/accounts/' + handle + '/.expire_posts_days'
            if not os.path.isfile(expire_posts_filename):
                continue
            expire_days_str = None
            try:
                with open(expire_posts_filename, 'r',
                          encoding='utf-8') as fp_expire:
                    expire_days_str = fp_expire.read()
            except OSError:
                print('EX: expire_posts failed to read days file ' +
                      expire_posts_filename)
                continue
            if not expire_days_str:
                continue
            # tolerate a trailing newline within the days file
            expire_days_str = expire_days_str.strip()
            if not expire_days_str.isdigit():
                continue
            max_age_days = int(expire_days_str)
            if max_age_days <= 0:
                continue
            # only needed once the expiry period is known to be valid
            keep_dms = get_post_expiry_keep_dms(base_dir, nickname, domain)
            expired_post_count += \
                _expire_posts_for_person(http_prefix,
                                         nickname, domain, base_dir,
                                         recent_posts_cache,
                                         max_age_days, debug,
                                         keep_dms)
        # only the top level of the accounts directory is needed
        break
    return expired_post_count
def get_post_expiry_days(base_dir: str, nickname: str, domain: str) -> int:
    """Returns the post expiry period for the given account

    The number of days is read from the .expire_posts_days file within
    the account directory. Whitespace around the number, such as a
    trailing newline, is ignored. Zero is returned if the file does not
    exist, cannot be read or does not contain a non-negative integer.
    """
    handle = nickname + '@' + domain
    expire_posts_filename = \
        base_dir + '/accounts/' + handle + '/.expire_posts_days'
    if not os.path.isfile(expire_posts_filename):
        return 0
    days_str = None
    try:
        with open(expire_posts_filename, 'r', encoding='utf-8') as fp_expire:
            days_str = fp_expire.read()
    except OSError:
        print('EX: unable to read post expire days ' +
              expire_posts_filename)
    if not days_str:
        return 0
    days_str = days_str.strip()
    if not days_str.isdigit():
        return 0
    return int(days_str)
2022-08-10 15:05:28 +00:00
def set_post_expiry_days(base_dir: str, nickname: str, domain: str,
                         max_age_days: int) -> None:
    """Sets the number of days after which posts from an account will expire

    The number is written as text to the .expire_posts_days file within
    the account directory, replacing any previous content. A failure to
    write the file is reported but is not raised.
    """
    handle = nickname + '@' + domain
    expire_posts_filename = \
        base_dir + '/accounts/' + handle + '/.expire_posts_days'
    try:
        with open(expire_posts_filename, 'w+', encoding='utf-8') as fp_expire:
            fp_expire.write(str(max_age_days))
    except OSError:
        print('EX: unable to write post expire days ' +
              expire_posts_filename)
2021-12-29 21:55:09 +00:00
def archive_posts_for_person(http_prefix: str, nickname: str, domain: str,
                             base_dir: str,
                             boxname: str, archive_dir: str,
                             recent_posts_cache: {},
                             max_posts_in_box=32000) -> None:
    """Retain a maximum number of posts within the given box
    Move any others to an archive directory

    Only the inbox and outbox are handled. The oldest posts, judged by
    the "published" date found within each post file, are moved into
    archive_dir, or deleted if no archive_dir is given, until no more
    than max_posts_in_box remain. The box index file is cut down to
    its first max_posts_in_box entries and the cached html for each
    removed post is deleted.
    """
    if boxname not in ('inbox', 'outbox'):
        return
    if archive_dir:
        if not os.path.isdir(archive_dir):
            os.mkdir(archive_dir)
    box_dir = create_person_dir(nickname, domain, base_dir, boxname)

    # cheap count of every directory entry, so that a box which is within
    # its limit never has its files opened
    with os.scandir(box_dir) as posts_in_box:
        no_of_posts = sum(1 for _ in posts_in_box)
    if no_of_posts <= max_posts_in_box:
        print('Checked ' + str(no_of_posts) + ' ' + boxname +
              ' posts for ' + nickname + '@' + domain)
        return

    # remove entries from the index
    handle = nickname + '@' + domain
    index_filename = \
        base_dir + '/accounts/' + handle + '/' + boxname + '.index'
    if os.path.isfile(index_filename):
        # get the leading index entries, which are the ones kept
        kept_entries = []
        with open(index_filename, 'r', encoding='utf-8') as index_file:
            for post_id in index_file:
                kept_entries.append(post_id)
                if len(kept_entries) >= max_posts_in_box:
                    break
        new_index = ''.join(kept_entries)
        # save the new index file
        if new_index:
            with open(index_filename, 'w+', encoding='utf-8') as index_file:
                index_file.write(new_index)

    # (published date, filename) for every dated post within the box.
    # This is a list rather than a dict keyed on the date, so that posts
    # sharing the same published date each remain a removal candidate
    dated_posts = []
    with os.scandir(box_dir) as posts_in_box:
        for box_entry in posts_in_box:
            post_filename = box_entry.name
            if not post_filename.endswith('.json'):
                continue
            full_filename = os.path.join(box_dir, post_filename)
            if not os.path.isfile(full_filename):
                continue
            content = ''
            try:
                with open(full_filename, 'r',
                          encoding='utf-8') as fp_content:
                    content = fp_content.read()
            except OSError:
                print('EX: unable to open content ' + full_filename)
            if '"published":' not in content:
                continue
            # the date is the first quoted string after the field name
            published_str = content.split('"published":')[1]
            if '"' not in published_str:
                continue
            published_str = published_str.split('"')[1]
            if published_str.endswith('Z'):
                dated_posts.append((published_str, post_filename))

    no_of_posts = len(dated_posts)
    if no_of_posts <= max_posts_in_box:
        print('Checked ' + str(no_of_posts) + ' ' + boxname +
              ' posts for ' + nickname + '@' + domain)
        return

    # sort the list in ascending order of date, so the oldest go first
    dated_posts.sort()

    # directory containing cached html posts
    post_cache_dir = box_dir.replace('/' + boxname, '/postcache')

    remove_ctr = 0
    for _, post_filename in dated_posts:
        file_path = os.path.join(box_dir, post_filename)
        if not os.path.isfile(file_path):
            continue
        if archive_dir:
            archive_path = os.path.join(archive_dir, post_filename)
            os.rename(file_path, archive_path)

            # also move the files which accompany the post. These are
            # looked for as <post>.<ext> and then as <post>.json.<ext>
            for ext in ('replies', 'votes', 'arrived', 'muted'):
                ext_path = file_path.replace('.json', '.' + ext)
                if os.path.isfile(ext_path):
                    os.rename(ext_path,
                              archive_path.replace('.json', '.' + ext))
                    continue
                ext_path = file_path.replace('.json', '.json.' + ext)
                if os.path.isfile(ext_path):
                    os.rename(ext_path,
                              archive_path.replace('.json',
                                                   '.json.' + ext))
        else:
            delete_post(base_dir, http_prefix, nickname, domain,
                        file_path, False, recent_posts_cache, False)

        # remove cached html posts
        post_cache_filename = \
            os.path.join(post_cache_dir, post_filename)
        post_cache_filename = post_cache_filename.replace('.json', '.html')
        if os.path.isfile(post_cache_filename):
            try:
                os.remove(post_cache_filename)
            except OSError:
                print('EX: archive_posts_for_person unable to delete ' +
                      post_cache_filename)

        no_of_posts -= 1
        remove_ctr += 1
        if no_of_posts <= max_posts_in_box:
            break
    if archive_dir:
        print('Archived ' + str(remove_ctr) + ' ' + boxname +
              ' posts for ' + nickname + '@' + domain)
    else:
        print('Removed ' + str(remove_ctr) + ' ' + boxname +
              ' posts for ' + nickname + '@' + domain)
    print(nickname + '@' + domain + ' has ' + str(no_of_posts) +
          ' in ' + boxname)
2019-07-03 10:31:02 +00:00
2021-12-29 21:55:09 +00:00
def get_public_posts_of_person(base_dir: str, nickname: str, domain: str,
                               raw: bool, simple: bool, proxy_type: str,
                               port: int, http_prefix: str,
                               debug: bool, project_version: str,
                               system_language: str,
                               signing_priv_key_pem: str,
                               origin_domain: str) -> None:
    """Gets the public posts of an account, using a newly created session
    This is really just for test purposes

    A nickname which begins with '!' is looked up as a group account.
    The process exits if the webfinger lookup returns nothing or returns
    something other than a dict.
    """
    if debug and signing_priv_key_pem:
        print('Signing key available')
    elif debug:
        print('Signing key missing')

    print('Starting new session for getting public posts')
    session = create_session(proxy_type)
    if not session:
        if debug:
            print('Session was not created')
        return

    # a leading exclamation mark indicates a group account
    group_account = nickname.startswith('!')
    if group_account:
        nickname = nickname[1:]
    domain_full = get_full_domain(domain, port)
    handle = http_prefix + "://" + domain_full + "/@" + nickname

    cached_webfingers = {}
    wf_request = \
        webfinger_handle(session, handle, http_prefix, cached_webfingers,
                         origin_domain, project_version, debug,
                         group_account, signing_priv_key_pem)
    if not wf_request:
        if debug:
            print('No webfinger result was returned for ' + handle)
        sys.exit()
    if not isinstance(wf_request, dict):
        print('Webfinger for ' + handle + ' did not return a dict. ' +
              str(wf_request))
        sys.exit()

    if debug:
        print('Getting the outbox for ' + handle)
    person_cache = {}
    person_url, _, _, person_id, _, _, _, _ = \
        get_person_box(signing_priv_key_pem,
                       origin_domain,
                       base_dir, session, wf_request,
                       person_cache,
                       project_version, http_prefix,
                       nickname, domain, 'outbox',
                       62524)
    if debug:
        print('Actor url: ' + str(person_id))
    if not person_id:
        return

    # limits passed on when getting the posts
    max_posts = 30
    max_mentions = 10
    max_emoji = 10
    max_attachments = 5
    federation_list = []
    _get_posts(session, person_url, max_posts, max_mentions, max_emoji,
               max_attachments, federation_list, raw, simple, debug,
               project_version, http_prefix, origin_domain, system_language,
               signing_priv_key_pem)
def get_public_post_domains(session, base_dir: str, nickname: str, domain: str,
                            origin_domain: str,
                            proxy_type: str, port: int, http_prefix: str,
                            debug: bool, project_version: str,
                            word_frequency: {}, domain_list: [],
                            system_language: str,
                            signing_priv_key_pem: str) -> []:
    """Returns a sorted list of domains referenced within public posts

    A session is created if none was supplied. If no session could be
    created, or the webfinger lookup gives no usable result, then
    domain_list is returned just as it was passed in.
    """
    if not session:
        session = create_session(proxy_type)
        if not session:
            return domain_list

    handle = \
        http_prefix + "://" + get_full_domain(domain, port) + "/@" + nickname
    cached_webfingers = {}
    wf_request = \
        webfinger_handle(session, handle, http_prefix, cached_webfingers,
                         domain, project_version, debug, False,
                         signing_priv_key_pem)
    if not wf_request:
        return domain_list
    if not isinstance(wf_request, dict):
        print('Webfinger for ' + handle + ' did not return a dict. ' +
              str(wf_request))
        return domain_list

    # only the first of the returned values is needed here
    person_cache = {}
    person_url, _, _, _, _, _, _, _ = \
        get_person_box(signing_priv_key_pem,
                       origin_domain,
                       base_dir, session, wf_request,
                       person_cache,
                       project_version, http_prefix,
                       nickname, domain, 'outbox',
                       92522)
    max_posts = 64
    referenced_domains = \
        get_post_domains(session, person_url, max_posts, debug,
                         project_version, http_prefix, domain,
                         word_frequency, domain_list, system_language,
                         signing_priv_key_pem)
    referenced_domains.sort()
    return referenced_domains
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def download_follow_collection(signing_priv_key_pem: str,
                               follow_type: str,
                               session, http_prefix: str,
                               actor: str, page_number: int,
                               no_of_pages: int, debug: bool) -> []:
    """Returns a list of following/followers for the given actor
    by downloading the json for their following/followers collection

    follow_type is the name of the collection, which gets appended to
    the actor url. no_of_pages pages are requested, beginning at
    page_number, and downloading stops at the first page which can't
    be obtained or which has no items. The returned list contains no
    duplicates.
    """
    prof = 'https://www.w3.org/ns/activitystreams'
    # Actors having /channel/ or /accounts/ within their url are asked
    # for ld+json and all others for activity+json. Previously this was
    # tested with "not in ... or not in", which is true for nearly every
    # actor and so ld+json was practically never requested
    if '/channel/' in actor or '/accounts/' in actor:
        mime_type = 'application/ld+json'
    else:
        mime_type = 'application/activity+json'
    session_headers = {
        'Accept': mime_type + '; ' + 'profile="' + prof + '"'
    }

    result = []
    for page_ctr in range(no_of_pages):
        url = \
            actor + '/' + follow_type + '?page=' + str(page_number + page_ctr)
        followers_json = \
            get_json(signing_priv_key_pem, session, url, session_headers, None,
                     debug, __version__, http_prefix, None)
        if not followers_json:
            break
        # the page may be an ordered or an unordered collection
        page_items = \
            followers_json.get('orderedItems') or followers_json.get('items')
        if not page_items:
            break
        for follower_actor in page_items:
            if follower_actor not in result:
                result.append(follower_actor)
    return result
2021-12-29 21:55:09 +00:00
def get_public_post_info(session, base_dir: str, nickname: str, domain: str,
                         origin_domain: str,
                         proxy_type: str, port: int, http_prefix: str,
                         debug: bool, project_version: str,
                         word_frequency: {}, system_language: str,
                         signing_priv_key_pem: str) -> {}:
    """ Returns a dict of domains referenced within public posts

    Each key is a domain and each value is a list, which is empty
    except for blocked domains, where it contains the post urls returned
    by _get_posts_for_blocked_domains. A session is created if none was
    supplied. An empty dict is returned if no session could be created
    or the webfinger lookup gives no usable result.
    """
    if not session:
        session = create_session(proxy_type)
    if not session:
        return {}
    person_cache = {}
    cached_webfingers = {}

    domain_full = get_full_domain(domain, port)
    handle = http_prefix + "://" + domain_full + "/@" + nickname
    wf_request = \
        webfinger_handle(session, handle, http_prefix, cached_webfingers,
                         domain, project_version, debug, False,
                         signing_priv_key_pem)
    if not wf_request:
        return {}
    if not isinstance(wf_request, dict):
        print('Webfinger for ' + handle + ' did not return a dict. ' +
              str(wf_request))
        return {}

    (person_url, _, _, _, _, _,
     _, _) = get_person_box(signing_priv_key_pem,
                            origin_domain,
                            base_dir, session, wf_request,
                            person_cache,
                            project_version, http_prefix,
                            nickname, domain, 'outbox',
                            13863)
    max_posts = 64
    post_domains = \
        get_post_domains(session, person_url, max_posts, debug,
                         project_version, http_prefix, domain,
                         word_frequency, [], system_language,
                         signing_priv_key_pem)
    post_domains.sort()
    domains_info = {pdomain: [] for pdomain in post_domains}

    blocked_posts = \
        _get_posts_for_blocked_domains(base_dir, session,
                                       person_url, max_posts,
                                       debug,
                                       project_version, http_prefix,
                                       domain, signing_priv_key_pem)
    for blocked_domain, post_url_list in blocked_posts.items():
        # setdefault avoids a KeyError if a blocked domain was not
        # amongst the domains returned by get_post_domains
        domains_info.setdefault(blocked_domain, []).extend(post_url_list)

    return domains_info
2021-12-29 21:55:09 +00:00
def get_public_post_domains_blocked(session, base_dir: str,
                                    nickname: str, domain: str,
                                    proxy_type: str, port: int,
                                    http_prefix: str,
                                    debug: bool, project_version: str,
                                    word_frequency: {}, domain_list: [],
                                    system_language: str,
                                    signing_priv_key_pem: str) -> []:
    """ Returns a list of domains referenced within public posts which
    are globally blocked on this instance

    A domain counts as blocked if is_evil says so or if it appears
    anywhere within the text of accounts/blocking.txt. An empty list is
    returned if there are no referenced domains or no blocking file.
    """
    # the account's own domain is used as the origin domain
    post_domains = \
        get_public_post_domains(session, base_dir, nickname, domain,
                                domain,
                                proxy_type, port, http_prefix,
                                debug, project_version,
                                word_frequency, domain_list, system_language,
                                signing_priv_key_pem)
    if not post_domains:
        return []

    blocking_filename = base_dir + '/accounts/blocking.txt'
    if not os.path.isfile(blocking_filename):
        return []

    # read the blocked domains as a single string
    with open(blocking_filename, 'r', encoding='utf-8') as fp_block:
        blocked_str = fp_block.read()

    blocked_domains = []
    for post_domain in post_domains:
        if '@' not in post_domain:
            continue
        # get the domain after the @
        domain_name = post_domain.split('@')[1].strip()
        if is_evil(domain_name) or domain_name in blocked_str:
            blocked_domains.append(domain_name)
    return blocked_domains
2021-12-29 21:55:09 +00:00
def _get_non_mutuals_of_person(base_dir: str,
                               nickname: str, domain: str) -> []:
    """Returns the followers of a person who they don't follow back
    i.e. handles within followers.txt which are not within following.txt
    """
    followers = \
        get_followers_list(base_dir, nickname, domain, 'followers.txt')
    following = \
        get_followers_list(base_dir, nickname, domain, 'following.txt')
    return [handle for handle in followers if handle not in following]
2020-09-25 13:21:56 +00:00
2021-12-29 21:55:09 +00:00
def check_domains(session, base_dir: str,
                  nickname: str, domain: str,
                  proxy_type: str, port: int, http_prefix: str,
                  debug: bool, project_version: str,
                  max_blocked_domains: int, single_check: bool,
                  system_language: str,
                  signing_priv_key_pem: str) -> None:
    """Checks follower accounts for references to globally blocked domains

    With single_check one randomly chosen non-mutual follower is
    examined, otherwise all of them are, skipping any handle already
    within the warnings file. A follower whose public posts reference
    more than max_blocked_domains blocked domains is added to
    accounts/followerWarnings.txt
    """
    non_mutuals = _get_non_mutuals_of_person(base_dir, nickname, domain)
    if not non_mutuals:
        print('No non-mutual followers were found')
        return

    warnings_filename = base_dir + '/accounts/followerWarnings.txt'
    warnings_str = ''
    if os.path.isfile(warnings_filename):
        with open(warnings_filename, 'r',
                  encoding='utf-8') as fp_warn:
            warnings_str = fp_warn.read()

    if single_check:
        # checks a single random non-mutual
        index = random.randrange(0, len(non_mutuals))
        handles_to_check = [non_mutuals[index]]
    else:
        # checks all non-mutuals
        handles_to_check = non_mutuals

    word_frequency = {}
    warnings_changed = False
    for handle in handles_to_check:
        if '@' not in handle:
            continue
        # previously warned handles are only skipped when checking all
        if not single_check and handle in warnings_str:
            continue
        handle_parts = handle.split('@')
        non_mutual_nickname = handle_parts[0]
        non_mutual_domain = handle_parts[1].strip()
        blocked_domains = \
            get_public_post_domains_blocked(session, base_dir,
                                            non_mutual_nickname,
                                            non_mutual_domain,
                                            proxy_type, port, http_prefix,
                                            debug, project_version,
                                            word_frequency, [],
                                            system_language,
                                            signing_priv_key_pem)
        if not blocked_domains:
            continue
        if not single_check:
            # show which blocked domains this follower references
            print(handle)
            for bdomain in blocked_domains:
                print(' ' + bdomain)
        if len(blocked_domains) > max_blocked_domains:
            warnings_str += handle + '\n'
            warnings_changed = True

    if warnings_changed and warnings_str:
        with open(warnings_filename, 'w+',
                  encoding='utf-8') as fp_warn:
            fp_warn.write(warnings_str)
        if not single_check:
            print(warnings_str)
2020-09-25 13:21:56 +00:00
2021-12-28 19:33:29 +00:00
def populate_replies_json(base_dir: str, nickname: str, domain: str,
                          post_replies_filename: str, authorized: bool,
                          replies_json: {}) -> None:
    """Appends replies to the orderedItems list of replies_json

    Each line of post_replies_filename is taken to be the id of a reply.
    The json file for that id is looked for within the outbox and inbox
    of the account, and if not added from there then within the inbox
    of the shared inbox account (accounts/inbox@domain).
    A reply is appended if authorized is true, or if the public
    ActivityStreams address appears within the 'to' or 'cc' of its object.
    Nothing is returned; replies_json is modified in place.
    """
    pub_str = 'https://www.w3.org/ns/activitystreams#Public'
    # populate the items list with replies
    replies_boxes = ('outbox', 'inbox')
    with open(post_replies_filename, 'r', encoding='utf-8') as replies_file:
        for message_id in replies_file:
            reply_found = False
            # examine inbox and outbox
            for boxname in replies_boxes:
                # drop the line ending, then build the filename with
                # every '/' within the id replaced by '#'
                message_id2 = remove_eol(message_id)
                search_filename = \
                    acct_dir(base_dir, nickname, domain) + '/' + \
                    boxname + '/' + \
                    message_id2.replace('/', '#') + '.json'
                if os.path.isfile(search_filename):
                    # when not authorized, check the raw file text for
                    # the public address before loading the json
                    if authorized or \
                       text_in_file(pub_str, search_filename):
                        post_json_object = load_json(search_filename)
                        if post_json_object:
                            if post_json_object['object'].get('cc'):
                                pjo = post_json_object
                                if (authorized or
                                    (pub_str in pjo['object']['to'] or
                                     pub_str in pjo['object']['cc'])):
                                    replies_json['orderedItems'].append(pjo)
                                    reply_found = True
                            else:
                                # no cc, so only 'to' can be checked
                                if authorized or \
                                   pub_str in post_json_object['object']['to']:
                                    pjo = post_json_object
                                    replies_json['orderedItems'].append(pjo)
                                    reply_found = True
                    # the file was located, so don't try the other box
                    # NOTE(review): the nesting level of this break is
                    # assumed to be that of the isfile check - TODO confirm
                    break
            # if not in either inbox or outbox then examine the shared inbox
            if not reply_found:
                message_id2 = remove_eol(message_id)
                search_filename = \
                    base_dir + \
                    '/accounts/inbox@' + \
                    domain + '/inbox/' + \
                    message_id2.replace('/', '#') + '.json'
                if os.path.isfile(search_filename):
                    if authorized or \
                       text_in_file(pub_str, search_filename):
                        # get the json of the reply and append it to
                        # the collection
                        post_json_object = load_json(search_filename)
                        if post_json_object:
                            if post_json_object['object'].get('cc'):
                                pjo = post_json_object
                                if (authorized or
                                    (pub_str in pjo['object']['to'] or
                                     pub_str in pjo['object']['cc'])):
                                    pjo = post_json_object
                                    replies_json['orderedItems'].append(pjo)
                            else:
                                # no cc, so only 'to' can be checked
                                if authorized or \
                                   pub_str in post_json_object['object']['to']:
                                    pjo = post_json_object
                                    replies_json['orderedItems'].append(pjo)
2020-04-04 10:05:27 +00:00
2019-09-28 16:10:45 +00:00
2021-12-31 15:16:53 +00:00
def _reject_announce(announce_filename: str,
                     base_dir: str, nickname: str, domain: str,
                     announce_post_id: str, recent_posts_cache: {}):
    """Marks an announce as rejected

    The announce post id is passed to reject_post_id, and a file with
    the name of announce_filename plus '.reject' is created if it
    does not already exist.
    """
    reject_post_id(base_dir, nickname, domain, announce_post_id,
                   recent_posts_cache)

    # reject the post referenced by the announce activity object
    reject_filename = announce_filename + '.reject'
    if os.path.isfile(reject_filename):
        return
    with open(reject_filename, 'w+',
              encoding='utf-8') as fp_reject:
        fp_reject.write('\n')
2019-09-28 16:58:21 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def download_announce(session, base_dir: str, http_prefix: str,
nickname: str, domain: str,
post_json_object: {}, project_version: str,
yt_replace_domain: str,
twitter_replacement_domain: str,
allow_local_network_access: bool,
recent_posts_cache: {}, debug: bool,
system_language: str,
domain_full: str, person_cache: {},
signing_priv_key_pem: str,
2022-03-24 14:40:28 +00:00
blocked_cache: {}, bold_reading: bool) -> {}:
2019-09-28 16:10:45 +00:00
"""Download the post referenced by an announce
"""
2021-12-25 22:09:19 +00:00
if not post_json_object.get('object'):
2021-06-22 20:30:27 +00:00
return None
2021-12-25 22:09:19 +00:00
if not isinstance(post_json_object['object'], str):
2021-03-03 17:09:31 +00:00
return None
2021-06-03 12:17:24 +00:00
# ignore self-boosts
2021-12-25 22:09:19 +00:00
if post_json_object['actor'] in post_json_object['object']:
2021-06-03 12:17:24 +00:00
return None
2019-09-28 16:10:45 +00:00
# get the announced post
2021-12-31 15:16:53 +00:00
announce_cache_dir = base_dir + '/cache/announce/' + nickname
if not os.path.isdir(announce_cache_dir):
os.mkdir(announce_cache_dir)
2021-12-26 19:47:06 +00:00
post_id = None
2021-12-25 22:09:19 +00:00
if post_json_object.get('id'):
2021-12-27 11:20:57 +00:00
post_id = remove_id_ending(post_json_object['id'])
2021-12-31 15:16:53 +00:00
announce_filename = \
announce_cache_dir + '/' + \
2021-12-25 22:09:19 +00:00
post_json_object['object'].replace('/', '#') + '.json'
2019-09-28 16:10:45 +00:00
2021-12-31 15:16:53 +00:00
if os.path.isfile(announce_filename + '.reject'):
2021-03-03 17:09:31 +00:00
return None
2019-09-28 16:10:45 +00:00
2021-12-31 15:16:53 +00:00
if os.path.isfile(announce_filename):
2021-03-14 19:32:11 +00:00
if debug:
print('Reading cached Announce content for ' +
2021-12-25 22:09:19 +00:00
post_json_object['object'])
2021-12-31 15:16:53 +00:00
post_json_object = load_json(announce_filename)
2021-12-25 22:09:19 +00:00
if post_json_object:
return post_json_object
2019-09-28 16:10:45 +00:00
else:
2021-12-31 15:16:53 +00:00
profile_str = 'https://www.w3.org/ns/activitystreams'
accept_str = \
2021-09-14 13:05:10 +00:00
'application/activity+json; ' + \
2021-12-31 15:16:53 +00:00
'profile="' + profile_str + '"'
as_header = {
'Accept': accept_str
2020-03-31 11:07:58 +00:00
}
2021-12-25 22:09:19 +00:00
if '/channel/' in post_json_object['actor'] or \
'/accounts/' in post_json_object['actor']:
2021-12-31 15:16:53 +00:00
accept_str = \
2021-09-14 13:05:10 +00:00
'application/ld+json; ' + \
2021-12-31 15:16:53 +00:00
'profile="' + profile_str + '"'
as_header = {
'Accept': accept_str
2020-03-31 11:07:58 +00:00
}
2021-12-31 15:16:53 +00:00
actor_nickname = get_nickname_from_actor(post_json_object['actor'])
if not actor_nickname:
print('WARN: download_announce no actor_nickname')
return None
2021-12-31 15:16:53 +00:00
actor_domain, actor_port = \
2021-12-27 19:05:25 +00:00
get_domain_from_actor(post_json_object['actor'])
2021-12-31 15:16:53 +00:00
if not actor_domain:
2020-04-04 10:05:27 +00:00
print('Announce actor does not contain a ' +
'valid domain or port number: ' +
2021-12-25 22:09:19 +00:00
str(post_json_object['actor']))
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if is_blocked(base_dir, nickname, domain,
actor_nickname, actor_domain):
2020-04-04 10:05:27 +00:00
print('Announce download blocked actor: ' +
2021-12-31 15:16:53 +00:00
actor_nickname + '@' + actor_domain)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
object_nickname = get_nickname_from_actor(post_json_object['object'])
2021-12-31 17:38:22 +00:00
object_domain, _ = \
2021-12-27 19:05:25 +00:00
get_domain_from_actor(post_json_object['object'])
2021-12-31 15:16:53 +00:00
if not object_domain:
2020-04-04 10:05:27 +00:00
print('Announce object does not contain a ' +
'valid domain or port number: ' +
2021-12-25 22:09:19 +00:00
str(post_json_object['object']))
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if is_blocked(base_dir, nickname, domain, object_nickname,
object_domain):
if object_nickname and object_domain:
2020-04-04 10:05:27 +00:00
print('Announce download blocked object: ' +
2021-12-31 15:16:53 +00:00
object_nickname + '@' + object_domain)
2020-02-19 18:55:29 +00:00
else:
2020-04-04 10:05:27 +00:00
print('Announce download blocked object: ' +
2021-12-25 22:09:19 +00:00
str(post_json_object['object']))
2021-03-03 17:09:31 +00:00
return None
2021-03-14 19:46:46 +00:00
if debug:
print('Downloading Announce content for ' +
2021-12-25 22:09:19 +00:00
post_json_object['object'])
2021-12-31 15:16:53 +00:00
announced_json = \
2021-12-29 21:55:09 +00:00
get_json(signing_priv_key_pem, session,
post_json_object['object'],
2021-12-31 15:16:53 +00:00
as_header, None, debug, project_version,
2021-12-29 21:55:09 +00:00
http_prefix, domain)
2020-01-19 20:19:56 +00:00
2021-12-31 15:16:53 +00:00
if not announced_json:
2021-03-03 17:09:31 +00:00
return None
2019-12-04 09:44:41 +00:00
2021-12-31 15:16:53 +00:00
if not isinstance(announced_json, dict):
2022-03-14 14:50:44 +00:00
print('WARN: announced post json is not a dict - ' +
2021-12-25 22:09:19 +00:00
post_json_object['object'])
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if not announced_json.get('id'):
2022-03-14 14:50:44 +00:00
print('WARN: announced post does not have an id ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if not announced_json.get('type'):
2022-03-14 14:50:44 +00:00
print('WARN: announced post does not have a type ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if announced_json['type'] == 'Video':
converted_json = \
2021-12-29 21:55:09 +00:00
convert_video_to_note(base_dir, nickname, domain,
system_language,
2021-12-31 15:16:53 +00:00
announced_json, blocked_cache)
if converted_json:
announced_json = converted_json
if '/statuses/' not in announced_json['id']:
2022-03-14 14:50:44 +00:00
print('WARN: announced post id does not contain /statuses/ ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if not has_users_path(announced_json['id']):
2022-03-14 14:50:44 +00:00
print('WARN: announced post id does not contain /users/ ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if announced_json['type'] != 'Note' and \
announced_json['type'] != 'Page' and \
announced_json['type'] != 'Article':
2022-03-14 14:50:44 +00:00
print('WARN: announced post is not Note/Page/Article ' +
str(announced_json))
# You can only announce Note or Article types
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if not announced_json.get('content'):
2022-03-14 14:50:44 +00:00
print('WARN: announced post does not have content ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
if not announced_json.get('published'):
2022-03-14 14:50:44 +00:00
print('WARN: announced post does not have published ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
if '.' in announced_json['published'] and \
'Z' in announced_json['published']:
announced_json['published'] = \
announced_json['published'].split('.')[0] + 'Z'
2021-12-31 15:16:53 +00:00
if not valid_post_date(announced_json['published'], 90, debug):
2022-03-14 14:50:44 +00:00
print('WARN: announced post is not recently published ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2022-06-14 10:51:40 +00:00
if not understood_post_language(base_dir, nickname,
2021-12-31 15:16:53 +00:00
announced_json, system_language,
2021-12-29 21:55:09 +00:00
http_prefix, domain_full,
person_cache):
2021-07-18 19:35:34 +00:00
return None
# Check the content of the announce
2021-12-31 15:16:53 +00:00
content_str = announced_json['content']
using_content_map = False
if announced_json.get('contentMap'):
if announced_json['contentMap'].get(system_language):
content_str = announced_json['contentMap'][system_language]
using_content_map = True
2021-12-31 15:16:53 +00:00
if dangerous_markup(content_str, allow_local_network_access):
2022-03-14 14:50:44 +00:00
print('WARN: announced post contains dangerous markup ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2022-06-02 17:47:56 +00:00
summary_str = \
get_summary_from_post(announced_json, system_language, [])
media_descriptions = \
get_media_descriptions_from_post(announced_json)
2022-06-02 17:47:56 +00:00
content_all = content_str
if summary_str:
content_all = \
summary_str + ' ' + content_str + ' ' + media_descriptions
2022-09-25 17:26:11 +00:00
if is_filtered(base_dir, nickname, domain, content_all,
system_language):
2022-03-14 14:50:44 +00:00
print('WARN: announced post has been filtered ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
if reject_twitter_summary(base_dir, nickname, domain,
summary_str):
print('WARN: announced post has twitter summary ' +
str(announced_json))
_reject_announce(announce_filename,
base_dir, nickname, domain, post_id,
recent_posts_cache)
return None
2021-12-31 15:16:53 +00:00
if invalid_ciphertext(content_str):
2022-03-14 14:50:44 +00:00
print('WARN: announced post contains invalid ciphertext ' +
str(announced_json))
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
return None
2020-05-17 09:44:42 +00:00
# remove any long words
2021-12-31 15:16:53 +00:00
content_str = remove_long_words(content_str, 40, [])
2020-01-19 20:19:56 +00:00
# Prevent the same word from being repeated many times
2021-12-31 15:16:53 +00:00
content_str = limit_repeated_words(content_str, 6)
# remove text formatting, such as bold/italics
2022-03-24 14:40:28 +00:00
content_str = remove_text_formatting(content_str, bold_reading)
2021-01-30 11:59:26 +00:00
# set the content after santitization
if using_content_map:
announced_json['contentMap'][system_language] = content_str
2021-12-31 15:16:53 +00:00
announced_json['content'] = content_str
2019-09-28 16:10:45 +00:00
# wrap in create to be consistent with other posts
2021-12-31 15:16:53 +00:00
announced_json = \
2021-12-28 19:33:29 +00:00
outbox_message_create_wrap(http_prefix,
2021-12-31 15:16:53 +00:00
actor_nickname, actor_domain,
actor_port, announced_json)
if announced_json['type'] != 'Create':
2022-03-14 14:50:44 +00:00
print('WARN: announced post could not be wrapped in Create ' +
str(announced_json))
# Create wrap failed
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2019-09-28 16:10:45 +00:00
2021-12-25 22:09:19 +00:00
# labelAccusatoryPost(post_json_object, translate)
2019-09-28 16:10:45 +00:00
# set the id to the original status
2021-12-31 15:16:53 +00:00
announced_json['id'] = post_json_object['object']
announced_json['object']['id'] = post_json_object['object']
2019-09-28 16:10:45 +00:00
# check that the repeat isn't for a blocked account
2021-12-31 15:16:53 +00:00
attributed_nickname = \
get_nickname_from_actor(announced_json['object']['id'])
attributed_domain, attributed_port = \
get_domain_from_actor(announced_json['object']['id'])
if attributed_nickname and attributed_domain:
attributed_domain = \
get_full_domain(attributed_domain, attributed_port)
2021-12-29 21:55:09 +00:00
if is_blocked(base_dir, nickname, domain,
2021-12-31 15:16:53 +00:00
attributed_nickname, attributed_domain):
2022-03-14 14:50:44 +00:00
print('WARN: announced post handle is blocked ' +
str(attributed_nickname) + '@' + attributed_domain)
2021-12-31 15:16:53 +00:00
_reject_announce(announce_filename,
2021-12-29 21:55:09 +00:00
base_dir, nickname, domain, post_id,
recent_posts_cache)
2021-03-03 17:09:31 +00:00
return None
2021-12-31 15:16:53 +00:00
post_json_object = announced_json
2021-12-28 21:36:27 +00:00
replace_you_tube(post_json_object, yt_replace_domain, system_language)
replace_twitter(post_json_object, twitter_replacement_domain,
system_language)
2021-12-31 15:16:53 +00:00
if save_json(post_json_object, announce_filename):
2021-12-25 22:09:19 +00:00
return post_json_object
2021-03-03 17:09:31 +00:00
return None
2019-12-01 13:45:30 +00:00
2020-04-04 10:05:27 +00:00
2021-12-29 21:55:09 +00:00
def is_muted_conv(base_dir: str, nickname: str, domain: str, post_id: str,
                  conversation_id: str) -> bool:
    """Returns true if the given post is muted, either because its
    conversation has a .muted marker file within the account directory
    or because the located post file has a .muted marker next to it
    """
    # a muted conversation is marked by a file named after its id,
    # with slashes replaced so that the id is usable as a filename
    if conversation_id:
        conversation_dir = \
            acct_dir(base_dir, nickname, domain) + '/conversation/'
        marker_name = conversation_id.replace('/', '#') + '.muted'
        if os.path.isfile(conversation_dir + marker_name):
            return True

    # otherwise look for a marker alongside the post itself
    located_filename = locate_post(base_dir, nickname, domain, post_id)
    if located_filename and os.path.isfile(located_filename + '.muted'):
        return True
    return False
2021-12-29 21:55:09 +00:00
def send_block_via_server(base_dir: str, session,
                          from_nickname: str, password: str,
                          from_domain: str, from_port: int,
                          http_prefix: str, blocked_url: str,
                          cached_webfingers: {}, person_cache: {},
                          debug: bool, project_version: str,
                          signing_priv_key_pem: str) -> {}:
    """Creates a block via c2s

    A Block activity for blocked_url is built with the sending account
    as its actor and is POSTed, with a basic auth header, to the first
    url which get_person_box returns when asked for the 'outbox'.

    Returns the Block activity once the POST has been attempted, even
    if the POST failed (a warning is printed in that case). Before that
    point an integer code is returned instead: 6 if there is no
    session, 1 if the webfinger lookup failed, 3 if no outbox url was
    found and 4 if no actor id was found.
    """
    if not session:
        print('WARN: No session for send_block_via_server')
        return 6

    from_domain_full = get_full_domain(from_domain, from_port)

    block_actor = \
        local_actor_url(http_prefix, from_nickname, from_domain_full)
    to_url = 'https://www.w3.org/ns/activitystreams#Public'
    cc_url = block_actor + '/followers'

    new_block_json = {
        "@context": "https://www.w3.org/ns/activitystreams",
        'type': 'Block',
        'actor': block_actor,
        'object': blocked_url,
        'to': [to_url],
        'cc': [cc_url]
    }

    handle = http_prefix + '://' + from_domain_full + '/@' + from_nickname

    # webfinger lookup of the sending account
    wf_request = webfinger_handle(session, handle, http_prefix,
                                  cached_webfingers,
                                  from_domain, project_version, debug, False,
                                  signing_priv_key_pem)
    if not wf_request:
        if debug:
            print('DEBUG: block webfinger failed for ' + handle)
        return 1
    if not isinstance(wf_request, dict):
        print('WARN: block Webfinger for ' + handle +
              ' did not return a dict. ' + str(wf_request))
        return 1

    post_to_box = 'outbox'

    # get the outbox url and actor id of the sending account
    origin_domain = from_domain
    (outbox_url, _, _, from_person_id, _, _,
     _, _) = get_person_box(signing_priv_key_pem,
                            origin_domain,
                            base_dir, session, wf_request,
                            person_cache,
                            project_version, http_prefix,
                            from_nickname,
                            from_domain, post_to_box, 72652)

    if not outbox_url:
        if debug:
            print('DEBUG: block no ' + post_to_box +
                  ' was found for ' + handle)
        return 3
    if not from_person_id:
        if debug:
            print('DEBUG: block no actor was found for ' + handle)
        return 4

    auth_header = create_basic_auth_header(from_nickname, password)

    headers = {
        'host': from_domain,
        'Content-type': 'application/json',
        'Authorization': auth_header
    }
    post_result = post_json(http_prefix, from_domain_full,
                            session, new_block_json, [], outbox_url,
                            headers, 30, True)
    if not post_result:
        print('WARN: block unable to post')
    elif debug:
        # only report success when the POST was not flagged as failed
        print('DEBUG: c2s POST block success')

    return new_block_json
2020-04-01 20:13:42 +00:00
2021-12-29 21:55:09 +00:00
def send_mute_via_server(base_dir: str, session,
                         from_nickname: str, password: str,
                         from_domain: str, from_port: int,
                         http_prefix: str, muted_url: str,
                         cached_webfingers: {}, person_cache: {},
                         debug: bool, project_version: str,
                         signing_priv_key_pem: str) -> {}:
    """Creates a mute via c2s

    An Ignore activity for muted_url, addressed only to the sending
    actor, is POSTed with a basic auth header to the first url which
    get_person_box returns when asked for the 'outbox'.

    Returns the Ignore activity once the POST has been attempted, even
    if the POST failed (a warning is printed in that case). Before that
    point an integer code is returned instead: 6 if there is no
    session, 1 if the webfinger lookup failed, 3 if no outbox url was
    found and 4 if no actor id was found.
    """
    if not session:
        print('WARN: No session for send_mute_via_server')
        return 6

    from_domain_full = get_full_domain(from_domain, from_port)

    actor = local_actor_url(http_prefix, from_nickname, from_domain_full)
    handle = replace_users_with_at(actor)

    new_mute_json = {
        "@context": "https://www.w3.org/ns/activitystreams",
        'type': 'Ignore',
        'actor': actor,
        'to': [actor],
        'object': muted_url
    }

    # webfinger lookup of the sending account
    wf_request = webfinger_handle(session, handle, http_prefix,
                                  cached_webfingers,
                                  from_domain, project_version, debug, False,
                                  signing_priv_key_pem)
    if not wf_request:
        if debug:
            print('DEBUG: mute webfinger failed for ' + handle)
        return 1
    if not isinstance(wf_request, dict):
        print('WARN: mute Webfinger for ' + handle +
              ' did not return a dict. ' + str(wf_request))
        return 1

    post_to_box = 'outbox'

    # get the outbox url and actor id of the sending account
    origin_domain = from_domain
    (outbox_url, _, _, from_person_id, _, _,
     _, _) = get_person_box(signing_priv_key_pem,
                            origin_domain,
                            base_dir, session, wf_request,
                            person_cache,
                            project_version, http_prefix,
                            from_nickname,
                            from_domain, post_to_box, 72652)

    if not outbox_url:
        if debug:
            print('DEBUG: mute no ' + post_to_box + ' was found for ' + handle)
        return 3
    if not from_person_id:
        if debug:
            print('DEBUG: mute no actor was found for ' + handle)
        return 4

    auth_header = create_basic_auth_header(from_nickname, password)

    headers = {
        'host': from_domain,
        'Content-type': 'application/json',
        'Authorization': auth_header
    }
    post_result = post_json(http_prefix, from_domain_full,
                            session, new_mute_json, [], outbox_url,
                            headers, 3, True)
    if post_result is None:
        print('WARN: mute unable to post')
    elif debug:
        # only report success when the POST was not flagged as failed
        print('DEBUG: c2s POST mute success')

    return new_mute_json
2021-03-20 21:20:41 +00:00
2021-12-29 21:55:09 +00:00
def send_undo_mute_via_server(base_dir: str, session,
                              from_nickname: str, password: str,
                              from_domain: str, from_port: int,
                              http_prefix: str, muted_url: str,
                              cached_webfingers: {}, person_cache: {},
                              debug: bool, project_version: str,
                              signing_priv_key_pem: str) -> {}:
    """Undoes a mute via c2s

    An Undo activity wrapping an Ignore of muted_url, addressed only to
    the sending actor, is POSTed with a basic auth header to the first
    url which get_person_box returns when asked for the 'outbox'.

    Returns the Undo activity once the POST has been attempted, even
    if the POST failed (a warning is printed in that case). Before that
    point an integer code is returned instead: 6 if there is no
    session, 1 if the webfinger lookup failed, 3 if no outbox url was
    found and 4 if no actor id was found.
    """
    if not session:
        print('WARN: No session for send_undo_mute_via_server')
        return 6

    from_domain_full = get_full_domain(from_domain, from_port)

    actor = local_actor_url(http_prefix, from_nickname, from_domain_full)
    handle = replace_users_with_at(actor)

    undo_mute_json = {
        "@context": "https://www.w3.org/ns/activitystreams",
        'type': 'Undo',
        'actor': actor,
        'to': [actor],
        'object': {
            'type': 'Ignore',
            'actor': actor,
            'to': [actor],
            'object': muted_url
        }
    }

    # webfinger lookup of the sending account
    wf_request = webfinger_handle(session, handle, http_prefix,
                                  cached_webfingers,
                                  from_domain, project_version, debug, False,
                                  signing_priv_key_pem)
    if not wf_request:
        if debug:
            print('DEBUG: undo mute webfinger failed for ' + handle)
        return 1
    if not isinstance(wf_request, dict):
        print('WARN: undo mute Webfinger for ' + handle +
              ' did not return a dict. ' + str(wf_request))
        return 1

    post_to_box = 'outbox'

    # get the outbox url and actor id of the sending account
    origin_domain = from_domain
    (outbox_url, _, _, from_person_id, _, _,
     _, _) = get_person_box(signing_priv_key_pem,
                            origin_domain,
                            base_dir, session, wf_request,
                            person_cache,
                            project_version, http_prefix,
                            from_nickname,
                            from_domain, post_to_box, 72652)

    if not outbox_url:
        if debug:
            print('DEBUG: undo mute no ' + post_to_box +
                  ' was found for ' + handle)
        return 3
    if not from_person_id:
        if debug:
            print('DEBUG: undo mute no actor was found for ' + handle)
        return 4

    auth_header = create_basic_auth_header(from_nickname, password)

    headers = {
        'host': from_domain,
        'Content-type': 'application/json',
        'Authorization': auth_header
    }
    post_result = post_json(http_prefix, from_domain_full,
                            session, undo_mute_json, [], outbox_url,
                            headers, 3, True)
    if post_result is None:
        print('WARN: undo mute unable to post')
    elif debug:
        # only report success when the POST was not flagged as failed
        print('DEBUG: c2s POST undo mute success')

    return undo_mute_json
2021-03-20 21:20:41 +00:00
2021-12-29 21:55:09 +00:00
def send_undo_block_via_server(base_dir: str, session,
                               from_nickname: str, password: str,
                               from_domain: str, from_port: int,
                               http_prefix: str, blocked_url: str,
                               cached_webfingers: {}, person_cache: {},
                               debug: bool, project_version: str,
                               signing_priv_key_pem: str) -> {}:
    """Undoes a block via c2s

    An Undo activity wrapping a Block of blocked_url is built with the
    sending account as its actor and is POSTed, with a basic auth
    header, to the first url which get_person_box returns when asked
    for the 'outbox'.

    Returns the Undo activity once the POST has been attempted, even
    if the POST failed (a warning is printed in that case). Before that
    point an integer code is returned instead: 6 if there is no
    session, 1 if the webfinger lookup failed, 3 if no outbox url was
    found and 4 if no actor id was found.
    """
    if not session:
        print('WARN: No session for send_undo_block_via_server')
        return 6

    from_domain_full = get_full_domain(from_domain, from_port)

    block_actor = \
        local_actor_url(http_prefix, from_nickname, from_domain_full)
    to_url = 'https://www.w3.org/ns/activitystreams#Public'
    cc_url = block_actor + '/followers'

    new_block_json = {
        "@context": "https://www.w3.org/ns/activitystreams",
        'type': 'Undo',
        'actor': block_actor,
        'object': {
            'type': 'Block',
            'actor': block_actor,
            'object': blocked_url,
            'to': [to_url],
            'cc': [cc_url]
        }
    }

    handle = http_prefix + '://' + from_domain_full + '/@' + from_nickname

    # webfinger lookup of the sending account
    wf_request = webfinger_handle(session, handle, http_prefix,
                                  cached_webfingers,
                                  from_domain, project_version, debug, False,
                                  signing_priv_key_pem)
    if not wf_request:
        if debug:
            print('DEBUG: unblock webfinger failed for ' + handle)
        return 1
    if not isinstance(wf_request, dict):
        print('WARN: unblock webfinger for ' + handle +
              ' did not return a dict. ' + str(wf_request))
        return 1

    post_to_box = 'outbox'

    # get the outbox url and actor id of the sending account
    origin_domain = from_domain
    (outbox_url, _, _, from_person_id, _, _,
     _, _) = get_person_box(signing_priv_key_pem,
                            origin_domain,
                            base_dir, session, wf_request,
                            person_cache,
                            project_version, http_prefix,
                            from_nickname,
                            from_domain, post_to_box, 53892)

    if not outbox_url:
        if debug:
            print('DEBUG: unblock no ' + post_to_box +
                  ' was found for ' + handle)
        return 3
    if not from_person_id:
        if debug:
            print('DEBUG: unblock no actor was found for ' + handle)
        return 4

    auth_header = create_basic_auth_header(from_nickname, password)

    headers = {
        'host': from_domain,
        'Content-type': 'application/json',
        'Authorization': auth_header
    }
    post_result = post_json(http_prefix, from_domain_full,
                            session, new_block_json, [], outbox_url,
                            headers, 30, True)
    if not post_result:
        print('WARN: unblock unable to post')
    elif debug:
        # only report success when the POST was not flagged as failed
        print('DEBUG: c2s POST unblock success')

    return new_block_json
2020-11-09 19:41:01 +00:00
2021-12-29 21:55:09 +00:00
def post_is_muted(base_dir: str, nickname: str, domain: str,
                  post_json_object: {}, message_id: str) -> bool:
    """ Returns true if the given post is muted

    A boolean 'muted' field on the post takes priority. Otherwise the
    post is muted if a .json.muted marker file named after message_id
    exists in the account's inbox, its outbox or the announce cache
    for nickname. Without a message_id there is no marker file to look
    for, so the post is reported as not muted.
    """
    if 'muted' in post_json_object:
        muted_field = post_json_object['muted']
        # anything other than a real boolean falls through to the files
        if isinstance(muted_field, bool):
            return muted_field

    if not message_id:
        return False

    post_dir = acct_dir(base_dir, nickname, domain)
    # slashes are replaced so that the id is usable as a filename
    marker_name = message_id.replace('/', '#') + '.json.muted'
    mute_filenames = (
        post_dir + '/inbox/' + marker_name,
        post_dir + '/outbox/' + marker_name,
        base_dir + '/accounts/cache/announce/' + nickname +
        '/' + marker_name
    )
    for mute_filename in mute_filenames:
        if os.path.isfile(mute_filename):
            return True
    return False
2021-03-18 11:03:39 +00:00
2022-05-31 14:00:49 +00:00
def c2s_box_json(session, nickname: str, password: str,
                 domain: str, port: int,
                 http_prefix: str,
                 box_name: str, page_number: int,
                 debug: bool, signing_priv_key_pem: str) -> {}:
    """C2S Authenticated GET of posts for a timeline

    Requests the given page of box_name from the account's actor url
    using basic auth, and returns whatever get_json returns. Returns
    None if there is no session.
    """
    if not session:
        print('WARN: No session for c2s_box_json')
        return None

    actor_url = \
        local_actor_url(http_prefix, nickname,
                        get_full_domain(domain, port))
    basic_auth = create_basic_auth_header(nickname, password)

    profile_str = 'https://www.w3.org/ns/activitystreams'
    accept_str = 'application/ld+json; profile="' + profile_str + '"'
    request_headers = {
        'host': domain,
        'Content-type': 'application/json',
        'Authorization': basic_auth,
        'Accept': accept_str
    }

    # GET the requested page of the timeline
    page_url = actor_url + '/' + box_name + '?page=' + str(page_number)
    timeline_json = get_json(signing_priv_key_pem, session, page_url,
                             request_headers, None,
                             debug, __version__, http_prefix, None)

    if debug and timeline_json is not None:
        print('DEBUG: GET c2s_box_json success')

    return timeline_json
2021-10-14 15:12:35 +00:00
2021-12-29 21:55:09 +00:00
def seconds_between_published(published1: str, published2: str) -> int:
    """Returns the number of seconds between two published dates

    Both dates are expected in the format YYYY-MM-DDTHH:MM:SSZ.
    The result is the absolute difference, including whole days, so
    the order of the two dates does not matter and the result is
    never negative for valid dates.
    Returns -1 if either date can't be parsed.
    """
    date_format = '%Y-%m-%dT%H:%M:%SZ'
    try:
        published1_time = \
            datetime.datetime.strptime(published1, date_format)
    except (ValueError, TypeError):
        print('EX: seconds_between_published unable to parse date 1 ' +
              str(published1))
        return -1
    try:
        published2_time = \
            datetime.datetime.strptime(published2, date_format)
    except (ValueError, TypeError):
        print('EX: seconds_between_published unable to parse date 2 ' +
              str(published2))
        return -1
    # timedelta.seconds only holds the part of the difference which is
    # less than a day, so total_seconds is needed to include the days
    elapsed = abs(published2_time - published1_time)
    return int(elapsed.total_seconds())
2021-10-14 15:12:35 +00:00
2021-12-29 21:55:09 +00:00
def _has_edit_comparison_fields(post_json: {}) -> bool:
    """Returns true if the post has a type and an object dict with the
    fields which edited_post_filename needs in order to compare posts
    """
    if not has_object_dict(post_json):
        return False
    if not post_json.get('type'):
        return False
    post_obj = post_json['object']
    required_fields = (
        'type', 'published', 'id', 'content', 'attributedTo'
    )
    for field_name in required_fields:
        if not post_obj.get(field_name):
            return False
    return isinstance(post_obj['attributedTo'], str)


def edited_post_filename(base_dir: str, nickname: str, domain: str,
                         post_json_object: {}, debug: bool,
                         max_time_diff_seconds: int,
                         system_language: str) -> (str, {}):
    """Returns the filename of the edited post

    The given post is treated as an edit of the previous post recorded
    for its author (within the account's lastpost directory) if both
    have the same types, were published no more than
    max_time_diff_seconds apart and have sufficiently similar content.

    Returns the filename and json of the previous post if the given
    post is an edit of it, otherwise returns '', None
    """
    if not _has_edit_comparison_fields(post_json_object):
        return '', None
    post_obj = post_json_object['object']

    # the id of the last post seen from this author is stored in a
    # file named after the actor
    actor = post_obj['attributedTo']
    actor_filename = \
        acct_dir(base_dir, nickname, domain) + '/lastpost/' + \
        actor.replace('/', '#')
    if not os.path.isfile(actor_filename):
        return '', None
    post_id = remove_id_ending(post_obj['id'])
    lastpost_id = None
    try:
        with open(actor_filename, 'r',
                  encoding='utf-8') as fp_actor:
            lastpost_id = fp_actor.read()
    except OSError:
        print('EX: edited_post_filename unable to read ' + actor_filename)
        return '', None
    if not lastpost_id:
        return '', None
    # a post is not an edit of itself
    if lastpost_id == post_id:
        return '', None

    lastpost_filename = \
        locate_post(base_dir, nickname, domain, lastpost_id, False)
    if not lastpost_filename:
        return '', None
    lastpost_json = load_json(lastpost_filename, 0)
    if not lastpost_json:
        return '', None
    # the stored post gets the same checks as the incoming one, which
    # also avoids dereferencing an object which is not a dict
    if not _has_edit_comparison_fields(lastpost_json):
        return '', None
    lastpost_obj = lastpost_json['object']
    if lastpost_json['type'] != post_json_object['type']:
        return '', None
    if lastpost_obj['type'] != post_obj['type']:
        return '', None

    time_diff_seconds = \
        seconds_between_published(lastpost_obj['published'],
                                  post_obj['published'])
    # a negative value means that a published date could not be
    # parsed, so the time between the posts is unknown
    if time_diff_seconds < 0:
        return '', None
    if time_diff_seconds > max_time_diff_seconds:
        return '', None
    if debug:
        print(post_id + ' might be an edit of ' + lastpost_id)

    # compare content, preferring the system language where available
    lastpost_content = lastpost_obj['content']
    if lastpost_obj.get('contentMap'):
        if lastpost_obj['contentMap'].get(system_language):
            lastpost_content = lastpost_obj['contentMap'][system_language]
    content = post_obj['content']
    if post_obj.get('contentMap'):
        if post_obj['contentMap'].get(system_language):
            content = post_obj['contentMap'][system_language]
    if words_similarity(lastpost_content, content, 10) < 70:
        return '', None
    print(post_id + ' is an edit of ' + lastpost_id)
    return lastpost_filename, lastpost_json
2021-12-31 15:16:53 +00:00
def get_original_post_from_announce_url(announce_url: str, base_dir: str,
                                        nickname: str,
                                        domain: str) -> (str, str, str):
    """From the url of an announce this returns the actor, url and
    filename (if available) of the original post being announced

    Returns None, None, None if the announce itself can't be located.
    If the announce is located but is not a usable Announce then
    None, None and the filename of the announce are returned.
    Otherwise the third value is the result of locating the original
    post, and actor and url may be None if they can't be determined.
    """
    post_filename = locate_post(base_dir, nickname, domain, announce_url)
    if not post_filename:
        return None, None, None
    announce_post_json = load_json(post_filename, 0, 1)
    if not announce_post_json:
        return None, None, post_filename
    if not announce_post_json.get('type'):
        return None, None, post_filename
    if announce_post_json['type'] != 'Announce':
        return None, None, post_filename
    if not announce_post_json.get('object'):
        return None, None, post_filename
    # the object of an announce is expected to be the id of a post
    if not isinstance(announce_post_json['object'], str):
        return None, None, post_filename

    actor = url = None
    # do we have the original post?
    orig_post_id = announce_post_json['object']
    orig_filename = locate_post(base_dir, nickname, domain, orig_post_id)
    if orig_filename:
        # we have the original post
        orig_post_json = load_json(orig_filename, 0, 1)
        if orig_post_json and has_object_dict(orig_post_json):
            orig_obj = orig_post_json['object']
            if orig_obj.get('attributedTo'):
                if isinstance(orig_obj['attributedTo'], str):
                    actor = orig_obj['attributedTo']
                    url = orig_post_id
            elif orig_obj.get('actor'):
                # prefer the actor of the enclosing activity, but it
                # may be absent when only the object has an actor
                actor = orig_post_json.get('actor')
                if not actor:
                    actor = orig_obj['actor']
                url = orig_post_id
    elif has_users_path(orig_post_id):
        # we don't have the original post, so
        # get the actor from the original post url
        orig_nick = get_nickname_from_actor(orig_post_id)
        orig_domain, _ = get_domain_from_actor(orig_post_id)
        if orig_nick and orig_domain:
            actor = \
                orig_post_id.split('/' + orig_nick + '/')[0] + \
                '/' + orig_nick
            url = orig_post_id

    return actor, url, orig_filename