epicyon/conversation.py

474 lines
18 KiB
Python
Raw Normal View History

2021-08-12 10:26:24 +00:00
__filename__ = "conversation.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-12-22 23:37:30 +00:00
__version__ = "1.6.0"
2021-08-12 10:26:24 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2021-08-12 10:26:24 +00:00
__status__ = "Production"
__module_group__ = "Timeline"
import os
2021-12-26 10:57:03 +00:00
from utils import has_object_dict
2021-12-26 12:02:29 +00:00
from utils import acct_dir
2021-12-27 11:20:57 +00:00
from utils import remove_id_ending
2022-06-10 11:43:33 +00:00
from utils import text_in_file
2022-12-28 10:18:13 +00:00
from utils import locate_post
from utils import load_json
2023-01-07 11:45:19 +00:00
from utils import harmless_markup
from utils import get_attributed_to
from utils import get_reply_to
2024-05-29 13:37:07 +00:00
from utils import resembles_url
2022-12-28 10:18:13 +00:00
from keys import get_instance_actor_key
from session import get_json
2023-08-13 09:58:02 +00:00
from session import get_json_valid
2021-08-12 10:26:24 +00:00
2021-12-29 21:55:09 +00:00
def _get_conversation_filename(base_dir: str, nickname: str, domain: str,
                               post_json_object: {}) -> str:
    """Returns the filename of the conversation index for the given post,
    or None if the post does not reference a usable conversation.

    Due to lack of AP specification maintenance, a conversation can also be
    referred to as a thread or (confusingly) "context". The first non-empty
    field is used, checked in the order: conversation, context, thread.

    The /conversation subdirectory beneath the account directory is created
    if it does not already exist. This only happens after the post has been
    validated, so that malformed posts have no effect on the filesystem.
    """
    if not has_object_dict(post_json_object):
        return None
    post_obj = post_json_object['object']

    # pick the first conversation reference which is present
    conversation_id = \
        post_obj.get('conversation') or \
        post_obj.get('context') or \
        post_obj.get('thread')
    if not conversation_id:
        return None
    if not isinstance(conversation_id, str):
        return None
    if not post_obj.get('id'):
        return None

    conversation_dir = acct_dir(base_dir, nickname, domain) + '/conversation'
    if not os.path.isdir(conversation_dir):
        try:
            os.mkdir(conversation_dir)
        except OSError:
            # the directory may have been created in the meantime,
            # in which case it can still be used
            if not os.path.isdir(conversation_dir):
                print('EX: _get_conversation_filename ' +
                      'unable to create ' + conversation_dir)
                return None

    # path separators within the id are replaced so that the result
    # is a single flat filename within the conversation directory
    return conversation_dir + '/' + conversation_id.replace('/', '#')
2021-10-14 15:12:35 +00:00
2021-12-29 21:55:09 +00:00
def update_conversation(base_dir: str, nickname: str, domain: str,
                        post_json_object: {}) -> bool:
    """Records the id of the given post within its conversation index,
    which is a file in the /conversation subdirectory containing one
    post id per line.
    Returns True if the id was newly written, or False if the post has
    no conversation, is already indexed or the index could not be written
    """
    conversation_filename = \
        _get_conversation_filename(base_dir, nickname, domain,
                                   post_json_object)
    if not conversation_filename:
        return False

    index_line = remove_id_ending(post_json_object['object']['id']) + '\n'

    if not os.path.isfile(conversation_filename):
        # start a new index
        file_mode = 'w+'
        failure_text = \
            'EX: update_conversation ' + \
            'unable to write to ' + conversation_filename
    elif not text_in_file(index_line, conversation_filename):
        # extend the existing index
        file_mode = 'a+'
        failure_text = \
            'EX: update_conversation 2 ' + \
            'unable to write to ' + conversation_filename
    else:
        # already indexed
        return False

    try:
        with open(conversation_filename, file_mode,
                  encoding='utf-8') as fp_conv:
            fp_conv.write(index_line)
    except OSError:
        print(failure_text)
        return False
    return True
2021-12-29 21:55:09 +00:00
def mute_conversation(base_dir: str, nickname: str, domain: str,
                      conversation_id: str) -> None:
    """Marks the given conversation as muted, by creating a .muted file
    alongside its index within the /conversation subdirectory.
    Nothing happens if the conversation id is not a string, if there is
    no index for the conversation or if it is already muted
    """
    if not isinstance(conversation_id, str):
        return
    conversation_filename = \
        acct_dir(base_dir, nickname, domain) + '/conversation' + \
        '/' + conversation_id.replace('/', '#')
    muted_filename = conversation_filename + '.muted'
    if not os.path.isfile(conversation_filename) or \
       os.path.isfile(muted_filename):
        return
    try:
        with open(muted_filename, 'w+', encoding='utf-8') as fp_conv:
            fp_conv.write('\n')
    except OSError:
        print('EX: unable to write mute ' + conversation_filename)
2021-08-12 10:26:24 +00:00
2021-12-29 21:55:09 +00:00
def unmute_conversation(base_dir: str, nickname: str, domain: str,
                        conversation_id: str) -> None:
    """Removes the mute from the given conversation, by deleting the
    .muted file alongside its index within the /conversation subdirectory.
    Nothing happens if the conversation id is not a string, if there is
    no index for the conversation or if it is not currently muted
    """
    if not isinstance(conversation_id, str):
        return
    conversation_filename = \
        acct_dir(base_dir, nickname, domain) + '/conversation' + \
        '/' + conversation_id.replace('/', '#')
    muted_filename = conversation_filename + '.muted'
    if not (os.path.isfile(conversation_filename) and
            os.path.isfile(muted_filename)):
        return
    try:
        os.remove(muted_filename)
    except OSError:
        print('EX: unmute_conversation unable to delete ' +
              conversation_filename + '.muted')
2022-12-28 10:18:13 +00:00
def _get_reply_items(replies, signing_priv_key_pem: str,
                     session, as_header, debug: bool,
                     http_prefix: str, domain: str,
                     mitm_servers: []) -> []:
    """Downloads the replies collection referenced by the 'replies' field
    of a post and returns its list of items, or an empty list if the
    collection or its items could not be obtained.
    Items are taken from the first page of the collection, or from the
    page following it if the first page has no items
    """
    # get the replies collection url
    replies_collection_id = None
    if isinstance(replies, dict):
        if replies.get('id'):
            replies_collection_id = replies['id']
    elif isinstance(replies, str):
        replies_collection_id = replies
    if not replies_collection_id:
        return []

    if debug:
        print('DEBUG: get_replies_to_post replies_collection_id ' +
              str(replies_collection_id))
    replies_collection = \
        get_json(signing_priv_key_pem, session, replies_collection_id,
                 as_header, None, debug, mitm_servers, __version__,
                 http_prefix, domain)
    if not get_json_valid(replies_collection):
        return []
    if debug:
        print('DEBUG: get_replies_to_post replies_collection ' +
              str(replies_collection))

    # get the first page of replies
    first_page = replies_collection.get('first')
    if not first_page:
        return []
    if isinstance(first_page, str):
        # the first page can be given as a link rather than being
        # embedded within the collection, so download it
        if not resembles_url(first_page):
            return []
        first_page = \
            get_json(signing_priv_key_pem, session, first_page,
                     as_header, None, debug, mitm_servers, __version__,
                     http_prefix, domain)
        if debug:
            print('DEBUG: get_replies_to_post first page ' +
                  str(first_page))
        if not get_json_valid(first_page):
            return []
    if not isinstance(first_page, dict):
        return []

    items_list = first_page.get('items')
    if not items_list:
        # if there are no items try the next one
        next_page_id = first_page.get('next')
        if not next_page_id:
            return []
        if not isinstance(next_page_id, str):
            return []
        next_page = \
            get_json(signing_priv_key_pem, session, next_page_id,
                     as_header, None, debug, mitm_servers, __version__,
                     http_prefix, domain)
        if debug:
            print('DEBUG: get_replies_to_post next replies_collection ' +
                  str(next_page))
        if not get_json_valid(next_page):
            return []
        if not next_page.get('items'):
            return []
        if not isinstance(next_page['items'], list):
            return []
        items_list = next_page['items']

    if debug:
        print('DEBUG: get_replies_to_post items_list ' +
              str(items_list))

    if not isinstance(items_list, list):
        return []
    return items_list


def _wrap_reply_in_create(item: {}) -> {}:
    """Wraps a bare reply object within a Create activity.
    Returns None if the object lacks any of the fields needed to do so
    """
    if not item.get('attributedTo'):
        return None
    attrib_str = get_attributed_to(item['attributedTo'])
    if not attrib_str:
        return None
    if not item.get('published'):
        return None
    if not item.get('id'):
        return None
    if not isinstance(item['id'], str):
        return None
    if not item.get('to'):
        return None
    if not isinstance(item['to'], list):
        return None
    if 'cc' not in item:
        return None
    if not isinstance(item['cc'], list):
        return None
    return {
        "@context": [
            'https://www.w3.org/ns/activitystreams',
            'https://w3id.org/security/v1'
        ],
        'id': item['id'] + '/activity',
        'type': 'Create',
        'actor': attrib_str,
        'published': item['published'],
        'to': item['to'],
        'cc': item['cc'],
        'object': item
    }


def _get_replies_to_post(post_json_object: {},
                         signing_priv_key_pem: str,
                         session, as_header, debug: bool,
                         http_prefix: str,
                         base_dir: str, nickname: str,
                         domain: str, depth: int, ids: [],
                         mitm_servers: []) -> []:
    """Returns a list of reply posts to the given post as json

    Replies are gathered recursively: each reply with a usable id is
    itself searched for replies while depth is less than 10.
    ids is the list of reply ids encountered so far. It is extended in
    place and is used to avoid circularity.
    Each reply found is also added to its conversation index
    """
    result: list[dict] = []
    post_obj = post_json_object
    if has_object_dict(post_json_object):
        post_obj = post_json_object['object']
    if not post_obj.get('replies'):
        return result

    items_list = \
        _get_reply_items(post_obj['replies'], signing_priv_key_pem,
                         session, as_header, debug,
                         http_prefix, domain, mitm_servers)

    # check each item in the list
    for item in items_list:
        # download the item if needed
        if isinstance(item, str):
            if resembles_url(item):
                if debug:
                    print('Downloading conversation item ' + item)
                item_dict = \
                    get_json(signing_priv_key_pem, session, item,
                             as_header, None, debug, mitm_servers,
                             __version__, http_prefix, domain)
                if not get_json_valid(item_dict):
                    continue
                item = item_dict

        if not isinstance(item, dict):
            continue
        if not has_object_dict(item):
            # a bare object needs to be wrapped within an activity
            item = _wrap_reply_in_create(item)
            if not item:
                continue
        if not item['object'].get('published'):
            continue

        # render harmless any dangerous markup
        harmless_markup(item)

        # keep a list of ids encountered, to avoid circularity
        reply_post_id = None
        if item.get('id'):
            if isinstance(item['id'], str):
                reply_post_id = item['id']
                if reply_post_id in ids:
                    continue
                ids.append(reply_post_id)

        # add it to the list
        result.append(item)
        update_conversation(base_dir, nickname, domain,
                            item)
        if depth < 10 and reply_post_id:
            result += \
                _get_replies_to_post(item,
                                     signing_priv_key_pem,
                                     session, as_header,
                                     debug,
                                     http_prefix, base_dir,
                                     nickname, domain,
                                     depth + 1, ids,
                                     mitm_servers)
    return result
def _wrap_conversation_post(post_json_object: {}, debug: bool) -> {}:
    """Wraps a bare conversation post object within a Create activity.
    Returns None if the object lacks any of the fields needed to do so,
    with the reason being shown if debug is set
    """
    if not post_json_object.get('id'):
        return None
    if not isinstance(post_json_object['id'], str):
        return None
    if not post_json_object.get('attributedTo'):
        if debug:
            print(str(post_json_object))
            print(post_json_object['id'] + ' has no attributedTo')
        return None
    attrib_str = get_attributed_to(post_json_object['attributedTo'])
    if not attrib_str:
        return None
    if not post_json_object.get('published'):
        if debug:
            print(str(post_json_object))
            print(post_json_object['id'] + ' has no published date')
        return None
    if not post_json_object.get('to'):
        if debug:
            print(str(post_json_object))
            print(post_json_object['id'] + ' has no "to" list')
        return None
    if not isinstance(post_json_object['to'], list):
        return None
    if 'cc' not in post_json_object:
        if debug:
            print(str(post_json_object))
            print(post_json_object['id'] + ' has no "cc" list')
        return None
    if not isinstance(post_json_object['cc'], list):
        return None
    return {
        "@context": [
            'https://www.w3.org/ns/activitystreams',
            'https://w3id.org/security/v1'
        ],
        'id': post_json_object['id'] + '/activity',
        'type': 'Create',
        'actor': attrib_str,
        'published': post_json_object['published'],
        'to': post_json_object['to'],
        'cc': post_json_object['cc'],
        'object': post_json_object
    }


def download_conversation_posts(authorized: bool, session,
                                http_prefix: str, base_dir: str,
                                nickname: str, domain: str,
                                post_id: str, debug: bool,
                                mitm_servers: []) -> []:
    """Downloads all posts for a conversation and returns a list of the
    json objects

    Starting from the post with the given id, the chain of posts being
    replied to is followed backwards, with each earlier post placed
    before the later ones. Replies to the starting post are then appended.
    Posts are loaded from local storage where they can be located and
    are otherwise downloaded, which only happens for authorized viewers.
    An empty list is returned if post_id is not a url
    """
    if '://' not in post_id:
        return []
    profile_str = 'https://www.w3.org/ns/activitystreams'
    as_header = {
        'Accept': 'application/ld+json; profile="' + profile_str + '"'
    }
    conversation_view: list[dict] = []
    signing_priv_key_pem = get_instance_actor_key(base_dir, domain)
    post_id = remove_id_ending(post_id)
    post_filename = \
        locate_post(base_dir, nickname, domain, post_id)
    post_json_object = None
    if authorized:
        if post_filename:
            post_json_object = load_json(post_filename)
        else:
            post_json_object = \
                get_json(signing_priv_key_pem, session, post_id,
                         as_header, None, debug, mitm_servers,
                         __version__, http_prefix, domain)
    if debug:
        if not get_json_valid(post_json_object):
            print(post_id + ' returned no json')
    if post_json_object:
        update_conversation(base_dir, nickname, domain,
                            post_json_object)

    # get any replies
    replies_to_post: list[dict] = []
    if get_json_valid(post_json_object):
        replies_to_post = \
            _get_replies_to_post(post_json_object,
                                 signing_priv_key_pem,
                                 session, as_header, debug,
                                 http_prefix, base_dir, nickname,
                                 domain, 0, [], mitm_servers)

    ids: list[str] = []
    while get_json_valid(post_json_object):
        if not isinstance(post_json_object, dict):
            break
        if not has_object_dict(post_json_object):
            # a bare object needs to be wrapped within an activity
            post_json_object = \
                _wrap_conversation_post(post_json_object, debug)
            if not post_json_object:
                break
        if not post_json_object['object'].get('published'):
            break

        # avoid any circularity in previous conversation posts
        if post_json_object.get('id'):
            if isinstance(post_json_object['id'], str):
                if post_json_object['id'] in ids:
                    break
                ids.append(post_json_object['id'])

        # render harmless any dangerous markup
        harmless_markup(post_json_object)

        # earlier posts go before later ones
        conversation_view.insert(0, post_json_object)
        update_conversation(base_dir, nickname, domain,
                            post_json_object)
        if not authorized:
            # only show a single post to non-authorized viewers
            break

        reply_to_id = get_reply_to(post_json_object['object'])
        if not reply_to_id:
            if debug:
                # report the post which was checked, since the
                # reply id is empty at this point
                print(str(post_json_object.get('id')) +
                      ' is not a reply')
            break
        post_id = remove_id_ending(reply_to_id)
        post_filename = \
            locate_post(base_dir, nickname, domain, post_id)
        post_json_object = None
        if post_filename:
            post_json_object = load_json(post_filename)
        elif authorized:
            post_json_object = \
                get_json(signing_priv_key_pem, session, post_id,
                         as_header, None, debug, mitm_servers,
                         __version__, http_prefix, domain)
            if debug:
                if not get_json_valid(post_json_object):
                    print(post_id + ' returned no json')

    return conversation_view + replies_to_post
2024-10-06 12:22:52 +00:00
def conversation_tag_to_convthread_id(tag: str) -> str:
    """Converts a conversation tag, such as
    tag:domain,2024-09-28:objectId=647832678:objectType=Conversation
    into a convthread id such as 20240928647832678

    The id consists of the decimal digits of the tag, in order.
    For tags of the form tag:domain,... the domain is skipped, so that
    any digits within it do not become part of the id.
    An empty string is returned if the tag is not a string
    """
    if not isinstance(tag, str):
        return ''
    digits_source = tag
    if tag.startswith('tag:') and ',' in tag:
        # the domain ends at the first comma
        digits_source = tag.split(',', 1)[1]
    # only ascii digits are wanted, whereas isdigit alone would also
    # accept characters such as superscript numbers
    return ''.join(tag_chr for tag_chr in digits_source
                   if tag_chr.isascii() and tag_chr.isdigit())
2024-10-06 14:09:54 +00:00
def convthread_id_to_conversation_tag(domain: str,
                                      convthread_id: str) -> str:
    """Converts a convthread id such as 20240928647832678
    into a conversation tag, such as
    tag:domain,2024-09-28:objectId=647832678:objectType=Conversation

    The first eight characters of the id are taken to be the date, as
    year, month and day, and the remainder is the object id.
    An empty string is returned if the id is not a string or has
    fewer than ten characters
    """
    if not isinstance(convthread_id, str):
        return ''
    if len(convthread_id) < 10:
        return ''
    year = convthread_id[:4]
    month = convthread_id[4:6]
    day = convthread_id[6:8]
    post_id = convthread_id[8:]
    conversation_id = \
        'tag:' + domain + ',' + year + '-' + month + '-' + day + \
        ':objectId=' + post_id + ':objectType=Conversation'
    return conversation_id
2024-10-06 19:20:47 +00:00
def post_id_to_convthread_id(post_id: str, published: str) -> str:
    """Converts a post ID into a conversation thread ID

    Within the post id /statuses/ is replaced by /thread/ followed by
    the published date as eight digits, so that with a published date
    beginning 2024-10-06
    https://domain/users/nick/statuses/123 becomes
    https://domain/users/nick/thread/20241006123

    The post id is returned unchanged if it does not contain /statuses/,
    if the published date has fewer than ten characters or if either
    argument is not a string
    """
    if not isinstance(post_id, str) or not isinstance(published, str):
        return post_id
    if '/statuses/' not in post_id or len(published) < 10:
        return post_id
    # the first ten characters are expected to be the date as YYYY-MM-DD
    date_prefix = published[:10].replace('-', '')
    return post_id.replace('/statuses/', '/thread/' + date_prefix)