epicyon/speaker.py

609 lines
23 KiB
Python
Raw Normal View History

2021-03-01 19:16:33 +00:00
__filename__ = "speaker.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-01-21 19:01:20 +00:00
__version__ = "1.5.0"
2021-03-01 19:16:33 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2021-03-01 19:16:33 +00:00
__status__ = "Production"
2021-06-26 11:27:14 +00:00
__module_group__ = "Accessibility"
2021-03-01 19:16:33 +00:00
2021-03-02 12:39:18 +00:00
import os
2021-03-03 19:06:18 +00:00
import html
2021-03-01 19:16:33 +00:00
import random
2021-03-03 19:06:18 +00:00
import urllib.parse
2022-05-12 10:13:55 +00:00
from utils import get_cached_post_filename
2021-12-27 11:20:57 +00:00
from utils import remove_id_ending
2021-12-26 20:12:18 +00:00
from utils import is_dm
2021-12-26 19:36:40 +00:00
from utils import is_reply
2021-12-26 20:39:35 +00:00
from utils import camel_case_split
2021-12-27 19:05:25 +00:00
from utils import get_domain_from_actor
2021-12-27 22:19:18 +00:00
from utils import get_nickname_from_actor
2021-12-27 22:12:29 +00:00
from utils import get_gender_from_bio
2021-12-27 21:59:07 +00:00
from utils import get_display_name
2021-12-27 15:43:22 +00:00
from utils import remove_html
2021-12-26 15:13:34 +00:00
from utils import load_json
2021-12-26 14:47:21 +00:00
from utils import save_json
2021-12-26 19:15:36 +00:00
from utils import is_pgp_encrypted
2021-12-26 10:57:03 +00:00
from utils import has_object_dict
2021-12-26 12:02:29 +00:00
from utils import acct_dir
2021-12-26 10:19:59 +00:00
from utils import local_actor_url
2024-01-09 16:59:23 +00:00
from utils import get_actor_from_post
2021-12-29 21:55:09 +00:00
from content import html_replace_quote_marks
2021-03-01 19:16:33 +00:00
2022-01-03 18:06:04 +00:00
# Punctuation sequences which are replaced by a single space before text
# is split into words by the link, emphasis and emoji handling functions
SPEAKER_REMOVE_CHARS = ('.\n', '. ', ',', ';', '?', '!')
2021-03-03 12:34:46 +00:00
2021-03-01 19:16:33 +00:00
2023-12-23 20:18:57 +00:00
def get_speaker_pitch(display_name: str, screenreader: str,
                      gender: str) -> int:
    """Returns the speech synthesis pitch for the given name

    The pitch is pseudo-random but repeatable: the same display name
    always produces the same value.

    display_name: name used to seed the pitch selection
    screenreader: 'picospeaker' selects the range -6..3, anything else
        selects the range 1..100
    gender: pronoun text. 'She' selects the upper part of the range
        and 'Him' the lower part. May be None or empty
    """
    # Use a private generator so that the process-wide random state
    # is not reseeded with a predictable value as a side effect
    rng = random.Random(display_name)
    if not gender:
        gender = ''
    if screenreader == 'picospeaker':
        range_min = -6
        range_max = 3
        if 'She' in gender:
            range_min = -1
        elif 'Him' in gender:
            range_max = -1
    else:
        range_min = 1
        range_max = 100
        if 'She' in gender:
            range_min = 50
        elif 'Him' in gender:
            range_max = 50
    return rng.randint(range_min, range_max)
2021-03-01 19:16:33 +00:00
2022-01-03 18:06:04 +00:00
def get_speaker_rate(display_name: str, screenreader: str) -> int:
    """Returns the speech synthesis rate for the given name

    The rate is pseudo-random but repeatable for a given display name.

    display_name: name used to seed the rate selection
    screenreader: 'picospeaker' selects the range -40..-20, anything
        else selects the range 50..120
    """
    # Use a private generator so that the process-wide random state
    # is not reseeded with a predictable value as a side effect
    rng = random.Random(display_name)
    if screenreader == 'picospeaker':
        return rng.randint(-40, -20)
    return rng.randint(50, 120)
2022-01-03 18:06:04 +00:00
def get_speaker_range(display_name: str) -> int:
    """Returns the speech synthesis range for the given name

    The value lies between 300 and 800 inclusive and is repeatable
    for a given display name.
    """
    # Use a private generator so that the process-wide random state
    # is not reseeded with a predictable value as a side effect
    return random.Random(display_name).randint(300, 800)
2022-01-03 18:06:04 +00:00
def _speaker_pronounce(base_dir: str, say_text: str, translate: {}) -> str:
2021-03-02 12:39:18 +00:00
"""Screen readers may not always pronounce correctly, so you
can have a file which specifies conversions. File should contain
line items such as:
Epicyon -> Epi-cyon
"""
2022-01-03 18:06:04 +00:00
pronounce_filename = base_dir + '/accounts/speaker_pronounce.txt'
convert_dict = {}
2021-03-03 19:15:32 +00:00
if translate:
2022-01-03 18:06:04 +00:00
convert_dict = {
2021-03-03 19:15:32 +00:00
"Epicyon": "Epi-cyon",
"espeak": "e-speak",
"emoji": "emowji",
"clearnet": "clear-net",
"https": "H-T-T-P-S",
"HTTPS": "H-T-T-P-S",
2021-03-03 20:41:22 +00:00
"XMPP": "X-M-P-P",
"xmpp": "X-M-P-P",
2021-03-03 22:26:54 +00:00
"sql": "S-Q-L",
2021-03-08 22:02:54 +00:00
".js": " dot J-S",
2021-03-04 11:51:30 +00:00
"PSQL": "Postgres S-Q-L",
2021-03-03 22:26:54 +00:00
"SQL": "S-Q-L",
2021-03-11 12:44:47 +00:00
"gdpr": "G-D-P-R",
2021-03-13 10:07:57 +00:00
"kde": "K-D-E",
2021-03-14 11:17:37 +00:00
"AGPL": "Affearo G-P-L",
"agpl": "Affearo G-P-L",
"GPL": "G-P-L",
"gpl": "G-P-L",
2021-03-04 15:54:52 +00:00
"coop": "co-op",
2021-03-04 11:51:30 +00:00
"KMail": "K-Mail",
2021-03-13 10:10:02 +00:00
"kmail": "K-Mail",
2021-03-04 11:51:30 +00:00
"gmail": "G-mail",
"Gmail": "G-mail",
"OpenPGP": "Open P-G-P",
2021-03-03 19:15:32 +00:00
"Tor": "Toor",
2021-03-10 18:44:44 +00:00
"memes": "meemes",
"Memes": "Meemes",
2022-04-21 13:36:39 +00:00
"rofl": translate["laughing"],
"ROFL": translate["laughing"],
"lmao": translate["laughing"],
"LMAO": translate["laughing"],
2021-03-10 13:45:34 +00:00
"fwiw": "for what it's worth",
"fyi": "for your information",
2021-03-10 21:08:18 +00:00
"irl": "in real life",
"IRL": "in real life",
2021-03-10 13:45:34 +00:00
"imho": "in my opinion",
2022-04-21 13:19:12 +00:00
"afaik": "as far as I know",
"AFAIK": "as far as I know",
2021-03-09 20:06:26 +00:00
"fediverse": "fediiverse",
"Fediverse": "Fediiverse",
2021-03-04 16:46:21 +00:00
" foss ": " free and open source software ",
" floss ": " free libre and open source software ",
" FOSS ": "free and open source software",
" FLOSS ": "free libre and open source software",
" oss ": " open source software ",
" OSS ": " open source software ",
2021-03-03 19:15:32 +00:00
"🤔": ". " + translate["thinking emoji"],
"RT @": "Re-Tweet ",
2021-03-03 20:56:35 +00:00
"#nowplaying": translate["hashtag"] + " now-playing",
"#NowPlaying": translate["hashtag"] + " now-playing",
2021-03-03 20:59:21 +00:00
"#": translate["hashtag"] + ' ',
2022-04-21 13:36:39 +00:00
"¯\\_(ツ)_/¯": translate["shrug"],
2021-03-03 19:15:32 +00:00
":D": '. ' + translate["laughing"],
":-D": '. ' + translate["laughing"],
":)": '. ' + translate["smile"],
";)": '. ' + translate["wink"],
":(": '. ' + translate["sad face"],
":-)": '. ' + translate["smile"],
":-(": '. ' + translate["sad face"],
";-)": '. ' + translate["wink"],
2021-03-10 10:43:53 +00:00
":O": '. ' + translate['shocked'],
2021-03-04 15:54:52 +00:00
"?": "? ",
2021-03-04 16:20:24 +00:00
'"': "'",
2021-03-03 21:30:01 +00:00
"*": "",
"(": ",",
")": ","
2021-03-03 19:15:32 +00:00
}
2022-01-03 18:06:04 +00:00
if os.path.isfile(pronounce_filename):
2022-06-09 14:46:30 +00:00
with open(pronounce_filename, 'r', encoding='utf-8') as fp_pro:
2022-01-03 18:06:04 +00:00
pronounce_list = fp_pro.readlines()
for conversion in pronounce_list:
2021-03-02 12:39:18 +00:00
separator = None
if '->' in conversion:
separator = '->'
elif ';' in conversion:
separator = ';'
elif ':' in conversion:
separator = ':'
elif ',' in conversion:
separator = ','
if not separator:
continue
text = conversion.split(separator)[0].strip()
converted = conversion.split(separator)[1].strip()
2022-01-03 18:06:04 +00:00
convert_dict[text] = converted
for text, converted in convert_dict.items():
if text in say_text:
say_text = say_text.replace(text, converted)
return say_text
2021-03-02 12:39:18 +00:00
2023-01-06 20:39:33 +00:00
def speaker_replace_links(http_prefix: str, nickname: str,
                          orig_domain: str, orig_domain_full: str,
                          say_text: str, translate: {},
                          detected_links: []) -> str:
    """Replaces any links in the given text with "Linked [domain]".
    Instead of reading out potentially very long and meaningless links

    Also converts :emoji: names and @mentions into speakable text.

    http_prefix, nickname, orig_domain_full: used to build a local
        '?remotetag=' link for hashtag links on other domains
    orig_domain: links to /tags/ on this domain are kept as they are
    say_text: the text to be converted
    translate: translated phrases, 'Linked' and 'mentioning' are used
    detected_links: list which each link found is appended to, in place
    Returns the converted text
    """
    # Split on a copy with punctuation removed, keeping the ?v=
    # parameter of video links intact so that it stays within the link
    text = say_text.replace('?v=', '__v=')
    for char in SPEAKER_REMOVE_CHARS:
        text = text.replace(char, ' ')
    text = text.replace('__v=', '?v=')

    linked_str = translate.get('Linked')
    if not linked_str:
        linked_str = 'Linked'

    replacements = {}
    prev_word = ''
    for word in text.split(' '):
        if word.startswith('v='):
            replacements[word] = ''
        if word.startswith(':') and word.endswith(':'):
            emoji_name = word.replace(':', '')
            if emoji_name:
                replacements[word] = ', emowji ' + emoji_name + ','
                continue
            # a word consisting only of colons is not an emoji, and
            # replacing it would alter every colon within the text
        if word.startswith('@') and not prev_word.endswith('RT'):
            # replace mentions, but not re-tweets
            if translate.get('mentioning'):
                replacements[word] = \
                    translate['mentioning'] + ' ' + word[1:] + ', '
        prev_word = word

        domain = None
        domain_full = None
        if 'https://' in word:
            domain = word.split('https://')[1]
            domain_full = 'https://' + domain
        elif 'http://' in word:
            domain = word.split('http://')[1]
            domain_full = 'http://' + domain
        if not domain:
            continue
        if '/' in domain:
            domain = domain.split('/')[0]
        if domain.startswith('www.'):
            # only remove the prefix, not later occurrences
            domain = domain[len('www.'):]
        replacements[domain_full] = '. ' + linked_str + ' ' + domain + '.'
        if '/tags/' in domain_full and domain != orig_domain:
            remote_hashtag_link = \
                http_prefix + '://' + orig_domain_full + '/users/' + \
                nickname + '?remotetag=' + domain_full.replace('/', '--')
            detected_links.append(remote_hashtag_link)
        else:
            detected_links.append(domain_full)

    for replace_str, new_str in replacements.items():
        say_text = say_text.replace(replace_str, new_str)
    return say_text.replace('..', '.')
2023-01-06 17:57:02 +00:00
def _add_ssml_emphasis(say_text: str) -> str:
    """Adds emphasis to *emphasised* text

    Each word which starts and ends with an asterisk is replaced by
    an SSML <emphasis level="strong"> element containing the word
    without its asterisks. Returns the resulting text
    """
    if '*' not in say_text:
        return say_text
    # split on a copy with punctuation removed
    text = say_text
    for char in SPEAKER_REMOVE_CHARS:
        text = text.replace(char, ' ')
    replacements = {}
    for word in text.split(' '):
        if not (word.startswith('*') and word.endswith('*')):
            continue
        emphasised = word.replace('*', '')
        if not emphasised:
            # a word consisting only of asterisks has nothing to
            # emphasise, and replacing it would alter every asterisk
            continue
        replacements[word] = \
            '<emphasis level="strong">' + emphasised + '</emphasis>'
    for replace_str, new_str in replacements.items():
        say_text = say_text.replace(replace_str, new_str)
    return say_text
2021-03-03 12:34:46 +00:00
2022-01-03 18:06:04 +00:00
def _remove_emoji_from_text(say_text: str) -> str:
    """Removes :emoji: from the given text

    Each word which starts and ends with a colon is removed. Returns
    the text with surrounding whitespace stripped
    """
    if ':' not in say_text:
        return say_text
    # split on a copy with punctuation removed
    text = say_text
    for char in SPEAKER_REMOVE_CHARS:
        text = text.replace(char, ' ')
    replacements = {}
    for word in text.split(' '):
        if not (word.startswith(':') and word.endswith(':')):
            continue
        if not word.replace(':', ''):
            # a word consisting only of colons is not an emoji, and
            # removing it would remove every colon within the text
            continue
        replacements[word] = ''
    for replace_str, new_str in replacements.items():
        say_text = say_text.replace(replace_str, new_str)
    # removing an emoji leaves a double space behind
    return say_text.replace('  ', ' ').strip()
2021-03-03 18:24:37 +00:00
2022-01-03 18:06:04 +00:00
def _speaker_endpoint_json(display_name: str, summary: str,
content: str, say_content: str,
image_description: str,
2021-12-29 21:55:09 +00:00
links: [], gender: str, post_id: str,
2022-01-03 18:06:04 +00:00
post_dm: bool, post_reply: bool,
follow_requests_exist: bool,
follow_requests_list: [],
liked_by: str, published: str, post_cal: bool,
post_share: bool, theme_name: str,
is_direct: bool, reply_to_you: bool) -> {}:
2021-03-03 12:34:46 +00:00
"""Returns a json endpoint for the TTS speaker
"""
2022-01-03 18:06:04 +00:00
speaker_json = {
"name": display_name,
2021-03-03 12:34:46 +00:00
"summary": summary,
2021-03-11 10:10:56 +00:00
"content": content,
2022-01-03 18:06:04 +00:00
"say": say_content,
"published": published,
2022-01-03 18:06:04 +00:00
"imageDescription": image_description,
"detectedLinks": links,
2021-12-26 19:47:06 +00:00
"id": post_id,
2022-01-03 18:06:04 +00:00
"direct": is_direct,
"replyToYou": reply_to_you,
"notify": {
2021-12-25 23:35:50 +00:00
"theme": theme_name,
2022-01-03 18:06:04 +00:00
"dm": post_dm,
"reply": post_reply,
"followRequests": follow_requests_exist,
"followRequestsList": follow_requests_list,
"likedBy": liked_by,
"calendar": post_cal,
"share": post_share
}
2021-03-03 12:34:46 +00:00
}
2021-03-03 13:02:47 +00:00
if gender:
2022-01-03 18:06:04 +00:00
speaker_json['gender'] = gender
return speaker_json
2021-03-03 12:34:46 +00:00
2022-05-12 17:41:40 +00:00
def _ssml_header(system_language: str, box_name: str, summary: str) -> str:
2021-03-04 10:25:36 +00:00
"""Returns a header for an SSML document
"""
2022-05-12 17:41:40 +00:00
if summary:
summary = ': ' + summary
2021-03-04 10:25:36 +00:00
return '<?xml version="1.0"?>\n' + \
'<speak xmlns="http://www.w3.org/2001/10/synthesis"\n' + \
' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' + \
' xsi:schemaLocation="http://www.w3.org/2001/10/synthesis\n' + \
' http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"\n' + \
' version="1.1">\n' + \
' <metadata>\n' + \
2021-12-25 23:03:28 +00:00
' <dc:title xml:lang="' + system_language + '">' + \
2022-05-12 17:41:40 +00:00
box_name + summary + '</dc:title>\n' + \
2021-03-04 10:25:36 +00:00
' </metadata>\n'
2022-01-03 18:06:04 +00:00
def _speaker_endpoint_ssml(display_name: str, summary: str,
                           content: str, language: str,
                           gender: str, box_name: str) -> str:
    """Returns an SSML endpoint for the TTS speaker
    https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language
    https://www.w3.org/TR/speech-synthesis/

    display_name: name of the voice
    summary: optional post summary, used within the document title
    content: the text to be spoken. *emphasised* words are converted
        into SSML emphasis elements
    language: language code for the spoken sentence. If not given
        then 'en' is used
    gender: pronoun text for the voice. For English this is mapped
        to male, female or neutral
    box_name: name of the timeline, used within the document title
    """
    lang_short = 'en'
    if language:
        lang_short = language[:2]
    else:
        # avoid concatenating None into the document below
        language = lang_short
    if not gender:
        gender = 'neutral'
    elif lang_short == 'en':
        gender = gender.lower()
        if 'he/him' in gender:
            gender = 'male'
        elif 'she/her' in gender:
            gender = 'female'
        else:
            gender = 'neutral'
    # Names and post text come from other accounts, so escape them to
    # keep the document well formed. The content is escaped before the
    # emphasis markup is added, so that the markup itself is preserved
    content = _add_ssml_emphasis(html.escape(content, quote=False))
    voice_params = \
        'name="' + html.escape(display_name) + \
        '" gender="' + html.escape(gender) + '"'
    if summary is None:
        summary = ''
    return _ssml_header(lang_short, box_name, summary) + \
        '  <p>\n' + \
        '    <s xml:lang="' + html.escape(language) + '">\n' + \
        '      <voice ' + voice_params + '>\n' + \
        '        ' + content + '\n' + \
        '      </voice>\n' + \
        '    </s>\n' + \
        '  </p>\n' + \
        '</speak>\n'
2022-02-26 22:52:20 +00:00
def get_ssml_box(base_dir: str, path: str,
                 domain: str,
                 system_language: str,
                 box_name: str) -> str:
    """Returns SSML for the given timeline

    base_dir: base directory of the instance
    path: url path containing /users/nickname
    domain: domain of the account
    system_language: language code used within the SSML
    box_name: name of the timeline, used within the SSML title
    Returns None if the path contains no account, or if there is no
    usable speaker.json file for the account
    """
    if '/users/' not in path:
        return None
    nickname = path.split('/users/')[1]
    if '/' in nickname:
        nickname = nickname.split('/')[0]
    speaker_filename = \
        acct_dir(base_dir, nickname, domain) + '/speaker.json'
    if not os.path.isfile(speaker_filename):
        return None
    speaker_json = load_json(speaker_filename)
    if not speaker_json:
        return None
    # an incomplete file is treated the same as a missing one
    display_name = speaker_json.get('name')
    say_content = speaker_json.get('say')
    if not isinstance(display_name, str) or \
       not isinstance(say_content, str):
        return None
    gender = None
    if speaker_json.get('gender'):
        gender = speaker_json['gender']
    return _speaker_endpoint_ssml(display_name,
                                  speaker_json.get('summary'),
                                  say_content,
                                  system_language,
                                  gender, box_name)
2021-03-03 19:06:18 +00:00
2023-01-06 20:39:33 +00:00
def speakable_text(http_prefix: str,
                   nickname: str, domain: str, domain_full: str,
                   base_dir: str, content: str, translate: {}) -> (str, []):
    """Convert the given text to a speakable version
    which includes changes for pronunciation

    Returns the speakable text together with the list of links which
    were detected within the content. PGP encrypted content is
    returned unchanged, with an empty list of links
    """
    content = str(content)
    if is_pgp_encrypted(content):
        return content, []

    # replace some emoji before removing html
    if ' <3' in content:
        content = content.replace(' <3', ' ' + translate['heart'])
    content = remove_html(html_replace_quote_marks(content))
    detected_links = []
    content = speaker_replace_links(http_prefix,
                                    nickname, domain, domain_full,
                                    content, translate, detected_links)
    # replace all double spaces
    while '  ' in content:
        content = content.replace('  ', ' ')
    content = content.replace(' . ', '. ').strip()
    say_content = _speaker_pronounce(base_dir, content, translate)
    # replace all double spaces
    while '  ' in say_content:
        say_content = say_content.replace('  ', ' ')
    return say_content.replace(' . ', '. ').strip(), detected_links
2021-03-18 17:27:46 +00:00
2021-12-29 21:55:09 +00:00
def _speaker_tidy_spaces(text: str) -> str:
    """Replaces all double spaces and removes spaces before full stops
    """
    while '  ' in text:
        text = text.replace('  ', ' ')
    return text.replace(' . ', '. ').strip()


def _speaker_image_descriptions(post_json_object: {}) -> str:
    """Returns the descriptions of any attachments on the given post
    """
    attach_list = post_json_object['object'].get('attachment')
    if not attach_list or not isinstance(attach_list, list):
        return ''
    descriptions = ''
    for img in attach_list:
        if not isinstance(img, dict):
            continue
        if img.get('name') and isinstance(img['name'], str):
            descriptions += remove_html(img['name']) + '. '
    return descriptions


def _speaker_read_lines(filename: str) -> []:
    """Returns the stripped lines of the given file, or an empty
    list if the file does not exist or can't be read
    """
    if not os.path.isfile(filename):
        return []
    try:
        with open(filename, 'r', encoding='utf-8') as fp_lines:
            return [line.strip() for line in fp_lines]
    except OSError:
        print('EX: unable to read ' + filename)
    return []


def _post_to_speaker_json(base_dir: str, http_prefix: str,
                          nickname: str, domain: str, domain_full: str,
                          post_json_object: {}, person_cache: {},
                          translate: {}, announcing_actor: str,
                          theme_name: str) -> {}:
    """Converts an ActivityPub post into some Json containing
    speech synthesis parameters.
    NOTE: There currently appears to be no standardized json
    format for speech synthesis

    Returns None if the post has no text content, or if the display
    name of its author could not be obtained
    """
    if not has_object_dict(post_json_object):
        return None
    post_obj = post_json_object['object']
    if not post_obj.get('content'):
        return None
    if not isinstance(post_obj['content'], str):
        return None

    detected_links = []
    content = urllib.parse.unquote_plus(post_obj['content'])
    content = html.unescape(content)
    content = content.replace('<p>', '').replace('</p>', ' ')
    if not is_pgp_encrypted(content):
        # replace some emoji before removing html
        if ' <3' in content:
            content = content.replace(' <3', ' ' + translate['heart'])
        content = remove_html(html_replace_quote_marks(content))
        content = speaker_replace_links(http_prefix,
                                        nickname, domain, domain_full,
                                        content, translate, detected_links)
        content = _speaker_tidy_spaces(content)
        say_content = \
            _speaker_tidy_spaces(_speaker_pronounce(base_dir, content,
                                                    translate))
    else:
        # encrypted content is passed through unaltered
        say_content = content

    image_description = _speaker_image_descriptions(post_json_object)

    is_direct = is_dm(post_json_object)
    actor = local_actor_url(http_prefix, nickname, domain_full)
    reply_to_you = is_reply(post_json_object, actor)

    published = ''
    if post_obj.get('published'):
        published = post_obj['published']

    summary = ''
    if post_obj.get('summary'):
        if isinstance(post_obj['summary'], str):
            summary = urllib.parse.unquote_plus(post_obj['summary'])
            summary = html.unescape(summary)

    actor_url = get_actor_from_post(post_json_object)
    speaker_name = \
        get_display_name(base_dir, actor_url, person_cache)
    if not speaker_name:
        return None
    speaker_name = _remove_emoji_from_text(speaker_name)
    speaker_name = speaker_name.replace('_', ' ')
    speaker_name = camel_case_split(speaker_name)
    gender = get_gender_from_bio(base_dir, actor_url,
                                 person_cache, translate)

    if announcing_actor:
        announced_nickname = get_nickname_from_actor(announcing_actor)
        announced_domain, _ = \
            get_domain_from_actor(announcing_actor)
        if announced_nickname and announced_domain:
            announced_handle = announced_nickname + '@' + announced_domain
            announce_prefix = \
                translate['announces'] + ' ' + announced_handle + '. '
            say_content = announce_prefix + say_content
            content = announce_prefix + content

    post_id = None
    if post_obj.get('id'):
        post_id = remove_id_ending(post_obj['id'])

    # NOTE(review): the account directory is looked up with domain, as
    # is done when speaker.json is saved and loaded elsewhere in this
    # file. Previously domain_full was used here - confirm against
    # acct_dir that domain is the expected argument
    accounts_dir = acct_dir(base_dir, nickname, domain)

    follow_requests_list = \
        _speaker_read_lines(accounts_dir + '/followrequests.txt')
    follow_requests_exist = bool(follow_requests_list)

    # the existence of these files indicates pending notifications
    post_dm = os.path.isfile(accounts_dir + '/.newDM')
    post_reply = os.path.isfile(accounts_dir + '/.newReply')
    post_cal = os.path.isfile(accounts_dir + '/.newCalendar')
    post_share = os.path.isfile(accounts_dir + '/.newShare')

    liked_by = ''
    like_filename = accounts_dir + '/.newLike'
    if os.path.isfile(like_filename):
        try:
            with open(like_filename, 'r', encoding='utf-8') as fp_like:
                liked_by = fp_like.read()
        except OSError:
            print('EX: unable to read ' + like_filename)

    return _speaker_endpoint_json(speaker_name, summary,
                                  content, say_content, image_description,
                                  detected_links, gender, post_id,
                                  post_dm, post_reply,
                                  follow_requests_exist,
                                  follow_requests_list,
                                  liked_by, published,
                                  post_cal, post_share, theme_name,
                                  is_direct, reply_to_you)
2021-12-29 21:55:09 +00:00
def update_speaker(base_dir: str, http_prefix: str,
                   nickname: str, domain: str, domain_full: str,
                   post_json_object: {}, person_cache: {},
                   translate: {}, announcing_actor: str,
                   theme_name: str,
                   system_language: str, box_name: str) -> None:
    """ Generates a json file which can be used for TTS announcement
    of incoming inbox posts

    The json is saved as speaker.json within the account directory,
    and an SSML version is saved alongside the cached post
    """
    speaker_json = _post_to_speaker_json(base_dir, http_prefix,
                                         nickname, domain, domain_full,
                                         post_json_object, person_cache,
                                         translate, announcing_actor,
                                         theme_name)
    if not speaker_json:
        return

    # save the json
    speaker_filename = \
        acct_dir(base_dir, nickname, domain) + '/speaker.json'
    save_json(speaker_json, speaker_filename)

    # save the ssml
    ssml_filename = get_cached_post_filename(base_dir, nickname,
                                             domain, post_json_object)
    if not ssml_filename:
        return
    ssml_filename = ssml_filename.replace('.html', '.ssml')
    if box_name == 'outbox':
        ssml_filename = ssml_filename.replace('/postcache/', '/outbox/')

    # a missing or empty gender is passed on as None
    gender = speaker_json.get('gender') or None
    ssml_str = _speaker_endpoint_ssml(speaker_json['name'],
                                      speaker_json['summary'],
                                      speaker_json['say'],
                                      system_language,
                                      gender, box_name)
    try:
        with open(ssml_filename, 'w+', encoding='utf-8') as fp_ssml:
            fp_ssml.write(ssml_str)
    except OSError:
        print('EX: unable to write ssml ' + ssml_filename)