mirror of https://gitlab.com/bashrc2/epicyon
559 lines
21 KiB
Python
559 lines
21 KiB
Python
__filename__ = "speaker.py"
|
|
__author__ = "Bob Mottram"
|
|
__license__ = "AGPL3+"
|
|
__version__ = "1.3.0"
|
|
__maintainer__ = "Bob Mottram"
|
|
__email__ = "bob@libreserver.org"
|
|
__status__ = "Production"
|
|
__module_group__ = "Accessibility"
|
|
|
|
import os
|
|
import html
|
|
import random
|
|
import urllib.parse
|
|
from utils import remove_id_ending
|
|
from utils import is_dm
|
|
from utils import is_reply
|
|
from utils import camel_case_split
|
|
from utils import get_domain_from_actor
|
|
from utils import get_nickname_from_actor
|
|
from utils import get_gender_from_bio
|
|
from utils import get_display_name
|
|
from utils import remove_html
|
|
from utils import load_json
|
|
from utils import save_json
|
|
from utils import is_pgp_encrypted
|
|
from utils import has_object_dict
|
|
from utils import acct_dir
|
|
from utils import local_actor_url
|
|
from content import html_replace_quote_marks
|
|
|
|
# Punctuation sequences which the functions in this module replace with
# spaces before splitting text into words
SPEAKER_REMOVE_CHARS = ('.\n', '. ', ',', ';', '?', '!')
|
|
|
|
|
|
def get_speaker_pitch(display_name: str, screenreader: str, gender) -> int:
    """Returns the speech synthesis pitch for the given name

    The pitch is pseudo-random but seeded with the display name, so that
    the same name always produces the same pitch.

    display_name: name used as the random seed
    screenreader: name of the speech synthesiser. 'picospeaker' uses
        the range -6..3, anything else uses 1..100
    gender: pronoun text. If it contains 'She' the upper part of the
        range is used, if it contains 'Him' the lower part is used.
        None or an empty value applies no restriction

    Returns an integer pitch within the selected range
    """
    # a missing gender must not break the membership tests below
    if not gender:
        gender = ''
    range_min = 1
    range_max = 100
    if 'She' in gender:
        range_min = 50
    elif 'Him' in gender:
        range_max = 50
    if screenreader == 'picospeaker':
        range_min = -6
        range_max = 3
        if 'She' in gender:
            range_min = -1
        elif 'Him' in gender:
            range_max = -1
    # A private generator gives the same value as seeding the module
    # level generator, without reseeding it for every other caller
    return random.Random(display_name).randint(range_min, range_max)
|
|
|
|
|
|
def get_speaker_rate(display_name: str, screenreader: str) -> int:
    """Returns the speech synthesis rate for the given name

    The rate is pseudo-random but seeded with the display name, so that
    the same name always produces the same rate.

    display_name: name used as the random seed
    screenreader: name of the speech synthesiser. 'picospeaker' uses
        the range -40..-20, anything else uses 50..120

    Returns an integer rate within the selected range
    """
    # A private generator gives the same value as seeding the module
    # level generator, without reseeding it for every other caller
    name_random = random.Random(display_name)
    if screenreader == 'picospeaker':
        return name_random.randint(-40, -20)
    return name_random.randint(50, 120)
|
|
|
|
|
|
def get_speaker_range(display_name: str) -> int:
    """Returns the speech synthesis range for the given name

    The value is pseudo-random but seeded with the display name, so that
    the same name always produces the same range.

    display_name: name used as the random seed

    Returns an integer between 300 and 800 inclusive
    """
    # A private generator gives the same value as seeding the module
    # level generator, without reseeding it for every other caller
    return random.Random(display_name).randint(300, 800)
|
|
|
|
|
|
def _speaker_pronounce(base_dir: str, say_text: str, translate: {}) -> str:
    """Screen readers may not always pronounce correctly, so you
    can have a file which specifies conversions. File should contain
    line items such as:
    Epicyon -> Epi-cyon

    base_dir: directory containing accounts/speaker_pronounce.txt
    say_text: the text to be converted
    translate: dictionary of translated strings. When it is empty the
        built-in conversions are not applied, only those from the file

    Conversions from the file are added after the built-in ones and
    replace any built-in conversion having the same key. The accepted
    separators, in order of precedence, are '->', ';', ':' and ','.

    Returns the text with all conversions applied, in insertion order
    """
    pronounce_filename = base_dir + '/accounts/speaker_pronounce.txt'
    convert_dict = {}
    if translate:
        # NOTE: the order matters, since replacements are applied
        # sequentially. For example '#nowplaying' must come before '#'
        # and the emoticons must come before '(' and ')'
        convert_dict = {
            "Epicyon": "Epi-cyon",
            "espeak": "e-speak",
            "emoji": "emowji",
            "clearnet": "clear-net",
            "https": "H-T-T-P-S",
            "HTTPS": "H-T-T-P-S",
            "XMPP": "X-M-P-P",
            "xmpp": "X-M-P-P",
            "sql": "S-Q-L",
            ".js": " dot J-S",
            "PSQL": "Postgres S-Q-L",
            "SQL": "S-Q-L",
            "gdpr": "G-D-P-R",
            "kde": "K-D-E",
            "AGPL": "Affearo G-P-L",
            "agpl": "Affearo G-P-L",
            "GPL": "G-P-L",
            "gpl": "G-P-L",
            "coop": "co-op",
            "KMail": "K-Mail",
            "kmail": "K-Mail",
            "gmail": "G-mail",
            "Gmail": "G-mail",
            "OpenPGP": "Open P-G-P",
            "Tor": "Toor",
            "memes": "meemes",
            "Memes": "Meemes",
            "rofl": translate["laughing"],
            "ROFL": translate["laughing"],
            "lmao": translate["laughing"],
            "LMAO": translate["laughing"],
            "fwiw": "for what it's worth",
            "fyi": "for your information",
            "irl": "in real life",
            "IRL": "in real life",
            "imho": "in my opinion",
            "afaik": "as far as I know",
            "AFAIK": "as far as I know",
            "fediverse": "fediiverse",
            "Fediverse": "Fediiverse",
            " foss ": " free and open source software ",
            " floss ": " free libre and open source software ",
            # the surrounding spaces are part of the match, so they
            # need to be restored to avoid joining adjacent words
            " FOSS ": " free and open source software ",
            " FLOSS ": " free libre and open source software ",
            " oss ": " open source software ",
            " OSS ": " open source software ",
            "🤔": ". " + translate["thinking emoji"],
            "RT @": "Re-Tweet ",
            "#nowplaying": translate["hashtag"] + " now-playing",
            "#NowPlaying": translate["hashtag"] + " now-playing",
            "#": translate["hashtag"] + ' ',
            "¯\\_(ツ)_/¯": translate["shrug"],
            ":D": '. ' + translate["laughing"],
            ":-D": '. ' + translate["laughing"],
            ":)": '. ' + translate["smile"],
            ";)": '. ' + translate["wink"],
            ":(": '. ' + translate["sad face"],
            ":-)": '. ' + translate["smile"],
            ":-(": '. ' + translate["sad face"],
            ";-)": '. ' + translate["wink"],
            ":O": '. ' + translate['shocked'],
            "?": "? ",
            '"': "'",
            "*": "",
            "(": ",",
            ")": ","
        }
    if os.path.isfile(pronounce_filename):
        with open(pronounce_filename, 'r', encoding='utf-8') as fp_pro:
            pronounce_list = fp_pro.readlines()
        for conversion in pronounce_list:
            separator = None
            for possible_separator in ('->', ';', ':', ','):
                if possible_separator in conversion:
                    separator = possible_separator
                    break
            if not separator:
                continue

            sections = conversion.split(separator)
            text = sections[0].strip()
            if not text:
                # replacing an empty string would insert the converted
                # text between every character
                continue
            convert_dict[text] = sections[1].strip()
    for text, converted in convert_dict.items():
        if text in say_text:
            say_text = say_text.replace(text, converted)
    return say_text
|
|
|
|
|
|
def speaker_replace_links(say_text: str, translate: {},
                          detected_links: []) -> str:
    """Replaces any links in the given text with "link to [domain]".
    Instead of reading out potentially very long and meaningless links

    Also replaces :emoji: names with ", emowji name," and, if the
    translations contain 'mentioning', replaces @mentions which do not
    follow a word ending in RT.

    say_text: the text to be converted
    translate: dictionary of translated strings. 'Linked' and
        'mentioning' are used if present
    detected_links: list to which each detected link url is appended

    Returns the converted text
    """
    text = say_text
    # protect ?v= query strings from the removal of question marks
    text = text.replace('?v=', '__v=')
    for char in SPEAKER_REMOVE_CHARS:
        text = text.replace(char, ' ')
    text = text.replace('__v=', '?v=')
    replacements = {}
    words_list = text.split(' ')
    linked_str = translate.get('Linked')
    if not linked_str:
        linked_str = 'Linked'
    prev_word = ''
    for word in words_list:
        if word.startswith('v='):
            replacements[word] = ''
        if word.startswith(':') and word.endswith(':'):
            emoji_name = word.replace(':', '')
            # A word consisting only of colons is not an emoji.
            # Replacing it would alter every colon within the text,
            # including the ones within links
            if emoji_name:
                replacements[word] = ', emowji ' + emoji_name + ','
                continue
        if word.startswith('@') and not prev_word.endswith('RT'):
            # replace mentions, but not re-tweets
            if translate.get('mentioning'):
                replacements[word] = \
                    translate['mentioning'] + ' ' + word[1:] + ', '
        prev_word = word

        domain = None
        domain_full = None
        if 'https://' in word:
            domain = word.split('https://')[1]
            domain_full = 'https://' + domain
        elif 'http://' in word:
            domain = word.split('http://')[1]
            domain_full = 'http://' + domain
        if not domain:
            continue
        if '/' in domain:
            domain = domain.split('/')[0]
        if domain.startswith('www.'):
            # only remove the prefix, not any later occurrence
            domain = domain[len('www.'):]
        replacements[domain_full] = '. ' + linked_str + ' ' + domain + '.'
        detected_links.append(domain_full)
    for replace_str, new_str in replacements.items():
        say_text = say_text.replace(replace_str, new_str)
    return say_text.replace('..', '.')
|
|
|
|
|
|
def _add_ssm_lemphasis(say_text: str) -> str:
    """Adds emphasis to *emphasised* text

    Each single word which begins and ends with an asterisk is wrapped
    within an SSML emphasis element and its asterisks are removed.

    say_text: the text to be converted

    Returns the text with emphasis markup added
    """
    if '*' not in say_text:
        return say_text
    text = say_text
    for char in SPEAKER_REMOVE_CHARS:
        text = text.replace(char, ' ')
    replacements = {}
    for word in text.split(' '):
        if not (word.startswith('*') and word.endswith('*')):
            continue
        emphasised = word.replace('*', '')
        if not emphasised:
            # A word consisting only of asterisks has nothing to
            # emphasise. Replacing it would turn every asterisk within
            # the text into an empty emphasis element
            continue
        replacements[word] = \
            '<emphasis level="strong">' + emphasised + '</emphasis>'
    for replace_str, new_str in replacements.items():
        say_text = say_text.replace(replace_str, new_str)
    return say_text
|
|
|
|
|
|
def _remove_emoji_from_text(say_text: str) -> str:
    """Removes :emoji: from the given text

    Any single word which begins and ends with a colon is removed, then
    repeated spaces are collapsed and the result is stripped.

    say_text: the text to be converted

    Returns the text without emoji names
    """
    if ':' not in say_text:
        return say_text
    text = say_text
    for char in SPEAKER_REMOVE_CHARS:
        text = text.replace(char, ' ')
    replacements = {}
    for word in text.split(' '):
        if not (word.startswith(':') and word.endswith(':')):
            continue
        # A word consisting only of colons is not an emoji name.
        # Removing it would remove every colon within the text
        if word.strip(':'):
            replacements[word] = ''
    for replace_str, new_str in replacements.items():
        say_text = say_text.replace(replace_str, new_str)
    # removing an emoji leaves the spaces on either side of it
    while '  ' in say_text:
        say_text = say_text.replace('  ', ' ')
    return say_text.strip()
|
|
|
|
|
|
def _speaker_endpoint_json(display_name: str, summary: str,
                           content: str, say_content: str,
                           image_description: str,
                           links: [], gender: str, post_id: str,
                           post_dm: bool, post_reply: bool,
                           follow_requests_exist: bool,
                           follow_requests_list: [],
                           liked_by: str, published: str, post_cal: bool,
                           post_share: bool, theme_name: str,
                           is_direct: bool, reply_to_you: bool) -> {}:
    """Returns a json endpoint for the TTS speaker

    The post fields are stored at the top level of the dictionary and
    the notification state is stored within its "notify" section.
    A "gender" field is only added when a gender is given.
    """
    # notification state, shown separately from the post itself
    notifications = {
        "theme": theme_name,
        "dm": post_dm,
        "reply": post_reply,
        "followRequests": follow_requests_exist,
        "followRequestsList": follow_requests_list,
        "likedBy": liked_by,
        "calendar": post_cal,
        "share": post_share
    }
    endpoint = {
        "name": display_name,
        "summary": summary,
        "content": content,
        "say": say_content,
        "published": published,
        "imageDescription": image_description,
        "detectedLinks": links,
        "id": post_id,
        "direct": is_direct,
        "replyToYou": reply_to_you,
        "notify": notifications
    }
    if not gender:
        return endpoint
    endpoint['gender'] = gender
    return endpoint
|
|
|
|
|
|
def _ssm_lheader(system_language: str, instance_title: str) -> str:
    """Returns a header for an SSML document

    system_language: language code used for the title's xml:lang
    instance_title: title of the instance, shown within the metadata

    The returned text opens the speak element, which the caller is
    expected to close.
    """
    # the language and title are inserted into XML, so any markup
    # characters within them need to be escaped
    title_lang = html.escape(system_language)
    title = html.escape(instance_title, quote=False)
    # the dc prefix used by the title has to be declared, otherwise
    # namespace aware XML parsers reject the document
    return '<?xml version="1.0"?>\n' + \
        '<speak xmlns="http://www.w3.org/2001/10/synthesis"\n' + \
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' + \
        ' xmlns:dc="http://purl.org/dc/elements/1.1/"\n' + \
        ' xsi:schemaLocation="http://www.w3.org/2001/10/synthesis\n' + \
        ' http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"\n' + \
        ' version="1.1">\n' + \
        ' <metadata>\n' + \
        ' <dc:title xml:lang="' + title_lang + '">' + \
        title + ' inbox</dc:title>\n' + \
        ' </metadata>\n'
|
|
|
|
|
|
def _speaker_endpoint_ssml(display_name: str, summary: str,
                           content: str, image_description: str,
                           links: [], language: str,
                           instance_title: str,
                           gender: str) -> str:
    """Returns an SSML endpoint for the TTS speaker
    https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language
    https://www.w3.org/TR/speech-synthesis/

    display_name: name given to the voice
    content: the text to be spoken. Words written as *word* are
        emphasised
    language: language code. Its first two characters are used for the
        header, and 'en' is assumed if it is not given
    instance_title: title used within the document header
    gender: pronoun text. For English this is converted into male,
        female or neutral. If not given then neutral is used

    summary, image_description and links are currently unused.

    Returns the SSML document as a string
    """
    lang_short = 'en'
    if language:
        lang_short = language[:2]
    else:
        # avoid failing on a missing language when building the markup
        language = lang_short
    if not gender:
        gender = 'neutral'
    elif lang_short == 'en':
        gender = gender.lower()
        if 'he/him' in gender:
            gender = 'male'
        elif 'she/her' in gender:
            gender = 'female'
        else:
            gender = 'neutral'

    # The content originates from posts, so escape any markup
    # characters. This happens before the emphasis elements are added
    # so that they are not themselves escaped
    content = _add_ssm_lemphasis(html.escape(content, quote=False))
    voice_params = \
        'name="' + html.escape(display_name) + \
        '" gender="' + html.escape(gender) + '"'
    return _ssm_lheader(lang_short, instance_title) + \
        ' <p>\n' + \
        ' <s xml:lang="' + html.escape(language) + '">\n' + \
        ' <voice ' + voice_params + '>\n' + \
        ' ' + content + '\n' + \
        ' </voice>\n' + \
        ' </s>\n' + \
        ' </p>\n' + \
        '</speak>\n'
|
|
|
|
|
|
def get_ssml_box(base_dir: str, path: str,
                 domain: str,
                 system_language: str,
                 instance_title: str,
                 box_name: str) -> str:
    """Returns SSML for the given timeline

    base_dir: base directory passed to acct_dir
    path: request path, which needs to contain /users/nickname
    domain: domain passed to acct_dir
    system_language: language code for the SSML document
    instance_title: title used within the SSML header
    box_name: currently unused

    Returns the SSML document, or None if the path contains no
    nickname or if speaker.json is missing, empty or incomplete
    """
    if '/users/' not in path:
        return None
    nickname = path.split('/users/')[1]
    if '/' in nickname:
        nickname = nickname.split('/')[0]
    if not nickname:
        return None
    speaker_filename = \
        acct_dir(base_dir, nickname, domain) + '/speaker.json'
    if not os.path.isfile(speaker_filename):
        return None
    speaker_json = load_json(speaker_filename)
    if not speaker_json:
        return None
    if not isinstance(speaker_json, dict):
        return None
    # the name and spoken text are needed to build the document
    if 'name' not in speaker_json or 'say' not in speaker_json:
        return None
    gender = None
    if speaker_json.get('gender'):
        gender = speaker_json['gender']
    return _speaker_endpoint_ssml(speaker_json['name'],
                                  speaker_json.get('summary', ''),
                                  speaker_json['say'],
                                  speaker_json.get('imageDescription', ''),
                                  speaker_json.get('detectedLinks', []),
                                  system_language,
                                  instance_title, gender)
|
|
|
|
|
|
def speakable_text(base_dir: str, content: str, translate: {}) -> (str, []):
    """Convert the given text to a speakable version
    which includes changes for pronunciation

    base_dir: base directory passed to _speaker_pronounce
    content: the text to be converted. It is converted to a string
    translate: dictionary of translated strings

    Returns the speakable text and a list of the links detected within
    it. PGP encrypted content is returned unaltered with no links.
    """
    content = str(content)
    if is_pgp_encrypted(content):
        return content, []

    # replace some emoji before removing html
    if ' <3' in content:
        content = content.replace(' <3', ' ' + translate['heart'])
    content = remove_html(html_replace_quote_marks(content))
    detected_links = []
    content = speaker_replace_links(content, translate, detected_links)
    # replace all double spaces
    while '  ' in content:
        content = content.replace('  ', ' ')
    content = content.replace(' . ', '. ').strip()
    say_content = _speaker_pronounce(base_dir, content, translate)
    # the pronunciation changes can introduce double spaces again
    while '  ' in say_content:
        say_content = say_content.replace('  ', ' ')
    return say_content.replace(' . ', '. ').strip(), detected_links
|
|
|
|
|
|
def _post_to_speaker_json(base_dir: str, http_prefix: str,
                          nickname: str, domain: str, domain_full: str,
                          post_json_object: {}, person_cache: {},
                          translate: {}, announcing_actor: str,
                          theme_name: str) -> {}:
    """Converts an ActivityPub post into some Json containing
    speech synthesis parameters.
    NOTE: There currently appears to be no standardized json
    format for speech synthesis

    post_json_object: the post, which needs an actor and an object
        having text content
    announcing_actor: if given, the spoken text is prefixed by an
        announcement naming the handle of this actor
    theme_name: stored within the notification section of the result

    Returns the dictionary created by _speaker_endpoint_json, or None
    if the post has no text content, no actor or no display name
    """
    if not has_object_dict(post_json_object):
        return None
    post_obj = post_json_object['object']
    if not post_obj.get('content'):
        return None
    if not isinstance(post_obj['content'], str):
        return None
    if not post_json_object.get('actor'):
        return None
    detected_links = []
    content = urllib.parse.unquote_plus(post_obj['content'])
    content = html.unescape(content)
    content = content.replace('<p>', '').replace('</p>', ' ')
    if not is_pgp_encrypted(content):
        # replace some emoji before removing html
        if ' <3' in content:
            content = content.replace(' <3', ' ' + translate['heart'])
        content = remove_html(html_replace_quote_marks(content))
        content = speaker_replace_links(content, translate, detected_links)
        # replace all double spaces
        while '  ' in content:
            content = content.replace('  ', ' ')
        content = content.replace(' . ', '. ').strip()
        say_content = _speaker_pronounce(base_dir, content, translate)
        # the pronunciation changes can introduce double spaces again
        while '  ' in say_content:
            say_content = say_content.replace('  ', ' ')
        say_content = say_content.replace(' . ', '. ').strip()
    else:
        # encrypted content is passed through without alteration
        say_content = content

    # the descriptions of any attachments
    image_description = ''
    if post_obj.get('attachment'):
        attach_list = post_obj['attachment']
        if isinstance(attach_list, list):
            for img in attach_list:
                if not isinstance(img, dict):
                    continue
                if img.get('name'):
                    if isinstance(img['name'], str):
                        image_description += \
                            img['name'] + '. '

    is_direct = is_dm(post_json_object)
    actor = local_actor_url(http_prefix, nickname, domain_full)
    reply_to_you = is_reply(post_json_object, actor)

    published = ''
    if post_obj.get('published'):
        published = post_obj['published']

    summary = ''
    if post_obj.get('summary'):
        if isinstance(post_obj['summary'], str):
            summary = urllib.parse.unquote_plus(post_obj['summary'])
            summary = html.unescape(summary)

    speaker_name = \
        get_display_name(base_dir, post_json_object['actor'], person_cache)
    if not speaker_name:
        return None
    speaker_name = _remove_emoji_from_text(speaker_name)
    speaker_name = speaker_name.replace('_', ' ')
    speaker_name = camel_case_split(speaker_name)
    gender = get_gender_from_bio(base_dir, post_json_object['actor'],
                                 person_cache, translate)
    if announcing_actor:
        announced_nickname = get_nickname_from_actor(announcing_actor)
        announced_domain, _ = \
            get_domain_from_actor(announcing_actor)
        if announced_nickname and announced_domain:
            announced_handle = announced_nickname + '@' + announced_domain
            announcement = \
                translate['announces'] + ' ' + announced_handle + '. '
            say_content = announcement + say_content
            content = announcement + content
    post_id = None
    if post_obj.get('id'):
        post_id = remove_id_ending(post_obj['id'])

    follow_requests_exist = False
    follow_requests_list = []
    # NOTE(review): update_speaker builds the location of speaker.json
    # from domain, whereas domain_full is used here - confirm which
    # one is intended
    accounts_dir = acct_dir(base_dir, nickname, domain_full)
    approve_follows_filename = accounts_dir + '/followrequests.txt'
    if os.path.isfile(approve_follows_filename):
        with open(approve_follows_filename, 'r',
                  encoding='utf-8') as fp_foll:
            follows = fp_foll.readlines()
        if follows:
            follow_requests_exist = True
            follow_requests_list = [line.strip() for line in follows]
    # notification state is indicated by the existence of files
    post_dm = os.path.isfile(accounts_dir + '/.newDM')
    post_reply = os.path.isfile(accounts_dir + '/.newReply')
    liked_by = ''
    like_filename = accounts_dir + '/.newLike'
    if os.path.isfile(like_filename):
        with open(like_filename, 'r', encoding='utf-8') as fp_like:
            liked_by = fp_like.read()
    post_cal = os.path.isfile(accounts_dir + '/.newCalendar')
    post_share = os.path.isfile(accounts_dir + '/.newShare')

    return _speaker_endpoint_json(speaker_name, summary,
                                  content, say_content, image_description,
                                  detected_links, gender, post_id,
                                  post_dm, post_reply,
                                  follow_requests_exist,
                                  follow_requests_list,
                                  liked_by, published,
                                  post_cal, post_share, theme_name,
                                  is_direct, reply_to_you)
|
|
|
|
|
|
def update_speaker(base_dir: str, http_prefix: str,
                   nickname: str, domain: str, domain_full: str,
                   post_json_object: {}, person_cache: {},
                   translate: {}, announcing_actor: str,
                   theme_name: str) -> None:
    """ Generates a json file which can be used for TTS announcement
    of incoming inbox posts

    The post is converted by _post_to_speaker_json and the result is
    saved as speaker.json under the path returned by acct_dir for the
    given nickname and domain. Nothing is saved if the post could not
    be converted.
    """
    speaker_json = \
        _post_to_speaker_json(base_dir, http_prefix,
                              nickname, domain, domain_full,
                              post_json_object, person_cache,
                              translate, announcing_actor,
                              theme_name)
    if not speaker_json:
        # the post has nothing which can be spoken, so keep the
        # existing file rather than overwriting it with null
        return
    speaker_filename = acct_dir(base_dir, nickname, domain) + '/speaker.json'
    save_json(speaker_json, speaker_filename)
|