mirror of https://gitlab.com/bashrc2/epicyon
436 lines
15 KiB
Python
436 lines
15 KiB
Python
__filename__ = "speaker.py"
|
|
__author__ = "Bob Mottram"
|
|
__license__ = "AGPL3+"
|
|
__version__ = "1.2.0"
|
|
__maintainer__ = "Bob Mottram"
|
|
__email__ = "bob@freedombone.net"
|
|
__status__ = "Production"
|
|
|
|
import os
|
|
import html
|
|
import random
|
|
import urllib.parse
|
|
from auth import createBasicAuthHeader
|
|
from session import getJson
|
|
from utils import camelCaseSplit
|
|
from utils import getDomainFromActor
|
|
from utils import getNicknameFromActor
|
|
from utils import getGenderFromBio
|
|
from utils import getDisplayName
|
|
from utils import removeHtml
|
|
from utils import loadJson
|
|
from utils import saveJson
|
|
from utils import getFullDomain
|
|
from content import htmlReplaceQuoteMarks
|
|
|
|
# Punctuation which gets replaced by a space before text is split into
# words, so that tokens such as links, :emoji: and *emphasis* are not
# left with trailing punctuation attached to them
speakerRemoveChars = ('.\n', '. ', ',', ';', '?', '!')
|
|
|
|
|
|
def getSpeakerPitch(displayName: str, screenreader: str, gender) -> int:
    """Returns the speech synthesis pitch for the given name

    The pitch is pseudo-random but reproducible: the display name is
    used as the seed, so the same name always produces the same pitch.

    displayName: name of the speaker, used as the random seed
    screenreader: name of the TTS program. 'picospeaker' uses the
        range -6..3 and anything else uses 1..100
    gender: pronoun text. A value containing 'She' selects the upper
        part of the range and one containing 'Him' the lower part.
        None or an empty string leaves the whole range available
    """
    # A private generator produces the same numbers as random.seed
    # followed by random.randint, but without resetting the shared
    # module-level generator which other code may be drawing from
    generator = random.Random(displayName)
    # the gender can be missing, and 'She' in None would be a TypeError
    gender = gender or ''
    if screenreader == 'picospeaker':
        rangeMin, rangeMax = -6, 3
        if 'She' in gender:
            rangeMin = -1
        elif 'Him' in gender:
            rangeMax = -1
    else:
        rangeMin, rangeMax = 1, 100
        if 'She' in gender:
            rangeMin = 50
        elif 'Him' in gender:
            rangeMax = 50
    return generator.randint(rangeMin, rangeMax)
|
|
|
|
|
|
def getSpeakerRate(displayName: str, screenreader: str) -> int:
    """Returns the speech synthesis rate for the given name

    The rate is pseudo-random but reproducible, because the display
    name is used as the seed. For the 'picospeaker' screenreader the
    result is within -40..-20 and for anything else within 50..120.
    """
    # A private generator avoids reseeding the shared module-level
    # generator, while producing the same numbers as seeding it would
    generator = random.Random(displayName)
    if screenreader == 'picospeaker':
        return generator.randint(-40, -20)
    return generator.randint(50, 120)
|
|
|
|
|
|
def getSpeakerRange(displayName: str) -> int:
    """Returns the speech synthesis range for the given name

    The value is within 300..800 and is pseudo-random but reproducible,
    because the display name is used as the seed.
    """
    # A private generator avoids reseeding the shared module-level
    # generator, while producing the same numbers as seeding it would
    return random.Random(displayName).randint(300, 800)
|
|
|
|
|
|
def _speakerPronounce(baseDir: str, sayText: str, translate: dict) -> str:
    """Screen readers may not always pronounce correctly, so you
    can have a file which specifies conversions. File should contain
    line items such as:
    Epicyon -> Epi-cyon

    The file is baseDir/accounts/speaker_pronounce.txt. Each line holds
    the text to look for, a separator ('->', ';', ':' or ',', tried in
    that order) and the text to say instead. Lines without a separator,
    or with nothing in front of it, are ignored.

    When translate is not empty a built-in set of conversions is used
    as well, and an entry from the file replaces a built-in one which
    has the same text. Conversions are plain substring replacements
    applied one after another, built-in ones first.

    Returns sayText with all of the conversions applied
    """
    def _tr(phrase: str) -> str:
        # fall back to the English phrase when a translation is absent
        return translate.get(phrase) or phrase

    pronounceFilename = baseDir + '/accounts/speaker_pronounce.txt'
    convertDict = {}
    if translate:
        # the order matters: longer patterns such as "#nowplaying"
        # have to be replaced before the shorter "#" which they contain
        convertDict = {
            "Epicyon": "Epi-cyon",
            "espeak": "e-speak",
            "emoji": "emowji",
            "clearnet": "clear-net",
            "https": "H-T-T-P-S",
            "HTTPS": "H-T-T-P-S",
            "XMPP": "X-M-P-P",
            "xmpp": "X-M-P-P",
            "sql": "S-Q-L",
            "PSQL": "Postgres S-Q-L",
            "SQL": "S-Q-L",
            "coop": "co-op",
            "KMail": "K-Mail",
            "gmail": "G-mail",
            "Gmail": "G-mail",
            "OpenPGP": "Open P-G-P",
            "Tor": "Toor",
            "🤔": ". " + _tr("thinking emoji"),
            "RT @": "Re-Tweet ",
            "#nowplaying": _tr("hashtag") + " now-playing",
            "#NowPlaying": _tr("hashtag") + " now-playing",
            "#": _tr("hashtag") + ' ',
            ":D": '. ' + _tr("laughing"),
            ":-D": '. ' + _tr("laughing"),
            ":)": '. ' + _tr("smile"),
            ";)": '. ' + _tr("wink"),
            ":(": '. ' + _tr("sad face"),
            ":-)": '. ' + _tr("smile"),
            ":-(": '. ' + _tr("sad face"),
            ";-)": '. ' + _tr("wink"),
            "?": "? ",
            "*": "",
            "(": ",",
            ")": ","
        }
    if os.path.isfile(pronounceFilename):
        with open(pronounceFilename, 'r', encoding='utf-8') as fp:
            pronounceList = fp.readlines()
        for conversion in pronounceList:
            separator = None
            for possibleSeparator in ('->', ';', ':', ','):
                if possibleSeparator in conversion:
                    separator = possibleSeparator
                    break
            if not separator:
                continue
            fields = conversion.split(separator)
            text = fields[0].strip()
            if not text:
                # an empty search text would make str.replace insert
                # the conversion between every character of sayText
                continue
            convertDict[text] = fields[1].strip()
    for text, converted in convertDict.items():
        sayText = sayText.replace(text, converted)
    return sayText
|
|
|
|
|
|
def speakerReplaceLinks(sayText: str, translate: dict,
                        detectedLinks: list) -> str:
    """Replaces any links in the given text with "link to [domain]".
    Instead of reading out potentially very long and meaningless links

    Three kinds of word are rewritten:
    - :shortcode: becomes ", emowji shortcode,"
    - @mention becomes "<mentioning> mention, " when translate has a
      'mentioning' entry and the previous word does not end with 'RT'
    - a word containing http:// or https:// becomes
      ". <Linked> domain." with any leading www. removed

    sayText: the text to be spoken
    translate: translations, from which 'Linked' and 'mentioning'
        are read
    detectedLinks: list to which every link found is appended. This
        argument is modified in place

    Returns the altered text, with any ".." reduced to "."
    """
    # words are found within a copy which has had its punctuation
    # removed, and the replacements are then made on the original text
    text = sayText
    for ch in speakerRemoveChars:
        text = text.replace(ch, ' ')
    linkedStr = translate.get('Linked') or 'Linked'
    mentionStr = translate.get('mentioning')
    replacements = {}
    prevWord = ''
    for word in text.split(' '):
        if word.startswith(':') and word.endswith(':'):
            shortcode = word.replace(':', '')
            # a lone ":" has no shortcode. Using it as a replacement
            # key would rewrite every colon, including those in links
            if shortcode:
                replacements[word] = ', emowji ' + shortcode + ','
            continue
        # replace mentions, but not re-tweets
        # a lone "@" is skipped, otherwise every @ would be rewritten
        if len(word) > 1 and word.startswith('@') and \
           not prevWord.endswith('RT'):
            if mentionStr:
                replacements[word] = mentionStr + ' ' + word[1:] + ', '
        prevWord = word

        domain = None
        domainFull = None
        for scheme in ('https://', 'http://'):
            if scheme in word:
                domain = word.split(scheme)[1]
                domainFull = scheme + domain
                break
        if not domain:
            continue
        if '/' in domain:
            domain = domain.split('/')[0]
        if domain.startswith('www.'):
            # only remove the prefix, not later occurrences of www.
            domain = domain[len('www.'):]
        replacements[domainFull] = '. ' + linkedStr + ' ' + domain + '.'
        detectedLinks.append(domainFull)
    for replaceStr, newStr in replacements.items():
        sayText = sayText.replace(replaceStr, newStr)
    return sayText.replace('..', '.')
|
|
|
|
|
|
def _addSSMLemphasis(sayText: str) -> str:
    """Adds emphasis to *emphasised* text

    Each word which begins and ends with an asterisk is replaced by
    that word, minus its asterisks, wrapped within an SSML
    <emphasis level="strong"> element. Text without any asterisk is
    returned unchanged.
    """
    if '*' not in sayText:
        return sayText
    # words are found within a copy which has had its punctuation
    # removed, and the replacements are then made on the original text
    text = sayText
    for ch in speakerRemoveChars:
        text = text.replace(ch, ' ')
    replacements = {}
    for word in text.split(' '):
        if not (word.startswith('*') and word.endswith('*')):
            continue
        emphasised = word.replace('*', '')
        if not emphasised:
            # a lone "*" or "**" has nothing to emphasise. Using it as
            # a replacement key would turn every asterisk in the text
            # into an empty emphasis element
            continue
        replacements[word] = \
            '<emphasis level="strong">' + emphasised + '</emphasis>'
    for replaceStr, newStr in replacements.items():
        sayText = sayText.replace(replaceStr, newStr)
    return sayText
|
|
|
|
|
|
def _removeEmojiFromText(sayText: str) -> str:
    """Removes :emoji: from the given text

    Each word which begins and ends with a colon is deleted. Runs of
    spaces left behind are then reduced to a single space and the
    result is stripped. Text without any colon is returned unchanged.
    """
    if ':' not in sayText:
        return sayText
    # words are found within a copy which has had its punctuation
    # removed, and the removals are then made on the original text
    text = sayText
    for ch in speakerRemoveChars:
        text = text.replace(ch, ' ')
    for word in text.split(' '):
        if not (word.startswith(':') and word.endswith(':')):
            continue
        if not word.strip(':'):
            # a lone ":" is not an emoji, and removing it would delete
            # every colon within the text
            continue
        sayText = sayText.replace(word, '')
    # removing a word leaves the spaces on either side of it behind
    while '  ' in sayText:
        sayText = sayText.replace('  ', ' ')
    return sayText.strip()
|
|
|
|
|
|
def getSpeakerFromServer(baseDir: str, session,
                         nickname: str, password: str,
                         domain: str, port: int,
                         httpPrefix: str,
                         debug: bool, projectVersion: str) -> dict:
    """Returns some json which contains the latest inbox
    entry in a minimal format suitable for a text-to-speech reader

    The json is requested from /users/<nickname>/speaker on the given
    domain, with an Authorization header made by createBasicAuthHeader
    from the nickname and password.

    Returns None if there is no session, otherwise whatever getJson
    returns for that url. baseDir, debug and projectVersion are
    accepted but are not used here.
    """
    if not session:
        print('WARN: No session for getSpeakerFromServer')
        # The integer 6 used to be returned here. It is truthy, so a
        # caller expecting a dictionary could mistake it for a result
        return None

    domainFull = getFullDomain(domain, port)
    headers = {
        'host': domain,
        'Content-type': 'application/json',
        'Authorization': createBasicAuthHeader(nickname, password)
    }
    url = \
        httpPrefix + '://' + \
        domainFull + '/users/' + nickname + '/speaker'

    # NOTE(review): the module __version__ is passed on rather than the
    # projectVersion argument - confirm which of them is intended
    return getJson(session, url, headers, None,
                   __version__, httpPrefix, domain)
|
|
|
|
|
|
def _speakerEndpointJson(displayName: str, summary: str,
                         content: str, imageDescription: str,
                         links: [], gender: str, postId: str) -> {}:
    """Returns a json endpoint for the TTS speaker

    The dictionary always has the fields name, summary, say,
    imageDescription, detectedLinks and id. A gender field follows
    them only when a gender was supplied.
    """
    # an empty dictionary adds nothing when it is unpacked below
    optionalFields = {'gender': gender} if gender else {}
    return {
        "name": displayName,
        "summary": summary,
        "say": content,
        "imageDescription": imageDescription,
        "detectedLinks": links,
        "id": postId,
        **optionalFields
    }
|
|
|
|
|
|
def _SSMLheader(systemLanguage: str, instanceTitle: str) -> str:
    """Returns a header for an SSML document

    The header consists of the XML declaration, the opening <speak>
    tag and a <metadata> section whose title is the instance title
    followed by " inbox". The <speak> element is left open, so the
    caller has to append the body and the closing </speak> tag.

    systemLanguage: language code for the xml:lang of the title
    instanceTitle: title of the instance
    """
    # both values end up inside XML, so characters such as & < and "
    # must be escaped for the document to remain well-formed
    titleLanguage = html.escape(systemLanguage)
    title = html.escape(instanceTitle)
    # the dc prefix which is used for the title has to be declared,
    # otherwise XML parsers reject the document for an unbound prefix
    return '<?xml version="1.0"?>\n' + \
        '<speak xmlns="http://www.w3.org/2001/10/synthesis"\n' + \
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' + \
        ' xmlns:dc="http://purl.org/dc/elements/1.1/"\n' + \
        ' xsi:schemaLocation="http://www.w3.org/2001/10/synthesis\n' + \
        ' http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"\n' + \
        ' version="1.1">\n' + \
        ' <metadata>\n' + \
        ' <dc:title xml:lang="' + titleLanguage + '">' + \
        title + ' inbox</dc:title>\n' + \
        ' </metadata>\n'
|
|
|
|
|
|
def _speakerEndpointSSML(displayName: str, summary: str,
                         content: str, imageDescription: str,
                         links: list, language: str,
                         instanceTitle: str,
                         gender: str) -> str:
    """Returns an SSML endpoint for the TTS speaker
    https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language
    https://www.w3.org/TR/speech-synthesis/

    The document is the SSML header followed by one paragraph which
    contains one sentence, spoken by a voice named after displayName.

    displayName: name of the speaker, used as the voice name
    content: the text to be spoken. *word* is given strong emphasis
    language: language code for the sentence, 'en' if not supplied.
        Its first two characters are passed to the header
    instanceTitle: title of the instance, passed to the header
    gender: pronoun text. 'he/him' gives a male voice, 'she/her' a
        female one and anything else, or nothing, a neutral one

    summary, imageDescription and links are accepted but are not
    used here.
    """
    if not language:
        # without this the xml:lang attribute below could not be made
        language = 'en'
    langShort = language[:2]

    # The gender attribute of an SSML voice may only be male, female
    # or neutral, so the pronouns are mapped for every language
    # rather than being passed through as they are
    pronouns = gender.lower() if gender else ''
    if 'he/him' in pronouns:
        gender = 'male'
    elif 'she/her' in pronouns:
        gender = 'female'
    else:
        gender = 'neutral'

    # The content is plain text, so it is escaped before the emphasis
    # markup is added. Doing it afterwards would escape the markup too
    content = _addSSMLemphasis(html.escape(content, quote=False))
    voiceParams = \
        'name="' + html.escape(displayName) + '" gender="' + gender + '"'
    return _SSMLheader(langShort, instanceTitle) + \
        ' <p>\n' + \
        ' <s xml:lang="' + html.escape(language) + '">\n' + \
        ' <voice ' + voiceParams + '>\n' + \
        ' ' + content + '\n' + \
        ' </voice>\n' + \
        ' </s>\n' + \
        ' </p>\n' + \
        '</speak>\n'
|
|
|
|
|
|
def getSSMLbox(baseDir: str, path: str,
               domain: str,
               systemLanguage: str,
               instanceTitle: str,
               boxName: str) -> str:
    """Returns SSML for the given timeline

    The nickname is whatever follows /users/ within the path, up to
    the next slash. The SSML is made from the file
    baseDir/accounts/<nickname>@<domain>/speaker.json

    Returns None if the path has no /users/ section, if the file does
    not exist or could not be loaded, or if it lacks the name of the
    speaker or the text to say. boxName is accepted but not used here.
    """
    if '/users/' not in path:
        # without this check the split below raises an IndexError
        return None
    nickname = path.split('/users/')[1]
    if '/' in nickname:
        nickname = nickname.split('/')[0]
    speakerFilename = \
        baseDir + '/accounts/' + nickname + '@' + domain + '/speaker.json'
    if not os.path.isfile(speakerFilename):
        return None
    speakerJson = loadJson(speakerFilename)
    if not speakerJson or not isinstance(speakerJson, dict):
        return None
    # these two are needed to build the voice element, so a file
    # without them cannot be turned into SSML
    if speakerJson.get('name') is None or speakerJson.get('say') is None:
        return None
    return _speakerEndpointSSML(speakerJson['name'],
                                speakerJson.get('summary', ''),
                                speakerJson['say'],
                                speakerJson.get('imageDescription', ''),
                                speakerJson.get('detectedLinks', []),
                                systemLanguage,
                                instanceTitle,
                                speakerJson.get('gender') or None)
|
|
|
|
|
|
def _postToSpeakerJson(baseDir: str, nickname: str, domain: str,
                       postJsonObject: dict, personCache: dict,
                       translate: dict, announcingActor: str) -> dict:
    """Converts an ActivityPub post into some Json containing
    speech synthesis parameters.
    NOTE: There currently appears to be no standardized json
    format for speech synthesis

    postJsonObject: the post. Its object has to be a dictionary with
        text content, and the post has to have an actor
    translate: translations, passed on to the link, pronunciation and
        gender helpers and used for the 'announces' prefix
    announcingActor: actor who announced the post, if any. When a
        nickname and domain can be obtained from it the content is
        prefixed with "<announces> nickname@domain. "

    Returns the dictionary made by _speakerEndpointJson, or None if
    the post has no usable content, actor or display name. nickname
    and domain are accepted but are not used here.
    """
    postObject = postJsonObject.get('object')
    if not postObject or not isinstance(postObject, dict):
        return None
    rawContent = postObject.get('content')
    if not rawContent or not isinstance(rawContent, str):
        return None
    actor = postJsonObject.get('actor')
    if not actor:
        # the display name and gender are looked up from the actor
        return None

    detectedLinks = []
    # NOTE(review): unquote_plus also turns every '+' within the
    # content into a space - confirm that this is wanted
    content = urllib.parse.unquote_plus(rawContent)
    content = html.unescape(content)
    content = content.replace('<p>', '').replace('</p>', ' ')
    content = removeHtml(htmlReplaceQuoteMarks(content))
    content = speakerReplaceLinks(content, translate, detectedLinks)
    content = _speakerPronounce(baseDir, content, translate)
    # the replacements above can leave double spaces behind
    content = content.replace('  ', ' ').replace(' . ', '. ')

    # descriptions of any attachments are joined into one string
    imageDescription = ''
    attachList = postObject.get('attachment')
    if attachList and isinstance(attachList, list):
        for img in attachList:
            if not isinstance(img, dict):
                continue
            imgName = img.get('name')
            if imgName and isinstance(imgName, str):
                imageDescription += imgName + '. '

    summary = ''
    rawSummary = postObject.get('summary')
    if rawSummary and isinstance(rawSummary, str):
        summary = html.unescape(urllib.parse.unquote_plus(rawSummary))

    speakerName = getDisplayName(baseDir, actor, personCache)
    if not speakerName:
        return None
    speakerName = _removeEmojiFromText(speakerName)
    speakerName = speakerName.replace('_', ' ')
    speakerName = camelCaseSplit(speakerName)
    gender = getGenderFromBio(baseDir, actor, personCache, translate)

    if announcingActor:
        announcedNickname = getNicknameFromActor(announcingActor)
        announcedDomain, _ = getDomainFromActor(announcingActor)
        if announcedNickname and announcedDomain:
            announcedHandle = announcedNickname + '@' + announcedDomain
            # fall back to English if there is no translation
            announcesStr = translate.get('announces') or 'announces'
            content = \
                announcesStr + ' ' + announcedHandle + '. ' + content

    postId = postObject.get('id') or None
    return _speakerEndpointJson(speakerName, summary,
                                content, imageDescription,
                                detectedLinks, gender, postId)
|
|
|
|
|
|
def updateSpeaker(baseDir: str, nickname: str, domain: str,
                  postJsonObject: dict, personCache: dict,
                  translate: dict, announcingActor: str) -> None:
    """ Generates a json file which can be used for TTS announcement
    of incoming inbox posts

    The post is converted by _postToSpeakerJson and the result saved
    as baseDir/accounts/<nickname>@<domain>/speaker.json
    If the post could not be converted then nothing is saved and any
    existing file is left as it was.
    """
    speakerJson = \
        _postToSpeakerJson(baseDir, nickname, domain,
                           postJsonObject, personCache,
                           translate, announcingActor)
    if not speakerJson:
        # saving None would replace the last valid entry with null
        return
    speakerFilename = \
        baseDir + '/accounts/' + nickname + '@' + domain + '/speaker.json'
    saveJson(speakerJson, speakerFilename)
|