epicyon/speaker.py

296 lines
9.7 KiB
Python
Raw Normal View History

2021-03-01 19:16:33 +00:00
# Module metadata: file name, authorship, licence, release version and
# status. __version__ is also passed to getJson by getSpeakerFromServer.
__filename__ = "speaker.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.2.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
2021-03-02 12:39:18 +00:00
import os
2021-03-01 19:16:33 +00:00
import random
from auth import createBasicAuthHeader
from session import getJson
2021-03-03 12:34:46 +00:00
from utils import loadJson
2021-03-01 19:16:33 +00:00
from utils import getFullDomain
2021-03-03 12:34:46 +00:00
# Punctuation sequences which are replaced by a single space before text
# is split into words (used by speakerReplaceLinks and _addSSMLemphasis)
speakerRemoveChars = ('.\n', '. ', ',', ';', '?', '!')
2021-03-01 19:16:33 +00:00
def getSpeakerPitch(displayName: str, screenreader: str, gender) -> int:
    """Returns the speech synthesis pitch for the given name

    The pitch is pseudo-random but repeatable: the same display name,
    screen reader and gender always give the same value.

    displayName: name used to seed the random number generator
    screenreader: name of the speech synthesiser. 'picospeaker' uses
        a range of -8..3, anything else uses 1..100
    gender: pronoun string such as 'She/Her' or 'He/Him', which narrows
        the range to its upper or lower part. May be None or empty
    """
    # A private generator seeded with the name produces the same values
    # as random.seed(displayName) but leaves the shared module-level
    # generator untouched for other users of the random module
    rng = random.Random(displayName)

    # Accounts without a gender setting supply None, which can't be
    # searched with the 'in' operator
    if not gender:
        gender = ''

    if screenreader == 'picospeaker':
        rangeMin = -8
        rangeMax = 3
        if 'She' in gender:
            rangeMin = -1
        elif 'Him' in gender:
            rangeMax = -1
    else:
        rangeMin = 1
        rangeMax = 100
        if 'She' in gender:
            rangeMin = 50
        elif 'Him' in gender:
            rangeMax = 50
    return rng.randint(rangeMin, rangeMax)
2021-03-01 19:16:33 +00:00
2021-03-02 15:13:10 +00:00
def getSpeakerRate(displayName: str, screenreader: str) -> int:
    """Returns the speech synthesis rate for the given name

    The rate is pseudo-random but repeatable for a given display name.

    displayName: name used to seed the random number generator
    screenreader: name of the speech synthesiser. 'picospeaker' gives
        a rate of 0..10, anything else gives 50..120
    """
    # A private generator gives the same values as seeding the random
    # module with the name, without reseeding the shared generator
    rng = random.Random(displayName)
    if screenreader == 'picospeaker':
        return rng.randint(0, 10)
    return rng.randint(50, 120)
def getSpeakerRange(displayName: str) -> int:
    """Returns the speech synthesis range for the given name

    The value is between 300 and 800 inclusive and is pseudo-random but
    repeatable for a given display name.
    """
    # A private generator gives the same value as seeding the random
    # module with the name, without reseeding the shared generator
    return random.Random(displayName).randint(300, 800)
2021-03-02 12:39:18 +00:00
def speakerPronounce(baseDir: str, sayText: str, translate: {}) -> str:
    """Screen readers may not always pronounce correctly, so you
    can have a file which specifies conversions. File should contain
    line items such as:
    Epicyon -> Epi-cyon

    baseDir: base directory, expected to contain
        accounts/speaker_pronounce.txt if custom conversions exist
    sayText: the text to be converted
    translate: translations, which must include the keys
        'thinking emoji', 'hashtag', 'laughing', 'smile', 'wink'
        and 'sad face'

    Lines within the file may use '->', ';', ':' or ',' as the
    separator, checked in that order. Entries from the file override
    the built-in conversions having the same key.
    Returns the text with all conversions applied.
    """
    pronounceFilename = baseDir + '/accounts/speaker_pronounce.txt'
    convertDict = {
        "Epicyon": "Epi-cyon",
        "espeak": "e-speak",
        "emoji": "emowji",
        "clearnet": "clear-net",
        "https": "H-T-T-P-S",
        "HTTPS": "H-T-T-P-S",
        "Tor": "Toor",
        "🤔": ". " + translate["thinking emoji"],
        "RT @": "Re-Tweet ",
        "#": translate["hashtag"],
        ":D": '. ' + translate["laughing"],
        ":-D": '. ' + translate["laughing"],
        ":)": '. ' + translate["smile"],
        ";)": '. ' + translate["wink"],
        ":(": '. ' + translate["sad face"],
        ":-)": '. ' + translate["smile"],
        ":-(": '. ' + translate["sad face"],
        ";-)": '. ' + translate["wink"],
        "*": ""
    }
    if os.path.isfile(pronounceFilename):
        # The file can contain emoji, so don't rely on the locale encoding
        with open(pronounceFilename, 'r', encoding='utf-8') as fp:
            for conversion in fp:
                separator = None
                for candidate in ('->', ';', ':', ','):
                    if candidate in conversion:
                        separator = candidate
                        break
                if not separator:
                    continue
                fields = conversion.split(separator)
                text = fields[0].strip()
                if not text:
                    # An empty key would match everywhere, and
                    # str.replace('', x) inserts x between every character
                    continue
                convertDict[text] = fields[1].strip()
    for text, converted in convertDict.items():
        if text in sayText:
            sayText = sayText.replace(text, converted)
    return sayText
def speakerReplaceLinks(sayText: str, translate: {},
                        detectedLinks: []) -> str:
    """Replaces any links in the given text with "link to [domain]".
    Instead of reading out potentially very long and meaningless links

    Also converts :shortcode: style emoji and @mentions into a more
    speakable form.

    sayText: the text to be converted
    translate: translations, which must include the keys 'Linked'
        and 'mentioning'
    detectedLinks: list to which every link found is appended

    Returns the converted text.
    """
    text = sayText
    for ch in speakerRemoveChars:
        text = text.replace(ch, ' ')
    replacements = {}
    linkedStr = translate['Linked']
    prevWord = ''
    for word in text.split(' '):
        if word.startswith(':') and word.endswith(':'):
            emojiName = word.replace(':', '')
            # Only genuine :shortcode: words are replaced. A word made up
            # solely of colons would otherwise become a replacement key
            # and rewrite every colon in the text, including within links
            if emojiName:
                replacements[word] = ', emowji ' + emojiName + ','
                continue
        # replace mentions, but not re-tweets. A lone '@' is ignored,
        # otherwise every '@' in the text would be replaced
        if word.startswith('@') and len(word) > 1:
            if not prevWord.endswith('RT'):
                replacements[word] = \
                    translate['mentioning'] + ' ' + word[1:] + ','
        prevWord = word

        domain = None
        domainFull = None
        for prefix in ('https://', 'http://'):
            if prefix in word:
                domain = word.split(prefix)[1]
                domainFull = prefix + domain
                break
        if not domain:
            continue
        if '/' in domain:
            domain = domain.split('/')[0]
        if domain.startswith('www.'):
            # strip only the leading www. and not any later occurrence
            domain = domain[len('www.'):]
        replacements[domainFull] = '. ' + linkedStr + ' ' + domain + '.'
        detectedLinks.append(domainFull)

    # Apply the longest keys first so that a short key (for example a
    # link which is the prefix of another link) can't corrupt a longer one
    for replaceStr in sorted(replacements, key=len, reverse=True):
        sayText = sayText.replace(replaceStr, replacements[replaceStr])
    return sayText.replace('..', '.')
2021-03-03 12:34:46 +00:00
def _addSSMLemphasis(sayText: str) -> str:
    """Adds emphasis to *emphasised* text

    Any single word which begins and ends with an asterisk is wrapped
    within an SSML <emphasis level="strong"> element, with the asterisks
    removed. Returns the converted text.
    """
    if '*' not in sayText:
        return sayText
    text = sayText
    for ch in speakerRemoveChars:
        text = text.replace(ch, ' ')
    replacements = {}
    for word in text.split(' '):
        if not (word.startswith('*') and word.endswith('*')):
            continue
        emphasised = word.replace('*', '')
        if not emphasised:
            # A word consisting only of asterisks has nothing to
            # emphasise, and using it as a key would replace every
            # asterisk in the text with an empty emphasis element
            continue
        replacements[word] = \
            '<emphasis level="strong">' + emphasised + '</emphasis>'
    for replaceStr, newStr in replacements.items():
        sayText = sayText.replace(replaceStr, newStr)
    return sayText
2021-03-01 19:16:33 +00:00
def getSpeakerFromServer(baseDir: str, session,
                         nickname: str, password: str,
                         domain: str, port: int,
                         httpPrefix: str,
                         debug: bool, projectVersion: str) -> {}:
    """Fetches the speaker endpoint of the given account, which is json
    containing the latest inbox entry in a minimal format suitable for
    a text-to-speech reader

    The request is made to
    httpPrefix://domain[:port]/users/nickname/speaker using basic auth.
    baseDir, debug and projectVersion are not used within this function.
    Returns whatever getJson returns, or 6 if there is no session.
    """
    if not session:
        print('WARN: No session for getSpeakerFromServer')
        # NOTE(review): an integer is returned here although the
        # annotation says a dict - confirm what callers expect
        return 6

    fullDomain = getFullDomain(domain, port)
    requestHeaders = {
        'host': domain,
        'Content-type': 'application/json',
        'Authorization': createBasicAuthHeader(nickname, password)
    }
    speakerUrl = \
        httpPrefix + '://' + fullDomain + '/users/' + nickname + '/speaker'
    # the version of this module is sent, rather than projectVersion
    return getJson(session, speakerUrl, requestHeaders, None,
                   __version__, httpPrefix, domain)
2021-03-03 12:34:46 +00:00
def speakerEndpointJson(displayName: str, summary: str,
                        content: str, imageDescription: str,
                        links: [], gender: str) -> {}:
    """Builds the json served by the TTS speaker endpoint

    The result always has the fields name, summary, say,
    imageDescription and detectedLinks. A gender field is only
    added when a gender was supplied.
    """
    endpoint = {
        "name": displayName,
        "summary": summary,
        "say": content,
        "imageDescription": imageDescription,
        "detectedLinks": links
    }
    if not gender:
        return endpoint
    endpoint['gender'] = gender
    return endpoint
2021-03-03 12:34:46 +00:00
def _speakerEndpointSSML(displayName: str, summary: str,
                         content: str, imageDescription: str,
                         links: [], language: str,
                         instanceTitle: str,
                         gender: str) -> str:
    """Returns an SSML endpoint for the TTS speaker
    https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language
    https://www.w3.org/TR/speech-synthesis/

    displayName: used as the name of the voice
    content: the text to be spoken. *emphasised* words are converted
        into SSML emphasis elements
    language: language code for the spoken text. If not given then
        'en' is used
    instanceTitle: used within the title of the document
    gender: pronoun string such as 'She/Her' or 'He/Him'. May be None
    summary, imageDescription and links are currently not used.

    Returns the SSML document as a string.
    """
    from xml.sax.saxutils import escape

    def _escapeAttribute(value: str) -> str:
        # attribute values are within double quotes, so those need
        # escaping in addition to the & < > handled by escape
        return escape(value, {'"': '&quot;'})

    # without a language the xml:lang attribute can't be built
    if not language:
        language = 'en'
    langShort = language[:2]

    if not gender:
        gender = 'neutral'
    else:
        # NOTE(review): for languages other than English the pronoun
        # string is passed through unchanged - confirm that is intended
        if langShort == 'en':
            gender = gender.lower()
            if 'he/him' in gender:
                gender = 'male'
            elif 'she/her' in gender:
                gender = 'female'
            else:
                gender = 'neutral'

    # The text is escaped before emphasis elements are added, so that
    # characters such as & or < within a post can't break the document
    # while the emphasis markup itself is kept intact
    content = _addSSMLemphasis(escape(content))
    voiceParams = \
        'name="' + _escapeAttribute(displayName) + \
        '" gender="' + _escapeAttribute(gender) + '"'
    # xmlns:dc is declared because the dc: prefix is used by the title
    return '<?xml version="1.0"?>\n' + \
        '<speak xmlns="http://www.w3.org/2001/10/synthesis"\n' + \
        '       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' + \
        '       xmlns:dc="http://purl.org/dc/elements/1.1/"\n' + \
        '       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis\n' + \
        '         http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"\n' + \
        '       version="1.1">\n' + \
        '  <metadata>\n' + \
        '    <dc:title xml:lang="' + _escapeAttribute(langShort) + '">' + \
        escape(instanceTitle) + ' inbox</dc:title>\n' + \
        '  </metadata>\n' + \
        '  <p>\n' + \
        '    <s xml:lang="' + _escapeAttribute(language) + '">\n' + \
        '      <voice ' + voiceParams + '>\n' + \
        '        ' + content + '\n' + \
        '      </voice>\n' + \
        '    </s>\n' + \
        '  </p>\n' + \
        '</speak>\n'
def getSSMLbox(baseDir: str, path: str,
               domain: str,
               systemLanguage: str,
               instanceTitle: str,
               boxName: str) -> str:
    """Returns SSML for the given timeline

    baseDir: base directory containing the accounts directory
    path: request path, which should contain /users/nickname
    domain: domain of the account
    systemLanguage: language code used for the spoken text
    instanceTitle: title of the instance, used as the document title
    boxName: currently not used within this function

    The content is read from the speaker.json file of the account.
    Returns None if the path contains no /users/ section, or if the
    speaker file doesn't exist or could not be loaded.
    """
    if '/users/' not in path:
        # can't determine which account the request is for
        return None
    nickname = path.split('/users/')[1]
    if '/' in nickname:
        nickname = nickname.split('/')[0]
    speakerFilename = \
        baseDir + '/accounts/' + nickname + '@' + domain + '/speaker.json'
    if not os.path.isfile(speakerFilename):
        return None
    speakerJson = loadJson(speakerFilename)
    if not speakerJson:
        return None
    # gender is optional within the speaker file
    gender = None
    if speakerJson.get('gender'):
        gender = speakerJson['gender']
    return _speakerEndpointSSML(speakerJson['name'],
                                speakerJson['summary'],
                                speakerJson['say'],
                                speakerJson['imageDescription'],
                                speakerJson['detectedLinks'],
                                systemLanguage,
                                instanceTitle, gender)