epicyon/speaker.py

__filename__ = "speaker.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.2.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"

import os
import random
from auth import createBasicAuthHeader
from session import getJson
from utils import loadJson
from utils import getFullDomain

# Punctuation sequences which speakerReplaceLinks and _addSSMLemphasis
# replace with a single space before splitting text into words
speakerRemoveChars = ('.\n', '. ', ',', ';', '?', '!')


def getSpeakerPitch(displayName: str, screenreader: str, gender) -> int:
    """Returns the speech synthesis pitch for the given name

    The pitch is pseudo-random but seeded with the display name, so the
    same name, screenreader and gender always give the same pitch.

    displayName: name used to seed the generator
    screenreader: 'picospeaker' uses the range -8..3, anything else 1..100
    gender: pronoun text such as 'She/Her' or 'He/Him'. May be None or
            empty, in which case the full range is used
    """
    # A missing gender must not break the substring tests below
    if not gender:
        gender = ''
    isShe = 'She' in gender
    # 'Him' is only considered when 'She' did not match
    isHim = not isShe and 'Him' in gender

    if screenreader == 'picospeaker':
        rangeMin, rangeMax = -8, 3
        if isShe:
            rangeMin = -1
        elif isHim:
            rangeMax = -1
    else:
        rangeMin, rangeMax = 1, 100
        if isShe:
            rangeMin = 50
        elif isHim:
            rangeMax = 50

    # A private generator gives the same value as seeding the module
    # level one, without reseeding random for the rest of the process
    return random.Random(displayName).randint(rangeMin, rangeMax)


def getSpeakerRate(displayName: str, screenreader: str) -> int:
    """Returns the speech synthesis rate for the given name

    The rate is pseudo-random but seeded with the display name, so the
    same name always gives the same rate for a given screenreader.

    displayName: name used to seed the generator
    screenreader: 'picospeaker' uses the range 0..10, anything else 50..120
    """
    # A private generator gives the same value as seeding the module
    # level one, without reseeding random for the rest of the process
    generator = random.Random(displayName)
    if screenreader == 'picospeaker':
        return generator.randint(0, 10)
    return generator.randint(50, 120)


def getSpeakerRange(displayName: str) -> int:
    """Returns the speech synthesis range for the given name

    The value is pseudo-random within 300..800 but seeded with the
    display name, so the same name always gives the same range.
    """
    # A private generator gives the same value as seeding the module
    # level one, without reseeding random for the rest of the process
    return random.Random(displayName).randint(300, 800)


def speakerPronounce(baseDir: str, sayText: str, translate: {}) -> str:
    """Screen readers may not always pronounce correctly, so you
    can have a file which specifies conversions. File should contain
    line items such as:
    Epicyon -> Epi-cyon

    The file is baseDir/accounts/speaker_pronounce.txt and is optional.
    The separator on each line is the first one present out of
    '->', ';', ':' and ','. Lines without a separator, or with nothing
    before it, are ignored. Entries from the file override the
    built-in conversions which have the same text.

    baseDir: base directory of the instance
    sayText: the text to be spoken
    translate: needs entries for 'thinking emoji', 'hashtag',
               'laughing', 'smile', 'wink' and 'sad face'
    Returns sayText with every conversion applied
    """
    pronounceFilename = baseDir + '/accounts/speaker_pronounce.txt'
    convertDict = {
        "Epicyon": "Epi-cyon",
        "espeak": "e-speak",
        "emoji": "emowji",
        "clearnet": "clear-net",
        "https": "H-T-T-P-S",
        "HTTPS": "H-T-T-P-S",
        "Tor": "Toor",
        "🤔": ". " + translate["thinking emoji"],
        "RT @": "Re-Tweet ",
        "#": translate["hashtag"],
        ":D": '. ' + translate["laughing"],
        ":-D": '. ' + translate["laughing"],
        ":)": '. ' + translate["smile"],
        ";)": '. ' + translate["wink"],
        ":(": '. ' + translate["sad face"],
        ":-)": '. ' + translate["smile"],
        ":-(": '. ' + translate["sad face"],
        ";-)": '. ' + translate["wink"],
        "*": ""
    }
    if os.path.isfile(pronounceFilename):
        # The conversions can include emoji, so don't rely on the
        # default encoding of the locale
        with open(pronounceFilename, 'r', encoding='utf-8') as fp:
            for conversion in fp:
                # separators in order of priority
                separator = None
                for candidate in ('->', ';', ':', ','):
                    if candidate in conversion:
                        separator = candidate
                        break
                if not separator:
                    continue

                fields = conversion.split(separator)
                text = fields[0].strip()
                if not text:
                    # replacing an empty string would insert the
                    # conversion between every character of the text
                    continue
                convertDict[text] = fields[1].strip()
    # conversions are applied in insertion order
    for text, converted in convertDict.items():
        sayText = sayText.replace(text, converted)
    return sayText


def speakerReplaceLinks(sayText: str, translate: {},
                        detectedLinks: []) -> str:
    """Replaces any links in the given text with "link to [domain]".
    Instead of reading out potentially very long and meaningless links.
    Words of the form :name: and @mentions are also replaced with
    spoken equivalents.

    sayText: the text to be spoken
    translate: needs a 'Linked' entry, and a 'mentioning' entry if the
               text contains mentions
    detectedLinks: list to which every link found is appended
    Returns the altered text, with each '..' replaced by '.'
    """
    text = sayText
    for ch in speakerRemoveChars:
        text = text.replace(ch, ' ')
    replacements = {}
    linkedStr = translate['Linked']
    prevWord = ''
    for word in text.split(' '):
        if word.startswith(':') and word.endswith(':'):
            emojiName = word.replace(':', '')
            # A token made only of colons has no name. Registering it
            # would rewrite every colon in the text, including the
            # ones inside links
            if emojiName:
                replacements[word] = ', emowji ' + emojiName + ','
            continue
        # replace mentions, but not re-tweets
        # A lone '@' is not a mention, and replacing it would rewrite
        # every '@' in the text
        if len(word) > 1 and word.startswith('@') and \
           not prevWord.endswith('RT'):
            replacements[word] = \
                translate['mentioning'] + ' ' + word[1:] + ','
        prevWord = word

        domain = None
        domainFull = None
        if 'https://' in word:
            domain = word.split('https://')[1]
            domainFull = 'https://' + domain
        elif 'http://' in word:
            domain = word.split('http://')[1]
            domainFull = 'http://' + domain
        if not domain:
            continue
        if '/' in domain:
            domain = domain.split('/')[0]
        if domain.startswith('www.'):
            # only remove the prefix, not any later occurrence
            domain = domain[len('www.'):]
        replacements[domainFull] = '. ' + linkedStr + ' ' + domain + '.'
        detectedLinks.append(domainFull)
    for replaceStr, newStr in replacements.items():
        sayText = sayText.replace(replaceStr, newStr)
    return sayText.replace('..', '.')


def _addSSMLemphasis(sayText: str) -> str:
    """Adds emphasis to *emphasised* text

    Any single word which begins and ends with an asterisk is wrapped
    in a strong SSML emphasis element, with its asterisks removed.
    Text without any asterisk is returned unchanged.
    """
    if '*' not in sayText:
        return sayText
    text = sayText
    for ch in speakerRemoveChars:
        text = text.replace(ch, ' ')
    replacements = {}
    for word in text.split(' '):
        if not (word.startswith('*') and word.endswith('*')):
            continue
        emphasised = word.replace('*', '')
        if not emphasised:
            # A token made only of asterisks has nothing to emphasise.
            # Registering it would rewrite every asterisk in the text
            # and break the markers around real words
            continue
        replacements[word] = \
            '<emphasis level="strong">' + emphasised + '</emphasis>'
    for replaceStr, newStr in replacements.items():
        sayText = sayText.replace(replaceStr, newStr)
    return sayText


def getSpeakerFromServer(baseDir: str, session,
                         nickname: str, password: str,
                         domain: str, port: int,
                         httpPrefix: str,
                         debug: bool, projectVersion: str) -> {}:
    """Fetches the speaker endpoint of an account, which holds the
    latest inbox entry in a minimal format for a text-to-speech reader

    The request goes to httpPrefix://domain[:port]/users/nickname/speaker
    using basic auth with the given nickname and password.
    baseDir, debug and projectVersion are not used here, and the
    version passed to getJson is this module's __version__.

    Returns whatever getJson returns, or the number 6 when there
    is no session
    """
    if not session:
        print('WARN: No session for getSpeakerFromServer')
        # NOTE(review): a number rather than json, so callers need to
        # check the type of the result
        return 6

    hostAndPort = getFullDomain(domain, port)

    requestHeaders = {
        'host': domain,
        'Content-type': 'application/json',
        'Authorization': createBasicAuthHeader(nickname, password)
    }

    speakerUrl = httpPrefix + '://' + hostAndPort
    speakerUrl = speakerUrl + '/users/' + nickname + '/speaker'

    return getJson(session, speakerUrl, requestHeaders, None,
                   __version__, httpPrefix, domain)


def speakerEndpointJson(displayName: str, summary: str,
                        content: str, imageDescription: str,
                        links: [], gender: str) -> {}:
    """Builds the json served by the TTS speaker endpoint

    The result has the fields name, summary, say, imageDescription
    and detectedLinks, in that order. A gender field is appended only
    when a gender was supplied.
    """
    fieldNames = (
        "name", "summary", "say", "imageDescription", "detectedLinks"
    )
    fieldValues = (
        displayName, summary, content, imageDescription, links
    )
    endpoint = dict(zip(fieldNames, fieldValues))
    if not gender:
        return endpoint
    endpoint['gender'] = gender
    return endpoint


def _speakerEndpointSSML(displayName: str, summary: str,
                         content: str, imageDescription: str,
                         links: [], language: str,
                         instanceTitle: str,
                         gender: str) -> str:
    """Returns an SSML endpoint for the TTS speaker
    https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language
    https://www.w3.org/TR/speech-synthesis/

    displayName: used as the name of the voice
    content: the text to be spoken. Words like *this* get emphasis
    language: language code for the spoken text. Defaults to 'en'
              when it is empty or None
    instanceTitle: used within the title metadata
    gender: pronoun text. When the language is English 'he/him' and
            'she/her' become male and female and anything else
            becomes neutral. For other languages the text is passed
            through. No gender at all gives neutral
    summary, imageDescription and links are not used at present
    """
    from xml.sax.saxutils import escape

    # Use the same fallback for the sentence language as for the
    # title, rather than failing when no language is given
    if not language:
        language = 'en'
    langShort = language[:2]

    if not gender:
        gender = 'neutral'
    elif langShort == 'en':
        gender = gender.lower()
        if 'he/him' in gender:
            gender = 'male'
        elif 'she/her' in gender:
            gender = 'female'
        else:
            gender = 'neutral'

    # NOTE(review): content is not escaped here because it carries the
    # emphasis markup, and whether it has been sanitised before being
    # stored isn't visible from this function - confirm upstream
    content = _addSSMLemphasis(content)

    # Names, titles and pronouns are free text, so they are escaped to
    # stop quotes or angle brackets producing malformed XML
    quoteEntities = {'"': '&quot;'}
    voiceParams = \
        'name="' + escape(displayName, quoteEntities) + \
        '" gender="' + escape(gender, quoteEntities) + '"'
    # xmlns:dc is declared because the title uses the dc prefix
    return '<?xml version="1.0"?>\n' + \
        '<speak xmlns="http://www.w3.org/2001/10/synthesis"\n' + \
        '       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' + \
        '       xmlns:dc="http://purl.org/dc/elements/1.1/"\n' + \
        '       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis\n' + \
        '         http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"\n' + \
        '       version="1.1">\n' + \
        '  <metadata>\n' + \
        '    <dc:title xml:lang="' + \
        escape(langShort, quoteEntities) + '">' + \
        escape(instanceTitle) + ' inbox</dc:title>\n' + \
        '  </metadata>\n' + \
        '  <p>\n' + \
        '    <s xml:lang="' + escape(language, quoteEntities) + '">\n' + \
        '      <voice ' + voiceParams + '>\n' + \
        '        ' + content + '\n' + \
        '      </voice>\n' + \
        '    </s>\n' + \
        '  </p>\n' + \
        '</speak>\n'


def getSSMLbox(baseDir: str, path: str,
               domain: str,
               systemLanguage: str,
               instanceTitle: str,
               boxName: str) -> str:
    """Returns SSML for the given timeline

    The nickname is taken from the part of the path after /users/ and
    the SSML is built from the speaker.json file of that account.

    baseDir: base directory of the instance
    path: the requested path, containing /users/nickname
    domain: domain of the account
    systemLanguage: language code passed on for the spoken text
    instanceTitle: title passed on for the metadata
    boxName: not used at present
    Returns None if the path has no /users/ part, or if speaker.json
    is missing, can't be loaded or lacks any of the expected fields
    """
    if '/users/' not in path:
        return None
    nickname = path.split('/users/')[1].split('/')[0]
    speakerFilename = \
        baseDir + '/accounts/' + nickname + '@' + domain + '/speaker.json'
    if not os.path.isfile(speakerFilename):
        return None
    speakerJson = loadJson(speakerFilename)
    if not speakerJson:
        return None
    if not isinstance(speakerJson, dict):
        return None
    # An incomplete file is treated the same as a missing one
    requiredFields = (
        'name', 'summary', 'say', 'imageDescription', 'detectedLinks'
    )
    for fieldName in requiredFields:
        if fieldName not in speakerJson:
            return None
    # an empty gender counts as not being set
    gender = speakerJson.get('gender') or None
    return _speakerEndpointSSML(speakerJson['name'],
                                speakerJson['summary'],
                                speakerJson['say'],
                                speakerJson['imageDescription'],
                                speakerJson['detectedLinks'],
                                systemLanguage,
                                instanceTitle, gender)
Speaker option 2021-03-01 19:16:33 +00:00			`__filename__ = "speaker.py"`
			`__author__ = "Bob Mottram"`
			`__license__ = "AGPL3+"`
			`__version__ = "1.2.0"`
			`__maintainer__ = "Bob Mottram"`
			`__email__ = "bob@freedombone.net"`
			`__status__ = "Production"`

TTS custom pronounce 2021-03-02 12:39:18 +00:00			`import os`
Speaker option 2021-03-01 19:16:33 +00:00			`import random`
			`from auth import createBasicAuthHeader`
			`from session import getJson`
SSML inbox endpoint 2021-03-03 12:34:46 +00:00			`from utils import loadJson`
Speaker option 2021-03-01 19:16:33 +00:00			`from utils import getFullDomain`

SSML inbox endpoint 2021-03-03 12:34:46 +00:00			`speakerRemoveChars = ('.\n', '. ', ',', ';', '?', '!')`

Speaker option 2021-03-01 19:16:33 +00:00
Alter TTS pitch based upon speaker gender 2021-03-03 14:02:14 +00:00			`def getSpeakerPitch(displayName: str, screenreader: str, gender) -> int:`
Speaker option 2021-03-01 19:16:33 +00:00			`"""Returns the speech synthesis pitch for the given name`
			`"""`
			`random.seed(displayName)`
Alter TTS pitch based upon speaker gender 2021-03-03 14:02:14 +00:00			`rangeMin = 1`
			`rangeMax = 100`
			`if 'She' in gender:`
			`rangeMin = 50`
			`elif 'Him' in gender:`
			`rangeMax = 50`
Speech parameters for picospeaker 2021-03-02 15:13:10 +00:00			`if screenreader == 'picospeaker':`
Alter TTS pitch based upon speaker gender 2021-03-03 14:02:14 +00:00			`rangeMin = -8`
			`rangeMax = 3`
			`if 'She' in gender:`
			`rangeMin = -1`
			`elif 'Him' in gender:`
			`rangeMax = -1`
			`return random.randint(rangeMin, rangeMax)`
Speaker option 2021-03-01 19:16:33 +00:00

Speech parameters for picospeaker 2021-03-02 15:13:10 +00:00			`def getSpeakerRate(displayName: str, screenreader: str) -> int:`
Speaker option 2021-03-01 19:16:33 +00:00			`"""Returns the speech synthesis rate for the given name`
			`"""`
			`random.seed(displayName)`
Speech parameters for picospeaker 2021-03-02 15:13:10 +00:00			`if screenreader == 'picospeaker':`
Slower rates 2021-03-02 15:27:31 +00:00			`return random.randint(0, 10)`
Speaker option 2021-03-01 19:16:33 +00:00			`return random.randint(50, 120)`


			`def getSpeakerRange(displayName: str) -> int:`
			`"""Returns the speech synthesis range for the given name`
			`"""`
			`random.seed(displayName)`
			`return random.randint(300, 800)`


TTS custom pronounce 2021-03-02 12:39:18 +00:00			`def speakerPronounce(baseDir: str, sayText: str, translate: {}) -> str:`
			`"""Screen readers may not always pronounce correctly, so you`
			`can have a file which specifies conversions. File should contain`
			`line items such as:`
			`Epicyon -> Epi-cyon`
			`"""`
			`pronounceFilename = baseDir + '/accounts/speaker_pronounce.txt'`
			`convertDict = {`
			`"Epicyon": "Epi-cyon",`
			`"espeak": "e-speak",`
Pronounce emoji 2021-03-02 17:18:47 +00:00			`"emoji": "emowji",`
clearnet 2021-03-02 12:50:07 +00:00			`"clearnet": "clear-net",`
Replacement 2021-03-02 18:10:57 +00:00			`"https": "H-T-T-P-S",`
			`"HTTPS": "H-T-T-P-S",`
Pronounce Tor 2021-03-02 18:18:45 +00:00			`"Tor": "Toor",`
Extra verbal emoji 2021-03-02 19:39:46 +00:00			`"🤔": ". " + translate["thinking emoji"],`
Say re-tweets 2021-03-02 14:05:43 +00:00			`"RT @": "Re-Tweet ",`
TTS custom pronounce 2021-03-02 12:39:18 +00:00			`"#": translate["hashtag"],`
Extra verbal emoji 2021-03-02 19:39:46 +00:00			`":D": '. ' + translate["laughing"],`
			`":-D": '. ' + translate["laughing"],`
			`":)": '. ' + translate["smile"],`
			`";)": '. ' + translate["wink"],`
			`":(": '. ' + translate["sad face"],`
			`":-)": '. ' + translate["smile"],`
			`":-(": '. ' + translate["sad face"],`
			`";-)": '. ' + translate["wink"],`
TTS custom pronounce 2021-03-02 12:39:18 +00:00			`"*": ""`
			`}`
			`if os.path.isfile(pronounceFilename):`
			`with open(pronounceFilename, 'r') as fp:`
			`pronounceList = fp.readlines()`
			`for conversion in pronounceList:`
			`separator = None`
			`if '->' in conversion:`
			`separator = '->'`
			`elif ';' in conversion:`
			`separator = ';'`
			`elif ':' in conversion:`
			`separator = ':'`
			`elif ',' in conversion:`
			`separator = ','`
			`if not separator:`
			`continue`

			`text = conversion.split(separator)[0].strip()`
			`converted = conversion.split(separator)[1].strip()`
			`convertDict[text] = converted`
			`for text, converted in convertDict.items():`
			`if text in sayText:`
			`sayText = sayText.replace(text, converted)`
			`return sayText`


Include any detected links within speaker endpoint 2021-03-01 21:26:34 +00:00			`def speakerReplaceLinks(sayText: str, translate: {},`
			`detectedLinks: []) -> str:`
Replace web links with shorter versions for the speaker interface 2021-03-01 21:20:06 +00:00			`"""Replaces any links in the given text with "link to [domain]".`
			`Instead of reading out potentially very long and meaningless links`
			`"""`
			`text = sayText`
SSML inbox endpoint 2021-03-03 12:34:46 +00:00			`for ch in speakerRemoveChars:`
Replace web links with shorter versions for the speaker interface 2021-03-01 21:20:06 +00:00			`text = text.replace(ch, ' ')`
			`replacements = {}`
			`wordsList = text.split(' ')`
			`linkedStr = translate['Linked']`
Say re-tweets 2021-03-02 14:05:43 +00:00			`prevWord = ''`
Replace web links with shorter versions for the speaker interface 2021-03-01 21:20:06 +00:00			`for word in wordsList:`
Replace emoji for speaker 2021-03-02 16:50:32 +00:00			`if word.startswith(':'):`
			`if word.endswith(':'):`
Pronounce emoji 2021-03-02 17:18:47 +00:00			`replacements[word] = ', emowji ' + word.replace(':', '') + ','`
Replace emoji for speaker 2021-03-02 16:50:32 +00:00			`continue`
Say re-tweets 2021-03-02 14:05:43 +00:00			`# replace mentions, but not re-tweets`
More robust retweet detection 2021-03-02 14:09:51 +00:00			`if word.startswith('@') and not prevWord.endswith('RT'):`
speaker says 'mentioning name' rather than @name 2021-03-02 13:54:22 +00:00			`replacements[word] = \`
Extraneous space 2021-03-02 13:55:54 +00:00			`translate['mentioning'] + ' ' + word[1:] + ','`
Say re-tweets 2021-03-02 14:05:43 +00:00			`prevWord = word`
speaker says 'mentioning name' rather than @name 2021-03-02 13:54:22 +00:00
Replace web links with shorter versions for the speaker interface 2021-03-01 21:20:06 +00:00			`domain = None`
			`domainFull = None`
			`if 'https://' in word:`
			`domain = word.split('https://')[1]`
			`domainFull = 'https://' + domain`
			`elif 'http://' in word:`
			`domain = word.split('http://')[1]`
			`domainFull = 'http://' + domain`
			`if not domain:`
			`continue`
			`if '/' in domain:`
			`domain = domain.split('/')[0]`
Test for web link replacement in the speaker interface 2021-03-01 21:46:44 +00:00			`if domain.startswith('www.'):`
			`domain = domain.replace('www.', '')`
Replace web links with shorter versions for the speaker interface 2021-03-01 21:20:06 +00:00			`replacements[domainFull] = '. ' + linkedStr + ' ' + domain + '.'`
Include any detected links within speaker endpoint 2021-03-01 21:26:34 +00:00			`detectedLinks.append(domainFull)`
Replace web links with shorter versions for the speaker interface 2021-03-01 21:20:06 +00:00			`for replaceStr, newStr in replacements.items():`
			`sayText = sayText.replace(replaceStr, newStr)`
Test for web link replacement in the speaker interface 2021-03-01 21:46:44 +00:00			`return sayText.replace('..', '.')`
Replace web links with shorter versions for the speaker interface 2021-03-01 21:20:06 +00:00

SSML inbox endpoint 2021-03-03 12:34:46 +00:00			`def _addSSMLemphasis(sayText: str) -> str:`
			`"""Adds emphasis to emphasised text`
			`"""`
			`if '*' not in sayText:`
			`return sayText`
			`text = sayText`
			`for ch in speakerRemoveChars:`
			`text = text.replace(ch, ' ')`
			`wordsList = text.split(' ')`
			`replacements = {}`
			`for word in wordsList:`
			`if word.startswith('*'):`
			`if word.endswith('*'):`
			`replacements[word] = \`
			`'<emphasis level="strong">' + \`
			`word.replace('*', '') + \`
			`'</emphasis>'`
			`for replaceStr, newStr in replacements.items():`
			`sayText = sayText.replace(replaceStr, newStr)`
			`return sayText`


Speaker option 2021-03-01 19:16:33 +00:00			`def getSpeakerFromServer(baseDir: str, session,`
			`nickname: str, password: str,`
			`domain: str, port: int,`
			`httpPrefix: str,`
			`debug: bool, projectVersion: str) -> {}:`
			`"""Returns some json which contains the latest inbox`
			`entry in a minimal format suitable for a text-to-speech reader`
			`"""`
			`if not session:`
			`print('WARN: No session for getSpeakerFromServer')`
			`return 6`

			`domainFull = getFullDomain(domain, port)`

			`authHeader = createBasicAuthHeader(nickname, password)`

			`headers = {`
			`'host': domain,`
			`'Content-type': 'application/json',`
			`'Authorization': authHeader`
			`}`

			`url = \`
			`httpPrefix + '://' + \`
			`domainFull + '/users/' + nickname + '/speaker'`

			`speakerJson = \`
			`getJson(session, url, headers, None,`
			`__version__, httpPrefix, domain)`
			`return speakerJson`
SSML inbox endpoint 2021-03-03 12:34:46 +00:00

			`def speakerEndpointJson(displayName: str, summary: str,`
			`content: str, imageDescription: str,`
Gender detaction for SSML 2021-03-03 13:02:47 +00:00			`links: [], gender: str) -> {}:`
SSML inbox endpoint 2021-03-03 12:34:46 +00:00			`"""Returns a json endpoint for the TTS speaker`
			`"""`
Gender detaction for SSML 2021-03-03 13:02:47 +00:00			`speakerJson = {`
SSML inbox endpoint 2021-03-03 12:34:46 +00:00			`"name": displayName,`
			`"summary": summary,`
			`"say": content,`
			`"imageDescription": imageDescription,`
			`"detectedLinks": links`
			`}`
Gender detaction for SSML 2021-03-03 13:02:47 +00:00			`if gender:`
			`speakerJson['gender'] = gender`
			`return speakerJson`
SSML inbox endpoint 2021-03-03 12:34:46 +00:00

			`def _speakerEndpointSSML(displayName: str, summary: str,`
			`content: str, imageDescription: str,`
			`links: [], language: str,`
			`instanceTitle: str,`
			`gender: str) -> str:`
			`"""Returns an SSML endpoint for the TTS speaker`
			`https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language`
			`https://www.w3.org/TR/speech-synthesis/`
			`"""`
			`langShort = 'en'`
			`if language:`
			`langShort = language[:2]`
			`if not gender:`
			`gender = 'neutral'`
			`else:`
			`if langShort == 'en':`
			`gender = gender.lower()`
Gender detaction for SSML 2021-03-03 13:02:47 +00:00			`if 'he/him' in gender:`
SSML inbox endpoint 2021-03-03 12:34:46 +00:00			`gender = 'male'`
Gender detaction for SSML 2021-03-03 13:02:47 +00:00			`elif 'she/her' in gender:`
SSML inbox endpoint 2021-03-03 12:34:46 +00:00			`gender = 'female'`
			`else:`
			`gender = 'neutral'`

			`content = _addSSMLemphasis(content)`
			`voiceParams = 'name="' + displayName + '" gender="' + gender + '"'`
			`return '<?xml version="1.0"?>\n' + \`
			`'<speak xmlns="http://www.w3.org/2001/10/synthesis"\n' + \`
			`' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n' + \`
			`' xsi:schemaLocation="http://www.w3.org/2001/10/synthesis\n' + \`
			`' http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"\n' + \`
			`' version="1.1">\n' + \`
			`' <metadata>\n' + \`
			`' <dc:title xml:lang="' + langShort + '">' + \`
			`instanceTitle + ' inbox</dc:title>\n' + \`
			`' </metadata>\n' + \`
			`' <p>\n' + \`
			`' <s xml:lang="' + language + '">\n' + \`
			`' <voice ' + voiceParams + '>\n' + \`
			`' ' + content + '\n' + \`
			`' </voice>\n' + \`
			`' </s>\n' + \`
			`' </p>\n' + \`
			`'</speak>\n'`


			`def getSSMLbox(baseDir: str, path: str,`
			`domain: str,`
			`systemLanguage: str,`
			`instanceTitle: str,`
			`boxName: str) -> str:`
			`"""Returns SSML for the given timeline`
			`"""`
			`nickname = path.split('/users/')[1]`
			`if '/' in nickname:`
			`nickname = nickname.split('/')[0]`
			`speakerFilename = \`
			`baseDir + '/accounts/' + nickname + '@' + domain + '/speaker.json'`
			`if not os.path.isfile(speakerFilename):`
			`return None`
			`speakerJson = loadJson(speakerFilename)`
			`if not speakerJson:`
			`return None`
			`gender = None`
			`if speakerJson.get('gender'):`
			`gender = speakerJson['gender']`
			`return _speakerEndpointSSML(speakerJson['name'],`
			`speakerJson['summary'],`
			`speakerJson['say'],`
			`speakerJson['imageDescription'],`
			`speakerJson['detectedLinks'],`
			`systemLanguage,`
			`instanceTitle, gender)`