__filename__ = "speaker.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.2.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Accessibility"
import os
import html
import random
import urllib.parse
from utils import remove_id_ending
from utils import is_dm
from utils import is_reply
from utils import camel_case_split
from utils import get_domain_from_actor
from utils import get_nickname_from_actor
from utils import get_gender_from_bio
from utils import get_display_name
from utils import remove_html
from utils import load_json
from utils import save_json
from utils import is_pgp_encrypted
from utils import has_object_dict
from utils import acct_dir
from utils import local_actor_url
from content import htmlReplaceQuoteMarks
# Punctuation sequences which are replaced by a single space before
# text is split into words (see speakerReplaceLinks and _addSSMLemphasis)
speakerRemoveChars = ('.\n', '. ', ',', ';', '?', '!')
def getSpeakerPitch(displayName: str, screenreader: str, gender) -> int:
    """Chooses a repeatable speech synthesis pitch for a display name

    The global random generator is seeded with displayName, so the
    same name always yields the same pitch. The range of possible
    pitches depends on the screen reader ('picospeaker' uses -6..3,
    anything else uses 1..100) and is narrowed to the upper part when
    gender contains 'She', or to the lower part when it contains 'Him'.
    """
    random.seed(displayName)

    # full range and the point at which it is divided, per screen reader
    if screenreader == 'picospeaker':
        lowest, highest, divide = -6, 3, -1
    else:
        lowest, highest, divide = 1, 100, 50

    if 'She' in gender:
        lowest = divide
    elif 'Him' in gender:
        highest = divide

    return random.randint(lowest, highest)
def getSpeakerRate(displayName: str, screenreader: str) -> int:
    """Chooses a repeatable speech synthesis rate for a display name

    The global random generator is seeded with displayName, so the
    same name always yields the same rate. 'picospeaker' draws from
    -40..-20 and every other screen reader from 50..120.
    """
    random.seed(displayName)
    if screenreader == 'picospeaker':
        slowest, fastest = -40, -20
    else:
        slowest, fastest = 50, 120
    return random.randint(slowest, fastest)
def getSpeakerRange(displayName: str) -> int:
    """Chooses a repeatable speech synthesis range for a display name

    The global random generator is seeded with displayName, so the
    same name always yields the same value between 300 and 800.
    """
    random.seed(displayName)
    synthRange = random.randint(300, 800)
    return synthRange
def _speakerPronounce(base_dir: str, sayText: str, translate: {}) -> str:
    """Screen readers may not always pronounce correctly, so you
    can have a file which specifies conversions. File should contain
    line items such as:
    Epicyon -> Epi-cyon

    The separator on each line may be '->', ';', ':' or ',' (checked
    in that order). Lines without a separator, or with nothing before
    the separator, are ignored.

    base_dir: directory containing accounts/speaker_pronounce.txt
    sayText: the text to be converted
    translate: translations dictionary. When it is empty the built-in
        conversions are not used, and only the file is applied. When
        it is not empty it must contain the keys "thinking emoji",
        "hashtag", "laughing", "smile", "wink", "sad face" and
        "shocked", otherwise KeyError is raised.

    Returns sayText with every conversion applied.
    """
    pronounceFilename = base_dir + '/accounts/speaker_pronounce.txt'
    convertDict = {}
    if translate:
        # Conversions are applied in the order listed, so longer
        # tokens appear before any shorter token which they contain
        # (eg. "#nowplaying" before "#", "AGPL" before "GPL")
        convertDict = {
            "Epicyon": "Epi-cyon",
            "espeak": "e-speak",
            "emoji": "emowji",
            "clearnet": "clear-net",
            "https": "H-T-T-P-S",
            "HTTPS": "H-T-T-P-S",
            "XMPP": "X-M-P-P",
            "xmpp": "X-M-P-P",
            "sql": "S-Q-L",
            ".js": " dot J-S",
            "PSQL": "Postgres S-Q-L",
            "SQL": "S-Q-L",
            "gdpr": "G-D-P-R",
            "kde": "K-D-E",
            "AGPL": "Affearo G-P-L",
            "agpl": "Affearo G-P-L",
            "GPL": "G-P-L",
            "gpl": "G-P-L",
            "coop": "co-op",
            "KMail": "K-Mail",
            "kmail": "K-Mail",
            "gmail": "G-mail",
            "Gmail": "G-mail",
            "OpenPGP": "Open P-G-P",
            "Tor": "Toor",
            "memes": "meemes",
            "Memes": "Meemes",
            "rofl": "roll on the floor laughing",
            "ROFL": "roll on the floor laughing",
            "fwiw": "for what it's worth",
            "fyi": "for your information",
            "irl": "in real life",
            "IRL": "in real life",
            "imho": "in my opinion",
            "fediverse": "fediiverse",
            "Fediverse": "Fediiverse",
            " foss ": " free and open source software ",
            " floss ": " free libre and open source software ",
            # the surrounding spaces are matched, so they need to be
            # put back to avoid joining the neighbouring words
            " FOSS ": " free and open source software ",
            " FLOSS ": " free libre and open source software ",
            " oss ": " open source software ",
            " OSS ": " open source software ",
            "🤔": ". " + translate["thinking emoji"],
            "RT @": "Re-Tweet ",
            "#nowplaying": translate["hashtag"] + " now-playing",
            "#NowPlaying": translate["hashtag"] + " now-playing",
            "#": translate["hashtag"] + ' ',
            ":D": '. ' + translate["laughing"],
            ":-D": '. ' + translate["laughing"],
            ":)": '. ' + translate["smile"],
            ";)": '. ' + translate["wink"],
            ":(": '. ' + translate["sad face"],
            ":-)": '. ' + translate["smile"],
            ":-(": '. ' + translate["sad face"],
            ";-)": '. ' + translate["wink"],
            ":O": '. ' + translate['shocked'],
            "?": "? ",
            '"': "'",
            "*": "",
            "(": ",",
            ")": ","
        }
    if os.path.isfile(pronounceFilename):
        with open(pronounceFilename, 'r', encoding='utf-8') as fp:
            for conversion in fp:
                # the first separator type found on the line is used
                for separator in ('->', ';', ':', ','):
                    if separator in conversion:
                        break
                else:
                    continue
                fields = conversion.split(separator)
                text = fields[0].strip()
                if not text:
                    # replacing an empty string would insert the
                    # converted text between every character
                    continue
                # entries from the file override built-in conversions
                convertDict[text] = fields[1].strip()
    for text, converted in convertDict.items():
        sayText = sayText.replace(text, converted)
    return sayText
def speakerReplaceLinks(sayText: str, translate: {},
                        detectedLinks: []) -> str:
    """Replaces any links in the given text with "link to [domain]".
    Instead of reading out potentially very long and meaningless links

    Also replaces :shortcode: style words with ", emowji shortcode,",
    removes words beginning with 'v=' and, if translate contains
    'mentioning', replaces @mentions which do not follow 'RT'.

    sayText: the text to be converted
    translate: translations dictionary. 'Linked' and 'mentioning' are
        looked up, with 'Linked' falling back to the English word
    detectedLinks: list which every link found is appended to. This
        list is modified in place

    Returns the converted text, with any '..' reduced to '.'
    """
    # Split a scratch copy into words. '?v=' is temporarily renamed so
    # that removing '?' does not break it apart
    text = sayText.replace('?v=', '__v=')
    for ch in speakerRemoveChars:
        text = text.replace(ch, ' ')
    text = text.replace('__v=', '?v=')

    linkedStr = translate.get('Linked') or 'Linked'
    mentioningStr = translate.get('mentioning')

    replacements = {}
    prevWord = ''
    for word in text.split(' '):
        if word.startswith('v='):
            replacements[word] = ''
        if word.startswith(':') and word.endswith(':'):
            shortcode = word.replace(':', '')
            # A word consisting only of colons is not an emoji.
            # Registering it would replace every colon within the
            # text, including the ones inside links
            if shortcode:
                replacements[word] = ', emowji ' + shortcode + ','
            continue
        if word.startswith('@') and not prevWord.endswith('RT'):
            # replace mentions, but not re-tweets
            if mentioningStr:
                replacements[word] = mentioningStr + ' ' + word[1:] + ', '
        prevWord = word

        if 'https://' in word:
            scheme = 'https://'
        elif 'http://' in word:
            scheme = 'http://'
        else:
            continue
        domain = word.split(scheme)[1]
        if not domain:
            continue
        domain_full = scheme + domain
        if '/' in domain:
            domain = domain.split('/')[0]
        if domain.startswith('www.'):
            # only remove the prefix, not later occurrences of 'www.'
            domain = domain[len('www.'):]
        replacements[domain_full] = '. ' + linkedStr + ' ' + domain + '.'
        detectedLinks.append(domain_full)

    # replace words with their replacements
    for replaceStr, newStr in replacements.items():
        sayText = sayText.replace(replaceStr, newStr)
    return sayText.replace('..', '.')
def _addSSMLemphasis(sayText: str) -> str:
"""Adds emphasis to *emphasised* text
"""
if '*' not in sayText:
return sayText
text = sayText
for ch in speakerRemoveChars:
text = text.replace(ch, ' ')
wordsList = text.split(' ')
replacements = {}
for word in wordsList:
if word.startswith('*'):
if word.endswith('*'):
replacements[word] = \
' \n' + \
' \n' + \
' \n' + \
'
', '').replace('
', ' ') if not is_pgp_encrypted(content): # replace some emoji before removing html if ' <3' in content: content = content.replace(' <3', ' ' + translate['heart']) content = remove_html(htmlReplaceQuoteMarks(content)) content = speakerReplaceLinks(content, translate, detectedLinks) # replace all double spaces while ' ' in content: content = content.replace(' ', ' ') content = content.replace(' . ', '. ').strip() sayContent = content sayContent = _speakerPronounce(base_dir, content, translate) # replace all double spaces while ' ' in sayContent: sayContent = sayContent.replace(' ', ' ') sayContent = sayContent.replace(' . ', '. ').strip() else: sayContent = content imageDescription = '' if post_json_object['object'].get('attachment'): attachList = post_json_object['object']['attachment'] if isinstance(attachList, list): for img in attachList: if not isinstance(img, dict): continue if img.get('name'): if isinstance(img['name'], str): imageDescription += \ img['name'] + '. ' isDirect = is_dm(post_json_object) actor = local_actor_url(http_prefix, nickname, domain_full) replyToYou = is_reply(post_json_object, actor) published = '' if post_json_object['object'].get('published'): published = post_json_object['object']['published'] summary = '' if post_json_object['object'].get('summary'): if isinstance(post_json_object['object']['summary'], str): post_json_object_summary = post_json_object['object']['summary'] summary = \ urllib.parse.unquote_plus(post_json_object_summary) summary = html.unescape(summary) speakerName = \ get_display_name(base_dir, post_json_object['actor'], person_cache) if not speakerName: return speakerName = _removeEmojiFromText(speakerName) speakerName = speakerName.replace('_', ' ') speakerName = camel_case_split(speakerName) gender = get_gender_from_bio(base_dir, post_json_object['actor'], person_cache, translate) if announcingActor: announcedNickname = get_nickname_from_actor(announcingActor) announcedDomain, announcedport = 
get_domain_from_actor(announcingActor) if announcedNickname and announcedDomain: announcedHandle = announcedNickname + '@' + announcedDomain sayContent = \ translate['announces'] + ' ' + \ announcedHandle + '. ' + sayContent content = \ translate['announces'] + ' ' + \ announcedHandle + '. ' + content post_id = None if post_json_object['object'].get('id'): post_id = remove_id_ending(post_json_object['object']['id']) followRequestsExist = False followRequestsList = [] accountsDir = acct_dir(base_dir, nickname, domain_full) approveFollowsFilename = accountsDir + '/followrequests.txt' if os.path.isfile(approveFollowsFilename): with open(approveFollowsFilename, 'r') as fp: follows = fp.readlines() if len(follows) > 0: followRequestsExist = True for i in range(len(follows)): follows[i] = follows[i].strip() followRequestsList = follows postDM = False dmFilename = accountsDir + '/.newDM' if os.path.isfile(dmFilename): postDM = True postReply = False replyFilename = accountsDir + '/.newReply' if os.path.isfile(replyFilename): postReply = True likedBy = '' likeFilename = accountsDir + '/.newLike' if os.path.isfile(likeFilename): with open(likeFilename, 'r') as fp: likedBy = fp.read() calendarFilename = accountsDir + '/.newCalendar' postCal = os.path.isfile(calendarFilename) shareFilename = accountsDir + '/.newShare' postShare = os.path.isfile(shareFilename) return _speakerEndpointJson(speakerName, summary, content, sayContent, imageDescription, detectedLinks, gender, post_id, postDM, postReply, followRequestsExist, followRequestsList, likedBy, published, postCal, postShare, theme_name, isDirect, replyToYou) def updateSpeaker(base_dir: str, http_prefix: str, nickname: str, domain: str, domain_full: str, post_json_object: {}, person_cache: {}, translate: {}, announcingActor: str, theme_name: str) -> None: """ Generates a json file which can be used for TTS announcement of incoming inbox posts """ speakerJson = \ _postToSpeakerJson(base_dir, http_prefix, nickname, domain, 
domain_full, post_json_object, person_cache, translate, announcingActor, theme_name) speakerFilename = acct_dir(base_dir, nickname, domain) + '/speaker.json' save_json(speakerJson, speakerFilename)