epicyon/content.py

__filename__ = "content.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.0.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"

import os
import time
import commentjson
from shutil import copyfile

def replaceEmojiFromTags(content: str,tag: [],messageType: str) -> str:
    """Substitutes each :emoji: shortcode within content with html image markup

    Only the items of tag which have a type of 'Emoji', a name and an
    icon url are used. The css class of the image is chosen from
    messageType: 'post header' gives emojiheader, 'profile' gives
    emojiprofile and anything else gives emoji.
    Returns the content with the substitutions applied.
    """
    # the css class only depends upon the message type, so pick it once
    if messageType=='profile':
        cssClass='emojiprofile'
    elif messageType=='post header':
        cssClass='emojiheader'
    else:
        cssClass='emoji'

    for item in tag:
        # skip anything which isn't a fully described emoji tag
        if not item.get('type') or item['type']!='Emoji':
            continue
        shortcode=item.get('name')
        if not shortcode:
            continue
        if not item.get('icon') or not item['icon'].get('url'):
            continue
        # nothing to do if the shortcode doesn't appear in the text
        if shortcode not in content:
            continue
        imgMarkup= \
            "<img src=\""+item['icon']['url']+"\" alt=\""+ \
            shortcode.replace(':','')+ \
            "\" align=\"middle\" class=\""+cssClass+"\"/>"
        content=content.replace(shortcode,imgMarkup)
    return content

def addMusicTag(content: str,tag: str) -> str:
    """Tags posts which link to a known music site

    If the content contains a link to one of the known music sites and
    doesn't already include the given hashtag then a :music: emoji is
    prepended and the hashtag is appended. Otherwise the content is
    returned unaltered.
    """
    # accept the tag either with or without its hash character
    hashtag=tag if '#' in tag else '#'+tag
    if hashtag in content:
        return content
    for musicDomain in ('soundcloud.com','bandcamp.com'):
        if musicDomain+'/' in content:
            return ':music: '+content+' '+hashtag+' '
    return content

def addWebLinks(content: str) -> str:
    """Adds html anchor markup to the web links within content

    Any word (separated by spaces or newlines) which begins with
    https://, http:// or dat:// becomes a link. A single trailing '.'
    or ';' is kept outside of the link. The scheme is wrapped in an
    'invisible' span and link text longer than maxLinkLength has its
    remainder wrapped in an 'invisible' span too.
    If at least one link scheme is present then any ' --linebreak-- '
    markers are also converted into <br>.
    Returns the content unaltered if it contains no link scheme.
    NOTE: urls are inserted into the markup without html escaping.
    """
    linkPrefixes=('https://','http://','dat://')
    # dat:// is included here so that dat-only posts also get linked
    if not any(prefix in content for prefix in linkPrefixes):
        return content

    maxLinkLength=40
    # Rebuild the text word by word rather than doing a global
    # search and replace. A global replace corrupts the markup when one
    # url is a prefix of another, because the shorter url also matches
    # inside the href of the markup generated for the longer one.
    outputLines=[]
    for line in content.split('\n'):
        outputWords=[]
        for w in line.split(' '):
            if not w.startswith(linkPrefixes):
                outputWords.append(w)
                continue
            # sentence punctuation at the end isn't part of the url
            trailing=''
            if w.endswith('.') or w.endswith(';'):
                trailing=w[-1]
                w=w[:-1]
            prefix=next(p for p in linkPrefixes if w.startswith(p))
            # only remove the leading scheme, so that urls which embed
            # another url (eg. web archive links) keep their full text
            linkText=w[len(prefix):]
            markup='<a href="'+w+'" rel="nofollow noopener" target="_blank">'
            markup+='<span class="invisible">'+prefix+'</span>'
            # prevent links from becoming too long
            if len(linkText)>maxLinkLength:
                markup+='<span class="ellipsis">'+linkText[:maxLinkLength]+'</span>'
                markup+='<span class="invisible">'+linkText[maxLinkLength:]+'</span></a>'
            else:
                markup+='<span class="ellipsis">'+linkText+'</span></a>'
            outputWords.append(markup+trailing)
        outputLines.append(' '.join(outputWords))
    content='\n'.join(outputLines)
    content=content.replace(' --linebreak-- ','<br>')
    return content

def validHashTag(hashtag: str) -> bool:
    """Returns true if the given hashtag only contains ascii letters and digits
    An empty hashtag is also reported as valid
    """
    permitted=set('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')
    # valid when no characters remain after removing the permitted ones
    return not (set(hashtag)-permitted)

def addHashTags(wordStr: str,httpPrefix: str,domain: str,replaceHashTags: {},postHashtags: {}) -> bool:
    """Converts a #hashtag word into a tag entry and its html replacement

    wordStr is expected to include its leading # character. When the
    text after the # is valid an entry is stored within postHashtags
    (keyed by the tag name without the #) and the anchor markup for the
    word is stored within replaceHashTags (keyed by wordStr).
    Returns true if the word has a replacement, including the case
    where one already existed, or false if the hashtag is invalid.
    """
    # the same hashtag was already handled earlier
    if replaceHashTags.get(wordStr):
        return True
    tagName=wordStr[1:]
    if not validHashTag(tagName):
        return False
    tagUrl=httpPrefix+"://"+domain+"/tags/"+tagName
    tagEntry={
        'href': tagUrl,
        'name': '#'+tagName,
        'type': 'Hashtag'
    }
    postHashtags[tagName]=tagEntry
    anchorMarkup= \
        "<a href=\""+tagUrl+"\" class=\"mention hashtag\" rel=\"tag\">" \
        "#<span>"+tagName+"</span></a>"
    replaceHashTags[wordStr]=anchorMarkup
    return True

def loadEmojiDict(emojiDataFilename: str,emojiDict: {}) -> None:
    """Populates emojiDict from a unicode emoji data text file

    Each usable line is expected to begin with a hex codepoint (or a
    codepoint range), to contain '; Emoji' and to have a description
    following a closing bracket. The description, with spaces and
    hyphens removed and converted to lower case, becomes the dictionary
    key and the first codepoint becomes the value. For ranges only the
    first codepoint and the first description are kept.
    Nothing happens if the file doesn't exist.
    """
    if not os.path.isfile(emojiDataFilename):
        return
    # The data file contains literal emoji characters, so read it as
    # utf-8 explicitly rather than relying upon the system locale
    with open(emojiDataFilename, "r", encoding="utf-8") as fileHandler:
        for line in fileHandler:
            # skip blank lines and comments
            if len(line)<5 or line.startswith('#'):
                continue
            if '; Emoji' not in line or ')' not in line:
                continue
            emojiUnicode=line.split(' ')[0]
            if len(emojiUnicode)<4:
                continue
            # for a range such as 231A..231B keep the first codepoint
            emojiUnicode=emojiUnicode.split('..')[0]
            # the description follows the bracketed emoji character(s)
            emojiName=line.split(')',1)[1].strip().replace(' ','').replace('-','')
            # for a range such as watch..hourglass keep the first name
            emojiName=emojiName.split('..')[0]
            emojiDict[emojiName.lower()]=emojiUnicode

def addEmoji(baseDir: str,wordStr: str,httpPrefix: str,domain: str,replaceEmoji: {},postTags: {},emojiDict: {}) -> bool:
    """Detects an :emoji: shortcode and adds an Emoji tag for it to postTags

    wordStr must begin and end with a colon. The name between the
    colons is looked up within emojiDict and the corresponding png
    image must exist within the emoji directory under baseDir.
    replaceEmoji is only consulted to see whether the word was already
    handled; this function doesn't add anything to it.
    Returns true if the word is a known emoji, otherwise false.
    """
    isDelimited=wordStr.startswith(':') and wordStr.endswith(':')
    if not isDelimited or len(wordStr)<3:
        return False
    if replaceEmoji.get(wordStr):
        return True
    # the name without its leading and trailing colons
    shortcode=wordStr[1:-1]
    # is the text of the emoji valid?
    if not validHashTag(shortcode):
        return False
    codepoint=emojiDict.get(shortcode)
    if not codepoint:
        return False
    imageName=codepoint+'.png'
    # only use emoji for which an image is available
    if not os.path.isfile(baseDir+'/emoji/'+imageName):
        return False
    postTags[shortcode]={
        'icon': {
            'mediaType': 'image/png',
            'type': 'Image',
            'url': httpPrefix+"://"+domain+"/emoji/"+imageName
        },
        'name': ':'+shortcode+':',
        'type': 'Emoji'
    }
    return True

def _storeMention(wordStr: str,httpPrefix: str,mentionDomain: str,mentionNickname: str, \
                  replaceMentions: {},recipients: [],tags: {}) -> None:
    """Records a single mention: adds the actor to the recipients if it
    isn't already there, creates the Mention tag and stores the html
    replacement for the mentioning word
    """
    actorUrl=httpPrefix+"://"+mentionDomain+"/users/"+mentionNickname
    if actorUrl not in recipients:
        recipients.append(actorUrl)
    tags[wordStr]={
        'href': actorUrl,
        'name': wordStr,
        'type': 'Mention'
    }
    replaceMentions[wordStr]= \
        "<span class=\"h-card\"><a href=\""+httpPrefix+"://"+mentionDomain+ \
        "/@"+mentionNickname+"\" class=\"u-url mention\">@<span>"+ \
        mentionNickname+"</span></a></span>"

def addMention(wordStr: str,httpPrefix: str,following: str,replaceMentions: {},recipients: [],tags: {}) -> bool:
    """Detects a mention and stores its replacement markup, recipient and tag

    wordStr is expected to begin with an @ character. following, if
    supplied, is iterated as a sequence of nick@domain lines which may
    have trailing newlines.
    A word of the form @nick is only accepted if an entry beginning
    with nick@ exists within following, whose domain is then used.
    A word of the form @nick@domain is accepted if the handle appears
    within following, or otherwise if the domain is localhost or
    contains a dot.
    Returns true if a mention was stored, otherwise false.
    """
    handle=wordStr[1:]
    hasDomain='@' in handle

    # @nick
    if following and not hasDomain:
        # best effort match of the nickname against the following list
        nicknamePrefix=handle+'@'
        for followed in following:
            if not followed.startswith(nicknamePrefix):
                continue
            followedDomain=followed.replace('\n','').split('@')[1]
            _storeMention(wordStr,httpPrefix,followedDomain,handle, \
                          replaceMentions,recipients,tags)
            return True
        return False
    if not hasDomain:
        return False

    # @nick@domain
    handleSections=handle.split('@')
    nickname=handleSections[0]
    if not nickname:
        return False
    mentionDomain=handleSections[1].strip('\n')
    if not mentionDomain:
        return False

    # handles which are being followed are accepted as they are,
    # anything else needs to have a plausible looking domain
    isFollowed=False
    if following:
        isFollowed=any(followed.replace('\n','')==handle for followed in following)
    if not isFollowed:
        if mentionDomain!='localhost' and '.' not in mentionDomain:
            return False
    _storeMention(wordStr,httpPrefix,mentionDomain,nickname, \
                  replaceMentions,recipients,tags)
    return True

def removeLongWords(content: str,maxWordLength: int,longWordsList: []) -> str:
    """Breaks up long words so that on mobile screens this doesn't disrupt the layout

    longWordsList contains the words to be processed. If it is empty
    then the words longer than maxWordLength are found by splitting the
    content on spaces.
    Words which look like markup (beginning with < or containing =\"),
    handles (containing a single @), links or paths are left alone.
    A word whose remainder after the first maxWordLength characters is
    shorter than maxWordLength has a newline inserted at that point.
    Longer words are truncated to maxWordLength characters.
    Returns the altered content.
    """
    if not longWordsList:
        longWordsList=[]
        for wordStr in content.split(' '):
            if len(wordStr)>maxWordLength and wordStr not in longWordsList:
                longWordsList.append(wordStr)
    for wordStr in longWordsList:
        # don't break html markup
        if wordStr.startswith('<') or '=\"' in wordStr:
            continue
        # leave handles alone, unless the @ characters are repeated
        if '@' in wordStr and '@@' not in wordStr:
            continue
        # don't truncate web links
        if any(scheme in wordStr for scheme in ('https:','http:','dat:')):
            continue
        # only consider the text before any markup attached to the word
        if '<' in wordStr:
            wordStr=wordStr.split('<',1)[0]
        if '/' in wordStr:
            continue
        # What remains may no longer be long, for example 'abc<' followed
        # by lengthy markup. Breaking it anyway would insert a newline
        # after every occurrence of that short text within the content.
        if len(wordStr)<=maxWordLength:
            continue
        wordStart=wordStr[:maxWordLength]
        wordEnd=wordStr[maxWordLength:]
        if len(wordEnd)<maxWordLength:
            content=content.replace(wordStr,wordStart+'\n'+wordEnd)
        else:
            # very long words are cut down to their first section
            content=content.replace(wordStr,wordStart)
    return content

def _loadEmojiJson(baseDir: str) -> {}:
    """Returns the emoji lookup dictionary loaded from emoji/emoji.json
    Up to five attempts are made, one second apart, and an empty
    dictionary is returned if none of them succeed
    """
    emojiFilename=baseDir+'/emoji/emoji.json'
    # emoji.json is generated so that it can be customized and the changes
    # will be retained even if default_emoji.json is subsequently updated
    if not os.path.isfile(emojiFilename):
        copyfile(baseDir+'/emoji/default_emoji.json',emojiFilename)
    emojiDict={}
    for attempt in range(5):
        if attempt>0:
            print('Retry emoji load '+emojiFilename)
        try:
            with open(emojiFilename, 'r') as fp:
                emojiDict=commentjson.load(fp)
            if attempt>0:
                print('emojiDict loaded on try '+str(attempt))
            break
        except Exception:
            # best effort: report the failure, wait and then try again
            print('WARN: commentjson exception addHtmlTags')
            print('Failed to load emoji (try '+str(attempt)+'): '+emojiFilename)
            time.sleep(1)
    return emojiDict

def addHtmlTags(baseDir: str,httpPrefix: str, \
                nickname: str,domain: str,content: str, \
                recipients: [],hashtags: {},isJsonContent=False) -> str:
    """ Replaces plaintext mentions such as @nick@domain into html
    by matching against known following accounts

    Hashtags, emoji shortcodes and web links are also detected, with
    mentions, hashtags and links being converted into markup.
    Mentioned actors are appended to recipients, and the tags for
    mentions, hashtags and emoji are all stored within hashtags.
    Content which already begins with <p> is returned unaltered,
    otherwise the result is wrapped within <p></p> with line breaks
    becoming paragraph breaks.
    If isJsonContent is true then no emoji replacements are applied to
    the content. Note that addEmoji within this file never writes to
    the replaceEmoji dictionary, so that step currently has nothing to
    replace in either case.
    """
    if content.startswith('<p>'):
        return content
    maxWordLength=40
    content=content.replace('\n',' --linebreak-- ')
    content=addMusicTag(content,'nowplaying')

    # remove . for words which are not mentions
    words=[]
    for wordStr in content.replace(',',' ').replace(';',' ').split(' '):
        if wordStr.endswith('.') and not wordStr.startswith('@'):
            wordStr=wordStr[:-1]
        if wordStr.startswith('.'):
            wordStr=wordStr[1:]
        words.append(wordStr)

    replaceMentions={}
    replaceHashTags={}
    replaceEmoji={}
    emojiDict={}
    emojiLoadAttempted=False
    originalDomain=domain
    if ':' in domain:
        domain=domain.split(':')[0]
    followingFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/following.txt'

    # read the following list so that we can detect just @nick
    # in addition to @nick@domain
    # This checks for any word which begins with @. Testing for '@'
    # being a member of the words list would only match a standalone
    # @ character, so that the list was never read for real mentions.
    following=None
    if any(wordStr.startswith('@') for wordStr in words):
        if os.path.isfile(followingFilename):
            with open(followingFilename, "r") as f:
                following = f.readlines()

    # extract mentions and tags from words
    longWordsList=[]
    for wordStr in words:
        wordLen=len(wordStr)
        if wordLen<=2:
            continue
        if wordLen>maxWordLength:
            longWordsList.append(wordStr)
        firstChar=wordStr[0]
        if firstChar=='@':
            addMention(wordStr,httpPrefix,following,replaceMentions,recipients,hashtags)
        elif firstChar=='#':
            addHashTags(wordStr,httpPrefix,originalDomain,replaceHashTags,hashtags)
        elif ':' in wordStr:
            # Load the emoji dictionary the first time that it's needed.
            # Only one round of attempts is made per call, otherwise a
            # failing load would repeat its retries and sleeps for
            # every word containing a colon, including every link.
            if not emojiLoadAttempted:
                emojiLoadAttempted=True
                emojiDict=_loadEmojiJson(baseDir)
            # the possible emoji name follows the first colon
            wordStr2=wordStr.split(':')[1]
            addEmoji(baseDir,':'+wordStr2+':',httpPrefix,originalDomain,replaceEmoji,hashtags,emojiDict)

    # replace words with their html versions
    for wordStr,replaceStr in replaceMentions.items():
        content=content.replace(wordStr,replaceStr)
    for wordStr,replaceStr in replaceHashTags.items():
        content=content.replace(wordStr,replaceStr)
    if not isJsonContent:
        for wordStr,replaceStr in replaceEmoji.items():
            content=content.replace(wordStr,replaceStr)

    content=addWebLinks(content)
    if longWordsList:
        content=removeLongWords(content,maxWordLength,longWordsList)
    content=content.replace(' --linebreak-- ','</p><p>')
    return '<p>'+content+'</p>'
                
def getMentionsFromHtml(htmlText: str,matchStr="<span class=\"h-card\"><a href=\"") -> []:
    """Extracts mentioned actors from the given html content string

    The text following each occurrence of matchStr, up to the next
    double quote, is taken to be an actor url. Only urls beginning with
    http or dat: are returned, in order of appearance and without
    duplicates. An empty list is returned if matchStr isn't present.
    """
    mentions=[]
    if matchStr not in htmlText:
        return mentions
    # The first section is whatever precedes the first match, which is
    # never a mention, so it gets skipped. Otherwise leading text such
    # as http://site "quote" would be wrongly returned as an actor.
    for mentionStr in htmlText.split(matchStr)[1:]:
        if '"' not in mentionStr:
            continue
        # the url runs up to the quote which closes the href
        actorStr=mentionStr.split('"',1)[0]
        if not actorStr.startswith(('http','dat:')):
            continue
        if actorStr not in mentions:
            mentions.append(actorStr)
    return mentions
mentions function 2019-07-15 14:11:31 +00:00			`__filename__ = "content.py"`
			`__author__ = "Bob Mottram"`
			`__license__ = "AGPL3+"`
Version 1.0.0 2019-08-29 13:35:29 +00:00			`__version__ = "1.0.0"`
mentions function 2019-07-15 14:11:31 +00:00			`__maintainer__ = "Bob Mottram"`
			`__email__ = "bob@freedombone.net"`
			`__status__ = "Production"`

			`import os`
Loading emoji 2019-09-29 14:48:17 +00:00			`import time`
mentions function 2019-07-15 14:11:31 +00:00			`import commentjson`
Document customizations 2019-08-11 16:55:22 +00:00			`from shutil import copyfile`
mentions function 2019-07-15 14:11:31 +00:00
Separate tag replacement functions 2019-09-29 17:20:10 +00:00			`def replaceEmojiFromTags(content: str,tag: [],messageType: str) -> str:`
Insert emoji 2019-09-29 16:28:02 +00:00			`"""Uses the tags to replace :emoji: with html image markup`
			`"""`
Separate tag replacement functions 2019-09-29 17:20:10 +00:00			`for tagItem in tag:`
			`if not tagItem.get('type'):`
			`continue`
			`if tagItem['type']!='Emoji':`
			`continue`
			`if not tagItem.get('name'):`
Insert emoji 2019-09-29 16:28:02 +00:00			`continue`
			`if not tagItem.get('icon'):`
			`continue`
			`if not tagItem['icon'].get('url'):`
			`continue`
			`if tagItem['name'] not in content:`
			`continue`
			`htmlClass='emoji'`
			`if messageType=='post header':`
			`htmlClass='emojiheader'`
			`if messageType=='profile':`
			`htmlClass='emojiprofile'`
			`emojiHtml="<img src=\""+tagItem['icon']['url']+"\" alt=\""+tagItem['name'].replace(':','')+"\" align=\"middle\" class=\""+htmlClass+"\"/>"`
			`content=content.replace(tagItem['name'],emojiHtml)`
			`return content`

Add music tags 2019-09-05 09:54:27 +00:00			`def addMusicTag(content: str,tag: str) -> str:`
			`"""If a music link is found then ensure that the post is tagged appropriately`
			`"""`
			`if '#' not in tag:`
			`tag='#'+tag`
			`if tag in content:`
			`return content`
			`musicSites=['soundcloud.com','bandcamp.com']`
			`musicSiteFound=False`
			`for site in musicSites:`
			`if site+'/' in content:`
			`musicSiteFound=True`
			`break`
			`if not musicSiteFound:`
			`return content`
Add music emoji to music links 2019-09-24 09:27:34 +00:00			`return ':music: '+content+' '+tag+' '`
Add music tags 2019-09-05 09:54:27 +00:00
Add web links 2019-08-21 12:07:30 +00:00			`def addWebLinks(content: str) -> str:`
			`"""Adds markup for web links`
			`"""`
			`if not ('https://' in content or 'http://' in content):`
			`return content`

Tidying 2019-10-09 12:23:20 +00:00			`maxLinkLength=40`
Linebreaks with extra space 2019-10-01 10:36:51 +00:00			`words=content.replace('\n',' --linebreak-- ').split(' ')`
Add web links 2019-08-21 12:07:30 +00:00			`replaceDict={}`
			`for w in words:`
Support dat links 2019-10-01 08:54:52 +00:00			`if w.startswith('https://') or \`
			`w.startswith('http://') or \`
			`w.startswith('dat://'):`
Add web links 2019-08-21 12:07:30 +00:00			`if w.endswith('.') or w.endswith(';'):`
			`w=w[:-1]`
			`markup='<a href="'+w+'" rel="nofollow noopener" target="_blank">'`
			`if w.startswith('https://'):`
			`markup+='<span class="invisible">https://</span>'`
			`elif w.startswith('http://'):`
			`markup+='<span class="invisible">http://</span>'`
Support dat links 2019-10-01 08:54:52 +00:00			`elif w.startswith('dat://'):`
			`markup+='<span class="invisible">dat://</span>'`
			`linkText=w.replace('https://','').replace('http://','').replace('dat://','')`
Prevent links from becoming too long 2019-09-18 08:37:42 +00:00			`# prevent links from becoming too long`
Tidying 2019-10-09 12:23:20 +00:00			`if len(linkText)>maxLinkLength:`
			`markup+='<span class="ellipsis">'+linkText[:maxLinkLength]+'</span>'`
			`markup+='<span class="invisible">'+linkText[maxLinkLength:]+'</span></a>'`
Include ending after ellipsis 2019-10-01 10:47:47 +00:00			`else:`
			`markup+='<span class="ellipsis">'+linkText+'</span></a>'`
Add web links 2019-08-21 12:07:30 +00:00			`replaceDict[w]=markup`
			`for url,markup in replaceDict.items():`
			`content=content.replace(url,markup)`
Linebreaks with extra space 2019-10-01 10:36:51 +00:00			`content=content.replace(' --linebreak-- ','<br>')`
Add web links 2019-08-21 12:07:30 +00:00			`return content`

Add hashtag conversion to html 2019-08-09 11:12:08 +00:00			`def validHashTag(hashtag: str) -> bool:`
			`"""Returns true if the give hashtag contains valid characters`
			`"""`
			`validChars = set('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')`
			`if set(hashtag).issubset(validChars):`
			`return True`
			`return False`

			`def addHashTags(wordStr: str,httpPrefix: str,domain: str,replaceHashTags: {},postHashtags: {}) -> bool:`
			`"""Detects hashtags and adds them to the replacements dict`
			`Also updates the hashtags list to be added to the post`
			`"""`
			`if replaceHashTags.get(wordStr):`
			`return True`
			`hashtag=wordStr[1:]`
			`if not validHashTag(hashtag):`
			`return False`
			`hashtagUrl=httpPrefix+"://"+domain+"/tags/"+hashtag`
			`postHashtags[hashtag]= {`
			`'href': hashtagUrl,`
			`'name': '#'+hashtag,`
			`'type': 'Hashtag'`
			`}`
			`replaceHashTags[wordStr]= \`
			`"<a href=\""+hashtagUrl+"\" class=\"mention hashtag\" rel=\"tag\">#<span>"+hashtag+"</span></a>"`
			`return True`

Emoji in posts 2019-08-09 16:18:00 +00:00			`def loadEmojiDict(emojiDataFilename: str,emojiDict: {}) -> None:`
			`"""Creates an emoji dictionary based on emoji/emoji-data.txt`
			`"""`
			`if not os.path.isfile(emojiDataFilename):`
			`return`
			`with open (emojiDataFilename, "r") as fileHandler:`
			`for line in fileHandler:`
			`if len(line)<5:`
			`continue`
			`if line.startswith('#'):`
			`continue`
			`if '; Emoji' not in line:`
			`continue`
			`if ')' not in line:`
			`continue`
			`emojiUnicode=line.split(' ')[0]`
			`if len(emojiUnicode)<4:`
			`continue`
			`if '..' in emojiUnicode:`
			`emojiUnicode=emojiUnicode.split('..')[0]`
Emoji spacing 2019-08-09 18:48:40 +00:00			`emojiName=line.split(')',1)[1].strip().replace('\n','').replace(' ','').replace('-','')`
Emoji in posts 2019-08-09 16:18:00 +00:00			`if '..' in emojiName:`
			`emojiName=emojiName.split('..')[0]`
			`emojiDict[emojiName.lower()]=emojiUnicode`

Remove argument 2019-09-23 11:36:54 +00:00			`def addEmoji(baseDir: str,wordStr: str,httpPrefix: str,domain: str,replaceEmoji: {},postTags: {},emojiDict: {}) -> bool:`
Emoji in posts 2019-08-09 16:18:00 +00:00			`"""Detects Emoji and adds them to the replacements dict`
			`Also updates the tags list to be added to the post`
			`"""`
			`if not wordStr.startswith(':'):`
			`return False`
			`if not wordStr.endswith(':'):`
			`return False`
			`if len(wordStr)<3:`
			`return False`
			`if replaceEmoji.get(wordStr):`
			`return True`
Comments 2019-09-23 11:11:13 +00:00			`# remove leading and trailing : characters`
Emoji in posts 2019-08-09 16:18:00 +00:00			`emoji=wordStr[1:]`
			`emoji=emoji[:-1]`
Comments 2019-09-23 11:11:13 +00:00			`# is the text of the emoji valid?`
Emoji in posts 2019-08-09 16:18:00 +00:00			`if not validHashTag(emoji):`
			`return False`
			`if not emojiDict.get(emoji):`
			`return False`
			`emojiFilename=baseDir+'/emoji/'+emojiDict[emoji]+'.png'`
			`if not os.path.isfile(emojiFilename):`
			`return False`
			`emojiUrl=httpPrefix+"://"+domain+"/emoji/"+emojiDict[emoji]+'.png'`
			`postTags[emoji]= {`
Emoji tag icon 2019-08-19 13:35:55 +00:00			`'icon': {`
			`'mediaType': 'image/png',`
			`'type': 'Image',`
			`'url': emojiUrl`
			`},`
Emoji in posts 2019-08-09 16:18:00 +00:00			`'name': ':'+emoji+':',`
			`'type': 'Emoji'`
			`}`
			`return True`

Add mention tags 2019-08-19 12:13:18 +00:00			`def addMention(wordStr: str,httpPrefix: str,following: str,replaceMentions: {},recipients: [],tags: {}) -> bool:`
Refactor mentions replacements 2019-08-09 09:09:21 +00:00			`"""Detects mentions and adds them to the replacements dict and recipients list`
			`"""`
			`possibleHandle=wordStr[1:]`
Detecting mentions for @nick@domain 2019-08-19 10:05:50 +00:00			`# @nick`
Handle mentions even if not following 2019-08-19 11:41:15 +00:00			`if following and '@' not in possibleHandle:`
Handle mentions without domains 2019-08-09 09:48:51 +00:00			`# fall back to a best effort match against the following list`
			`# if no domain was specified. eg. @nick`
			`possibleNickname=possibleHandle`
			`for follow in following:`
			`if follow.startswith(possibleNickname+'@'):`
			`replaceDomain=follow.replace('\n','').split('@')[1]`
			`recipientActor=httpPrefix+"://"+replaceDomain+"/users/"+possibleNickname`
			`if recipientActor not in recipients:`
			`recipients.append(recipientActor)`
Add mention tags 2019-08-19 12:13:18 +00:00			`tags[wordStr]={`
			`'href': recipientActor,`
			`'name': wordStr,`
			`'type': 'Mention'`
			`}`
Handle mentions without domains 2019-08-09 09:48:51 +00:00			`replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+replaceDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"`
			`return True`
			`return False`
Check for valid handle 2019-10-29 20:15:21 +00:00			`possibleNickname=None`
			`possibleDomain=None`
			`if '@' not in possibleHandle:`
			`return False`
Refactor mentions replacements 2019-08-09 09:09:21 +00:00			`possibleNickname=possibleHandle.split('@')[0]`
Check for valid handle 2019-10-29 20:15:21 +00:00			`if not possibleNickname:`
			`return False`
Tidying 2019-08-19 10:43:52 +00:00			`possibleDomain=possibleHandle.split('@')[1].strip('\n')`
Check for valid handle 2019-10-29 20:15:21 +00:00			`if not possibleDomain:`
			`return False`
Handle mentions even if not following 2019-08-19 11:41:15 +00:00			`if following:`
			`for follow in following:`
			`if follow.replace('\n','')!=possibleHandle:`
			`continue`
			`recipientActor=httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname`
			`if recipientActor not in recipients:`
			`recipients.append(recipientActor)`
Add mention tags 2019-08-19 12:13:18 +00:00			`tags[wordStr]={`
			`'href': recipientActor,`
			`'name': wordStr,`
			`'type': 'Mention'`
			`}`
Handle mentions even if not following 2019-08-19 11:41:15 +00:00			`replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+possibleDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"`
			`return True`
Detecting mentions for @nick@domain 2019-08-19 10:05:50 +00:00			`# @nick@domain`
Check for valid handle 2019-10-29 20:15:21 +00:00			`if not (possibleDomain=='localhost' or '.' in possibleDomain):`
			`return False`
			`recipientActor=httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname`
			`if recipientActor not in recipients:`
			`recipients.append(recipientActor)`
			`tags[wordStr]={`
			`'href': recipientActor,`
			`'name': wordStr,`
			`'type': 'Mention'`
			`}`
			`replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+possibleDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"`
			`return True`
Refactor mentions replacements 2019-08-09 09:09:21 +00:00
Tidying 2019-10-18 12:24:31 +00:00			`def removeLongWords(content: str,maxWordLength: int,longWordsList: []) -> str:`
Remove very long words 2019-10-09 12:19:17 +00:00			`"""Breaks up long words so that on mobile screens this doesn't disrupt the layout`
			`"""`
			`words=content.split(' ')`
Try to remove long words 2019-11-04 20:39:14 +00:00			`if not longWordsList:`
			`longWordsList=[]`
			`for wordStr in words:`
			`if len(wordStr)>maxWordLength:`
			`if wordStr not in longWordsList:`
			`longWordsList.append(wordStr)`
Tidying 2019-10-18 12:24:31 +00:00			`for wordStr in longWordsList:`
			`if wordStr.startswith('<'):`
			`continue`
@ 2019-11-04 21:08:43 +00:00			`if '=\"' in wordStr:`
			`continue`
			`if '@' in wordStr:`
Check for repeats 2019-11-04 21:11:09 +00:00			`if '@@' not in wordStr:`
			`continue`
Try to remove long words 2019-11-04 20:39:14 +00:00			`if 'https:' in wordStr:`
Don't trunkate web links 2019-10-25 18:27:32 +00:00			`continue`
Try to remove long words 2019-11-04 20:39:14 +00:00			`elif 'http:' in wordStr:`
			`continue`
			`elif 'dat:' in wordStr:`
			`continue`
			`if '<' in wordStr:`
			`wordStr=wordStr.split('<',1)[0]`
Don't trunkate web links 2019-10-25 18:27:32 +00:00			`if '/' in wordStr:`
			`continue`
Tidying 2019-10-18 12:24:31 +00:00			`if len(wordStr[maxWordLength:])<maxWordLength:`
			`content= \`
			`content.replace(wordStr, \`
Fixed separator 2019-11-04 20:57:41 +00:00			`wordStr[:maxWordLength]+'\n'+ \`
No suffix 2019-11-04 20:45:10 +00:00			`wordStr[maxWordLength:])`
Tidying 2019-10-18 12:24:31 +00:00			`else:`
			`content= \`
			`content.replace(wordStr, \`
No suffix 2019-11-04 20:45:10 +00:00			`wordStr[:maxWordLength])`
Remove very long words 2019-10-09 12:19:17 +00:00			`return content`

Refactor mentions replacements 2019-08-09 09:09:21 +00:00			`def addHtmlTags(baseDir: str,httpPrefix: str, \`
Also send to mentioned recipients 2019-08-05 16:56:32 +00:00			`nickname: str,domain: str,content: str, \`
Don't replace emoji within json content 2019-10-29 13:04:38 +00:00			`recipients: [],hashtags: {},isJsonContent=False) -> str:`
mentions function 2019-07-15 14:11:31 +00:00			`""" Replaces plaintext mentions such as @nick@domain into html`
			`by matching against known following accounts`
			`"""`
			`if content.startswith('<p>'):`
			`return content`
Remove very long words 2019-10-09 12:19:17 +00:00			`maxWordLength=40`
music links at end of function 2019-09-05 10:26:08 +00:00			`content=content.replace('\n',' --linebreak-- ')`
Add music tags first 2019-09-05 10:29:09 +00:00			`content=addMusicTag(content,'nowplaying')`
music links at end of function 2019-09-05 10:26:08 +00:00			`words=content.replace(',',' ').replace(';',' ').split(' ')`
Add music tags 2019-09-05 09:54:27 +00:00
Handle . within content when extracting tags 2019-08-19 11:07:04 +00:00			`# remove . for words which are not mentions`
			`wordCtr=0`
Create word array 2019-08-19 11:14:38 +00:00			`newWords=[]`
words list 2019-08-19 11:22:05 +00:00			`for wordIndex in range(0,len(words)):`
Handle . within content when extracting tags 2019-08-19 11:07:04 +00:00			`wordStr=words[wordIndex]`
			`if wordStr.endswith('.'):`
			`if not wordStr.startswith('@'):`
Create word array 2019-08-19 11:14:38 +00:00			`wordStr=wordStr[:-1]`
Typo 2019-08-19 11:08:47 +00:00			`if wordStr.startswith('.'):`
Create word array 2019-08-19 11:14:38 +00:00			`wordStr=wordStr[1:]`
			`newWords.append(wordStr)`
			`words=newWords`

mentions function 2019-07-15 14:11:31 +00:00			`replaceMentions={}`
Add hashtag conversion to html 2019-08-09 11:12:08 +00:00			`replaceHashTags={}`
Emoji in posts 2019-08-09 16:18:00 +00:00			`replaceEmoji={}`
			`emojiDict={}`
			`originalDomain=domain`
Handle domains with ports 2019-07-15 14:24:33 +00:00			`if ':' in domain:`
			`domain=domain.split(':')[0]`
mentions function 2019-07-15 14:11:31 +00:00			`followingFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/following.txt'`
Refactor mentions replacements 2019-08-09 09:09:21 +00:00
			`# read the following list so that we can detect just @nick`
			`# in addition to @nick@domain`
Fix tags 2019-08-10 16:55:17 +00:00			`following=None`
Tidying 2019-10-18 12:24:31 +00:00			`if '@' in words:`
			`if os.path.isfile(followingFilename):`
			`with open(followingFilename, "r") as f:`
			`following = f.readlines()`
Refactor mentions replacements 2019-08-09 09:09:21 +00:00
			`# extract mentions and tags from words`
Tidying 2019-10-18 12:24:31 +00:00			`longWordsList=[]`
mentions function 2019-07-15 14:11:31 +00:00			`for wordStr in words:`
Tidying 2019-10-18 12:24:31 +00:00			`wordLen=len(wordStr)`
			`if wordLen>2:`
			`if wordLen>maxWordLength:`
			`longWordsList.append(wordStr)`
			`firstChar=wordStr[0]`
			`if firstChar=='@':`
			`if addMention(wordStr,httpPrefix,following,replaceMentions,recipients,hashtags):`
			`continue`
			`elif firstChar=='#':`
			`if addHashTags(wordStr,httpPrefix,originalDomain,replaceHashTags,hashtags):`
			`continue`
			`elif ':' in wordStr:`
Tidying 2019-10-12 12:55:05 +00:00			`#print('TAG: emoji located - '+wordStr)`
Detection of emoji 2019-10-09 18:05:24 +00:00			`wordStr2=wordStr.split(':')[1]`
No second colon 2019-10-09 18:32:53 +00:00			`if not emojiDict:`
Comments 2019-11-03 14:46:30 +00:00			`# emoji.json is generated so that it can be customized and the changes`
			`# will be retained even if default_emoji.json is subsequently updated`
No second colon 2019-10-09 18:32:53 +00:00			`if not os.path.isfile(baseDir+'/emoji/emoji.json'):`
			`copyfile(baseDir+'/emoji/default_emoji.json',baseDir+'/emoji/emoji.json')`
			`emojiDictCtr=0`
More debug 2019-10-16 14:21:01 +00:00			`while not emojiDict and emojiDictCtr<5:`
No second colon 2019-10-09 18:32:53 +00:00			`if emojiDictCtr>0:`
			`print('Retry emoji load '+baseDir+'/emoji/emoji.json')`
			`try:`
			`with open(baseDir+'/emoji/emoji.json', 'r') as fp:`
			`emojiDict=commentjson.load(fp)`
More debug 2019-10-16 14:21:01 +00:00			`if emojiDictCtr>0:`
			`print('emojiDict loaded on try '+str(emojiDictCtr))`
More retries 2019-10-12 09:37:21 +00:00			`break`
Catch all commentjson load errors 2019-10-26 13:01:32 +00:00			`except:`
			`print('WARN: commentjson exception addHtmlTags')`
			`print('Failed to load emoji (try '+str(emojiDictCtr)+'): '+baseDir+'/emoji/emoji.json')`
No second colon 2019-10-09 18:32:53 +00:00			`time.sleep(1)`
Indentation 2019-10-09 18:34:59 +00:00			`emojiDictCtr+=1`
Directly use a json file for emoji lookup 2019-08-11 17:16:42 +00:00
Tidying 2019-10-12 12:55:05 +00:00			`#print('TAG: looking up emoji for :'+wordStr2+':')`
No second colon 2019-10-09 18:32:53 +00:00			`addEmoji(baseDir,':'+wordStr2+':',httpPrefix,originalDomain,replaceEmoji,hashtags,emojiDict)`
Refactor mentions replacements 2019-08-09 09:09:21 +00:00
			`# replace words with their html versions`
mentions function 2019-07-15 14:11:31 +00:00			`for wordStr,replaceStr in replaceMentions.items():`
			`content=content.replace(wordStr,replaceStr)`
Add hashtag conversion to html 2019-08-09 11:12:08 +00:00			`for wordStr,replaceStr in replaceHashTags.items():`
			`content=content.replace(wordStr,replaceStr)`
Don't replace emoji within json content 2019-10-29 13:04:38 +00:00			`if not isJsonContent:`
			`for wordStr,replaceStr in replaceEmoji.items():`
			`content=content.replace(wordStr,replaceStr)`

Add web links 2019-08-21 12:07:30 +00:00			`content=addWebLinks(content)`
Tidying 2019-10-18 12:24:31 +00:00			`if longWordsList:`
			`content=removeLongWords(content,maxWordLength,longWordsList)`
Extra space 2019-09-05 10:23:22 +00:00			`content=content.replace(' --linebreak-- ','</p><p>')`
mentions function 2019-07-15 14:11:31 +00:00			`return '<p>'+content+'</p>'`

Include mentions in new posts 2019-08-05 19:13:15 +00:00			`def getMentionsFromHtml(htmlText: str,matchStr="<span class=\"h-card\"><a href=\"") -> []:`
			`"""Extracts mentioned actors from the given html content string`
			`"""`
			`mentions=[]`
			`if matchStr not in htmlText:`
			`return mentions`
Variable name 2019-08-05 19:20:13 +00:00			`mentionsList=htmlText.split(matchStr)`
Include mentions in new posts 2019-08-05 19:13:15 +00:00			`for mentionStr in mentionsList:`
			`if '"' not in mentionStr:`
			`continue`
			`actorStr=mentionStr.split('"')[0]`
			`if actorStr.startswith('http') or \`
			`actorStr.startswith('dat:'):`
Avoid duplicates 2019-09-22 17:54:33 +00:00			`if actorStr not in mentions:`
			`mentions.append(actorStr)`
Include mentions in new posts 2019-08-05 19:13:15 +00:00			`return mentions`