epicyon/content.py

196 lines
7.5 KiB
Python
Raw Normal View History

2019-07-15 14:11:31 +00:00
# Module metadata: file name, authorship, licence, version,
# maintainer contact details and release status
__filename__ = "content.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "0.0.1"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
import os
import commentjson
2019-08-09 11:12:08 +00:00
def validHashTag(hashtag: str) -> bool:
    """Returns true if the given hashtag contains only valid characters

    A valid hashtag is a non-empty string made up solely of ASCII
    digits and letters (0-9, a-z, A-Z).
    An empty value returns False, because the subset test used below
    would otherwise accept it vacuously.
    """
    if not hashtag:
        return False
    validChars = set('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    return set(hashtag).issubset(validChars)
def addHashTags(wordStr: str, httpPrefix: str, domain: str,
                replaceHashTags: {}, postHashtags: {}) -> bool:
    """Registers a single word as a hashtag, if it is one

    When wordStr is '#' followed by a name accepted by validHashTag,
    two dictionaries are updated:
      replaceHashTags maps the word to the html link which replaces it
      postHashtags maps the tag name to its tag entry
                   (href, name and type) for the post
    Returns True if the word is a hashtag (including one which was
    already registered), otherwise False.
    """
    # must be a '#' followed by at least one character
    if not wordStr.startswith('#') or len(wordStr) < 2:
        return False

    # the same word was already handled
    if replaceHashTags.get(wordStr):
        return True

    tagName = wordStr[1:]
    if not validHashTag(tagName):
        return False

    tagUrl = ''.join([httpPrefix, '://', domain, '/tags/', tagName])
    postHashtags[tagName] = {
        'href': tagUrl,
        'name': '#' + tagName,
        'type': 'Hashtag'
    }
    tagLink = '<a href="' + tagUrl + '" class="mention hashtag" rel="tag">'
    tagLink += '#<span>' + tagName + '</span></a>'
    replaceHashTags[wordStr] = tagLink
    return True
2019-08-09 16:18:00 +00:00
def loadEmojiDict(emojiDataFilename: str, emojiDict: {}) -> None:
    """Creates an emoji dictionary based on emoji/emoji-data.txt

    emojiDataFilename is the path of the data file. If it does not
    exist then emojiDict is left untouched.
    emojiDict is updated in place. Each key is the emoji name in lower
    case with spaces and hyphens removed, and each value is the first
    space-delimited field of the line (for a range such as AAAA..BBBB
    only the part before the '..' is kept).

    Only lines which contain '; Emoji' and a ')' are used; lines
    beginning with '#' or shorter than five characters are skipped.
    The name is whatever follows the first ')' on the line.
    """
    if not os.path.isfile(emojiDataFilename):
        return
    # Read explicitly as UTF-8 rather than with the locale default, so
    # that parsing does not depend on how the host system is configured.
    # NOTE(review): assumes the data file is UTF-8 encoded - confirm
    with open(emojiDataFilename, "r", encoding="utf-8") as fileHandler:
        for line in fileHandler:
            if len(line) < 5 or line.startswith('#'):
                continue
            if '; Emoji' not in line or ')' not in line:
                continue
            emojiUnicode = line.split(' ')[0]
            if len(emojiUnicode) < 4:
                continue
            if '..' in emojiUnicode:
                # a range of code points: use the first one
                emojiUnicode = emojiUnicode.split('..')[0]
            emojiName = line.split(')', 1)[1].strip()
            emojiName = \
                emojiName.replace('\n', '').replace(' ', '').replace('-', '')
            if '..' in emojiName:
                # a range of names: use the first one
                emojiName = emojiName.split('..')[0]
            emojiDict[emojiName.lower()] = emojiUnicode
def addEmoji(baseDir: str, wordStr: str, httpPrefix: str, domain: str,
             replaceEmoji: {}, postTags: {}, emojiDict: {}) -> bool:
    """Registers a single word as an emoji, if it is one

    The word must have the form :name: where the name is accepted by
    validHashTag, exists within emojiDict and has a matching image
    at baseDir/emoji/<emojiDict[name]>.png
    When that is the case two dictionaries are updated:
      replaceEmoji maps the word to the html img element replacing it
      postTags maps the name to its tag entry (href, name and type)
    Returns True if the word is an emoji (including one which was
    already registered), otherwise False.
    """
    isShortcode = wordStr.startswith(':') and wordStr.endswith(':')
    if not isShortcode or len(wordStr) < 3:
        return False

    # the same word was already handled
    if replaceEmoji.get(wordStr):
        return True

    # the name between the two colons
    shortcode = wordStr[1:-1]
    if not validHashTag(shortcode):
        return False

    emojiCode = emojiDict.get(shortcode)
    if not emojiCode:
        return False

    # only emoji which have an image available can be used
    imageName = emojiCode + '.png'
    if not os.path.isfile(baseDir + '/emoji/' + imageName):
        return False

    imageUrl = httpPrefix + "://" + domain + "/emoji/" + imageName
    postTags[shortcode] = {
        'href': imageUrl,
        'name': ':' + shortcode + ':',
        'type': 'Emoji'
    }
    imgElement = '<img src="' + imageUrl + '" alt="' + shortcode + '"'
    imgElement += ' align="middle" class="emoji"/>'
    replaceEmoji[wordStr] = imgElement
    return True
2019-08-09 09:09:21 +00:00
def addMention(wordStr: str, httpPrefix: str, following: [],
               replaceMentions: {}, recipients: []) -> bool:
    """Detects mentions and adds them to the replacements dict and recipients list

    wordStr is a single word from the post, such as @nick or
    @nick@domain
    following is the list of followed handles in the form nick@domain,
    each optionally ending with a newline
    replaceMentions is updated with the html which replaces the word
    recipients gains the actor url of the mentioned account, unless
    it is already present

    A word of the form @nick@domain must exactly match a followed
    handle. A word of the form @nick is matched, on a best effort
    basis, against the first followed handle having that nickname.
    Returns True if the word was recognised as a mention.
    """
    if not wordStr.startswith('@') or len(wordStr) < 2:
        return False
    if not following:
        # nothing to match against
        return False

    possibleHandle = wordStr[1:]
    mentionNickname = None
    mentionDomain = None
    if '@' not in possibleHandle:
        # fall back to a best effort match against the following list
        # if no domain was specified. eg. @nick
        handlePrefix = possibleHandle + '@'
        for follow in following:
            if follow.startswith(handlePrefix):
                mentionNickname = possibleHandle
                mentionDomain = follow.replace('\n', '').split('@')[1]
                break
    else:
        for follow in following:
            if follow.replace('\n', '') == possibleHandle:
                handleSections = possibleHandle.split('@')
                mentionNickname = handleSections[0]
                mentionDomain = handleSections[1]
                break

    if mentionNickname is None:
        return False

    recipientActor = \
        httpPrefix + "://" + mentionDomain + "/users/" + mentionNickname
    if recipientActor not in recipients:
        recipients.append(recipientActor)
    replaceMentions[wordStr] = \
        "<span class=\"h-card\"><a href=\"" + httpPrefix + "://" + \
        mentionDomain + "/@" + mentionNickname + \
        "\" class=\"u-url mention\">@<span>" + mentionNickname + \
        "</span></a></span>"
    return True
2019-08-09 09:09:21 +00:00
def addHtmlTags(baseDir: str, httpPrefix: str,
                nickname: str, domain: str, content: str,
                recipients: [], hashtags: {}) -> str:
    """ Replaces plaintext mentions such as @nick@domain into html
    by matching against known following accounts.
    Hashtags such as #tag and emoji such as :name: are also
    replaced by their html versions.

    baseDir is the directory containing the accounts and emoji
    subdirectories
    nickname and domain identify the account whose following.txt
    is used to resolve mentions. Any port number is removed from the
    domain when locating that file, but is kept within generated urls.
    recipients gains the actor url of each mentioned account
    hashtags gains a tag entry for each hashtag and emoji found

    Content which already begins with <p> is returned unchanged.
    Otherwise the returned html is wrapped within <p></p>, with each
    newline becoming a paragraph break.
    """
    import re

    if content.startswith('<p>'):
        return content

    # Words are separated by any whitespace, commas or semicolons.
    # Full stops are handled per word below, because they can be part
    # of a domain name within a mention such as @nick@domain.tld
    words = content.replace(',', ' ').replace(';', ' ').split()

    replaceMentions = {}
    replaceHashTags = {}
    replaceEmoji = {}
    emojiDict = {}
    emojiLoadAttempted = False
    originalDomain = domain
    if ':' in domain:
        domain = domain.split(':')[0]
    followingFilename = \
        baseDir + '/accounts/' + nickname + '@' + domain + '/following.txt'

    # read the following list so that we can detect just @nick
    # in addition to @nick@domain
    following = None
    if os.path.isfile(followingFilename):
        with open(followingFilename, "r") as f:
            following = f.readlines()

    # extract mentions and tags from words
    for fullWord in words:
        if following and '.' in fullWord:
            # try the whole word first, minus any sentence ending
            # full stops, so that the domain of a mention stays intact
            wholeWord = fullWord.rstrip('.')
            if wholeWord:
                if addMention(wholeWord, httpPrefix, following,
                              replaceMentions, recipients):
                    continue
        # otherwise full stops are treated as word separators
        for wordStr in fullWord.split('.'):
            if following:
                if addMention(wordStr, httpPrefix, following,
                              replaceMentions, recipients):
                    continue
            if addHashTags(wordStr, httpPrefix, originalDomain,
                           replaceHashTags, hashtags):
                continue
            if len(wordStr) > 2 and \
               wordStr.startswith(':') and wordStr.endswith(':'):
                if not emojiLoadAttempted:
                    # only load the lookup if the post could contain emoji
                    print('Loading emoji lookup')
                    loadEmojiDict(baseDir + '/emoji/emoji-data.txt',
                                  emojiDict)
                    emojiLoadAttempted = True
            addEmoji(baseDir, wordStr, httpPrefix, originalDomain,
                     replaceEmoji, hashtags, emojiDict)

    # replace words with their html versions
    # This is done in a single pass, trying the longest words first.
    # Replacing one word at a time would also alter html which was
    # already inserted (the link for @nick@domain contains @nick) and
    # would break words beginning with a shorter one (#cat in #cats).
    replacements = {}
    replacements.update(replaceMentions)
    replacements.update(replaceHashTags)
    replacements.update(replaceEmoji)
    if replacements:
        longestFirst = sorted(replacements, key=len, reverse=True)
        wordsPattern = \
            re.compile('|'.join(re.escape(word) for word in longestFirst))
        content = \
            wordsPattern.sub(lambda match: replacements[match.group(0)],
                             content)

    content = content.replace('\n', '</p><p>')
    return '<p>' + content + '</p>'
2019-08-05 19:13:15 +00:00
def getMentionsFromHtml(htmlText: str, matchStr="<span class=\"h-card\"><a href=\"") -> []:
    """Extracts mentioned actors from the given html content string

    htmlText is the html to be searched
    matchStr is the markup which immediately precedes the url
    of each mention
    Returns a list containing the url following each occurrence of
    matchStr, up to its closing double quote. Only urls beginning
    with http or dat: are included. The list is empty if there are
    no mentions.
    """
    mentions = []
    if not matchStr or matchStr not in htmlText:
        return mentions
    # The first section is the text before the first occurrence of
    # matchStr, so it is skipped rather than being treated as a mention
    for mentionStr in htmlText.split(matchStr)[1:]:
        if '"' not in mentionStr:
            continue
        actorStr = mentionStr.split('"')[0]
        if actorStr.startswith(('http', 'dat:')):
            mentions.append(actorStr)
    return mentions