__filename__ = "content.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "0.0.1"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"

import os
import commentjson

# NOTE(review): the reviewed copy of this file had lost its line breaks and
# the HTML markup inside its string literals. The markup literals below
# (anchor/span/img/p tags and the default matchStr) were reconstructed from
# what remained and must be confirmed against the expected HTML output.


def validHashTag(hashtag: str) -> bool:
    """Returns true if the given hashtag contains valid characters

    Only ASCII letters and digits are accepted. An empty string is
    reported as valid; callers are expected to check length first.
    """
    validChars = \
        set('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    return set(hashtag).issubset(validChars)


def addHashTags(wordStr: str, httpPrefix: str, domain: str,
                replaceHashTags: {}, postHashtags: {}) -> bool:
    """Detects hashtags and adds them to the replacements dict
    Also updates the hashtags list to be added to the post

    wordStr is a single word from the post, eg. "#cats".
    replaceHashTags maps the plaintext word to its html replacement.
    postHashtags maps the tag name to its 'Hashtag' tag object.
    Returns True if the word was handled as a hashtag.
    """
    if not wordStr.startswith('#'):
        return False
    if len(wordStr) < 2:
        return False
    if replaceHashTags.get(wordStr):
        # this hashtag was already seen earlier in the same post
        return True
    hashtag = wordStr[1:]
    if not validHashTag(hashtag):
        return False
    hashtagUrl = httpPrefix + "://" + domain + "/tags/" + hashtag
    postHashtags[hashtag] = {
        'href': hashtagUrl,
        'name': '#' + hashtag,
        'type': 'Hashtag'
    }
    # NOTE(review): anchor markup reconstructed - confirm attributes
    replaceHashTags[wordStr] = \
        '<a href="' + hashtagUrl + '" class="mention hashtag" rel="tag">' + \
        '#<span>' + hashtag + '</span></a>'
    return True


def loadEmojiDict(emojiDataFilename: str, emojiDict: {}) -> None:
    """Creates an emoji dictionary based on emoji/emoji-data.txt

    emojiDict is updated in place, mapping a lower case emoji name
    (spaces and hyphens removed) to the first field of the data line.
    Nothing happens if the data file does not exist.
    """
    if not os.path.isfile(emojiDataFilename):
        return
    with open(emojiDataFilename, "r") as fileHandler:
        for line in fileHandler:
            # skip short lines, comments and anything which
            # isn't an emoji definition
            if len(line) < 5:
                continue
            if line.startswith('#'):
                continue
            if '; Emoji' not in line:
                continue
            if ')' not in line:
                continue
            emojiUnicode = line.split(' ')[0]
            if len(emojiUnicode) < 4:
                continue
            if '..' in emojiUnicode:
                # for a range only use the first code point
                emojiUnicode = emojiUnicode.split('..')[0]
            # the name is whatever follows the first closing bracket
            emojiName = line.split(')', 1)[1]
            emojiName = \
                emojiName.replace('\n', '').replace(' ', '').replace('-', '')
            if '..' in emojiName:
                emojiName = emojiName.split('..')[0]
            emojiDict[emojiName.lower()] = emojiUnicode


def addEmoji(baseDir: str, wordStr: str, httpPrefix: str, domain: str,
             replaceEmoji: {}, postTags: {}, emojiDict: {}) -> bool:
    """Detects Emoji and adds them to the replacements dict
    Also updates the tags list to be added to the post

    wordStr is a single word from the post, eg. ":smile:".
    The emoji is only accepted if its name exists within emojiDict
    and a matching png exists within the emoji directory of baseDir.
    Returns True if the word was handled as an emoji.
    """
    if not wordStr.startswith(':'):
        return False
    if not wordStr.endswith(':'):
        return False
    if len(wordStr) < 3:
        return False
    if replaceEmoji.get(wordStr):
        # this emoji was already seen earlier in the same post
        return True
    # remove the enclosing colons
    emoji = wordStr[1:-1]
    if not validHashTag(emoji):
        return False
    if not emojiDict.get(emoji):
        return False
    emojiFilename = baseDir + '/emoji/' + emojiDict[emoji] + '.png'
    if not os.path.isfile(emojiFilename):
        return False
    emojiUrl = \
        httpPrefix + "://" + domain + "/emoji/" + emojiDict[emoji] + '.png'
    postTags[emoji] = {
        'href': emojiUrl,
        'name': ':' + emoji + ':',
        'type': 'Emoji'
    }
    # NOTE(review): img markup reconstructed - confirm attributes
    replaceEmoji[wordStr] = \
        '<img src="' + emojiUrl + '" alt="' + emoji + \
        '" align="middle" class="emoji"/>'
    return True


def addMention(wordStr: str, httpPrefix: str, following: [],
               replaceMentions: {}, recipients: []) -> bool:
    """Detects mentions and adds them to the replacements dict
    and recipients list

    wordStr is a single word from the post, eg. "@nick" or
    "@nick@domain". following is the list of followed handles, one
    nick@domain per entry, which may have trailing newlines.
    Returns True if the word matched a followed account.
    """
    if not wordStr.startswith('@'):
        return False
    if len(wordStr) < 2:
        return False
    possibleHandle = wordStr[1:]
    if '@' not in possibleHandle:
        # fall back to a best effort match against the following list
        # if no domain was specified. eg. @nick
        possibleNickname = possibleHandle
        for follow in following:
            if follow.startswith(possibleNickname + '@'):
                replaceDomain = follow.replace('\n', '').split('@')[1]
                recipientActor = \
                    httpPrefix + "://" + replaceDomain + \
                    "/users/" + possibleNickname
                if recipientActor not in recipients:
                    recipients.append(recipientActor)
                # NOTE(review): h-card markup reconstructed - confirm
                replaceMentions[wordStr] = \
                    '<span class="h-card"><a href="' + httpPrefix + \
                    '://' + replaceDomain + '/@' + possibleNickname + \
                    '" class="u-url mention">@<span>' + \
                    possibleNickname + '</span></a></span>'
                return True
        return False
    possibleNickname = possibleHandle.split('@')[0]
    possibleDomain = possibleHandle.split('@')[1]
    for follow in following:
        if follow.replace('\n', '') == possibleHandle:
            recipientActor = \
                httpPrefix + "://" + possibleDomain + \
                "/users/" + possibleNickname
            if recipientActor not in recipients:
                recipients.append(recipientActor)
            # NOTE(review): h-card markup reconstructed - confirm
            replaceMentions[wordStr] = \
                '<span class="h-card"><a href="' + httpPrefix + \
                '://' + possibleDomain + '/@' + possibleNickname + \
                '" class="u-url mention">@<span>' + \
                possibleNickname + '</span></a></span>'
            return True
    return False


def addHtmlTags(baseDir: str, httpPrefix: str,
                nickname: str, domain: str, content: str,
                recipients: [], hashtags: {}) -> str:
    """ Replaces plaintext mentions such as @nick@domain into html
    by matching against known following accounts

    Hashtags and :emoji: are also converted, with their tag objects
    being added to the hashtags dict. Mentioned actors are appended
    to recipients. Content which already begins with a paragraph tag
    is returned unchanged.
    """
    # NOTE(review): paragraph tags in this function were reconstructed
    if content.startswith('<p>'):
        return content
    # Only trailing full stops are removed from words. Replacing every
    # dot would break handles such as @nick@domain.tld so that they
    # could never match an entry within the following list
    wordsOnly = content.replace(',', ' ').replace(';', ' ')
    words = [word.rstrip('.') for word in wordsOnly.split(' ')]
    replaceMentions = {}
    replaceHashTags = {}
    replaceEmoji = {}
    emojiDict = {}
    # urls keep any port number, but the account directory does not
    originalDomain = domain
    if ':' in domain:
        domain = domain.split(':')[0]
    followingFilename = \
        baseDir + '/accounts/' + nickname + '@' + domain + '/following.txt'
    if not os.path.isfile(followingFilename):
        # nothing to match against, so only apply paragraph markup
        content = content.replace('\n', '</p><p>')
        content = '<p>' + content + '</p>'
        return content.replace('<p></p>', '')

    # read the following list so that we can detect just @nick
    # in addition to @nick@domain
    with open(followingFilename, "r") as f:
        following = f.readlines()

    # extract mentions and tags from words
    for wordStr in words:
        if addMention(wordStr, httpPrefix, following,
                      replaceMentions, recipients):
            continue
        if addHashTags(wordStr, httpPrefix, originalDomain,
                       replaceHashTags, hashtags):
            continue
        # the emoji lookup is only loaded when the first possible
        # emoji is encountered
        if len(wordStr) > 2 and wordStr.startswith(':') and \
           wordStr.endswith(':') and not emojiDict:
            print('Loading emoji lookup')
            loadEmojiDict(baseDir + '/emoji/emoji-data.txt', emojiDict)
        addEmoji(baseDir, wordStr, httpPrefix, originalDomain,
                 replaceEmoji, hashtags, emojiDict)

    # replace words with their html versions
    for wordStr, replaceStr in replaceMentions.items():
        content = content.replace(wordStr, replaceStr)
    for wordStr, replaceStr in replaceHashTags.items():
        content = content.replace(wordStr, replaceStr)
    for wordStr, replaceStr in replaceEmoji.items():
        content = content.replace(wordStr, replaceStr)
    content = content.replace('\n', '</p><p>')
    return '<p>' + content + '</p>'


def getMentionsFromHtml(htmlText: str,
                        matchStr='<span class="h-card"><a href="') -> []:
    """Extracts mentioned actors from the given html content string

    matchStr is the markup which immediately precedes the url of each
    mention. Returns a list of the urls found which begin with
    http or dat:
    NOTE(review): the default matchStr was reconstructed - confirm that
    it matches the mention markup produced by addMention
    """
    mentions = []
    if matchStr not in htmlText:
        return mentions
    # the first section is the text before the first mention, which
    # isn't a link target, so it is skipped
    mentionsList = htmlText.split(matchStr)[1:]
    for mentionStr in mentionsList:
        if '"' not in mentionStr:
            continue
        # the url runs up to the closing quote of the href
        actorStr = mentionStr.split('"')[0]
        if actorStr.startswith(('http', 'dat:')):
            mentions.append(actorStr)
    return mentions