epicyon/posts.py

502 lines
19 KiB
Python
Raw Normal View History

2019-06-28 18:55:29 +00:00
__filename__ = "posts.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "0.0.1"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
import requests
import json
2019-06-29 10:08:59 +00:00
import commentjson
2019-06-28 18:55:29 +00:00
import html
2019-06-29 10:08:59 +00:00
import datetime
2019-06-30 15:03:26 +00:00
import os
import shutil
2019-06-30 13:20:23 +00:00
import threading
2019-06-30 15:03:26 +00:00
import sys
import trace
2019-06-30 16:36:58 +00:00
from threads import threadWithTrace
2019-06-30 15:03:26 +00:00
from cache import storePersonInCache
from cache import getPersonFromCache
2019-06-29 10:08:59 +00:00
from pprint import pprint
2019-06-28 18:55:29 +00:00
from random import randint
from session import getJson
2019-06-30 11:07:39 +00:00
from session import postJson
2019-06-30 22:56:37 +00:00
from webfinger import webfingerHandle
2019-06-28 18:55:29 +00:00
try:
from BeautifulSoup import BeautifulSoup
except ImportError:
from bs4 import BeautifulSoup
2019-06-30 22:56:37 +00:00
def getPersonKey(username: str,domain: str,baseDir: str,keyType='public'):
    """Reads the stored key of the given type for a person

    The key is read from baseDir/keys/<keyType>/<handle>.key where the
    handle is username@domain in lower case.
    Returns the file contents, or an empty string if the file does not
    exist or contains fewer than 20 characters
    """
    handle = (username + '@' + domain).lower()
    keyFilename = baseDir + '/keys/' + keyType + '/' + handle + '.key'
    if not os.path.isfile(keyFilename):
        return ''
    with open(keyFilename, "r") as pemFile:
        keyPem = pemFile.read()
    # anything this short cannot be a usable key
    return keyPem if len(keyPem) >= 20 else ''
2019-06-28 20:43:37 +00:00
def permitted(url: str,federationList) -> bool:
    """Is a url from one of the permitted domains?

    This is a plain substring test: the url is accepted as soon as any
    entry of federationList occurs anywhere within it
    """
    return any(domain in url for domain in federationList)
def cleanHtml(rawHtml: str) -> str:
    """Returns the plain text of some html, with the markup removed
    and html entities unescaped
    """
    soup = BeautifulSoup(rawHtml, 'html.parser')
    plainText = soup.get_text()
    return html.unescape(plainText)
def getUserUrl(wfRequest) -> str:
    """Returns the href of the first webfinger link whose type is
    application/activity+json, or None if there is no such link
    """
    for link in wfRequest.get('links') or []:
        if not link.get('href'):
            continue
        if link.get('type') == 'application/activity+json':
            return link['href']
    return None
2019-06-28 19:36:39 +00:00
def parseUserFeed(session,feedUrl,asHeader) -> None:
    """Generator which yields every item of a paged feed

    Each page is fetched with getJson. The entries of its
    'orderedItems' list are yielded, then the page referenced by
    'first' (or otherwise by 'next') is fetched in turn.

    session: session passed through to getJson
    feedUrl: url of the collection or of its first page
    asHeader: http headers passed through to getJson

    Iteration stops when a page cannot be fetched, when there is no
    further page reference, or when a page url has already been
    visited (which would otherwise loop forever)
    """
    visitedUrls = set()
    # only string urls can be followed; anything else ends the walk
    while isinstance(feedUrl, str) and feedUrl and feedUrl not in visitedUrls:
        visitedUrls.add(feedUrl)
        feedJson = getJson(session,feedUrl,asHeader,None)
        # debug output retained from the original implementation
        pprint(feedJson)
        # a failed fetch gives nothing to iterate over
        if not isinstance(feedJson, dict):
            return

        for item in feedJson.get('orderedItems') or []:
            yield item

        # the collection header points at 'first', pages point at 'next'
        if 'first' in feedJson:
            feedUrl = feedJson['first']
        elif 'next' in feedJson:
            feedUrl = feedJson['next']
        else:
            feedUrl = None
2019-06-30 11:34:19 +00:00
2019-06-30 10:14:02 +00:00
def getPersonBox(session,wfRequest,boxName='inbox'):
    """Looks up a box (inbox, outbox, ...) belonging to a person

    session: session passed through to getJson
    wfRequest: webfinger result from which the actor url is taken
    boxName: name of the actor field to return

    Returns the tuple (boxUrl, publicKeyPem, personId).
    All three are None if the actor url cannot be found, the actor
    cannot be retrieved or the actor has no such box, so the result
    can always be unpacked by the caller. publicKeyPem and personId
    may individually be None if the actor does not supply them
    """
    asHeader = {'Accept': 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'}
    personUrl = getUserUrl(wfRequest)
    if not personUrl:
        return None,None,None

    # prefer the cached actor and only fetch it on a cache miss
    personJson = getPersonFromCache(personUrl)
    if not personJson:
        personJson = getJson(session,personUrl,asHeader,None)
    if not personJson or not personJson.get(boxName):
        return None,None,None

    personId = personJson.get('id') or None
    pubKey = None
    publicKey = personJson.get('publicKey')
    if publicKey and publicKey.get('publicKeyPem'):
        pubKey = publicKey['publicKeyPem']

    storePersonInCache(personUrl,personJson)

    return personJson[boxName],pubKey,personId
2019-06-28 20:43:37 +00:00
def _extractPost(postObject,maxMentions,maxEmoji,federationList):
    """Converts the object of a Create activity into a post summary
    Returns None if the post should be skipped
    """
    content = postObject.get('content')
    if content is None:
        return None

    mentions = []
    emoji = {}
    for tagItem in postObject.get('tag') or []:
        if not tagItem.get('type'):
            continue
        tagType = tagItem['type'].lower()
        if tagType == 'emoji':
            icon = tagItem.get('icon')
            if tagItem.get('name') and icon and icon.get('url'):
                # No emoji from non-permitted domains
                if permitted(icon['url'],federationList):
                    emoji[tagItem['name']] = icon['url']
        if tagType == 'mention':
            if tagItem.get('name') and tagItem['name'] not in mentions:
                mentions.append(tagItem['name'])
    if len(mentions) > maxMentions:
        return None
    if len(emoji) > maxEmoji:
        return None

    inReplyTo = ''
    if postObject.get('inReplyTo'):
        # No replies to non-permitted domains
        if not permitted(postObject['inReplyTo'],federationList):
            return None
        inReplyTo = postObject['inReplyTo']

    conversation = ''
    if postObject.get('conversation'):
        # no conversations originated in non-permitted domains
        if permitted(postObject['conversation'],federationList):
            conversation = postObject['conversation']

    attachment = []
    for attach in postObject.get('attachment') or []:
        if attach.get('name') and attach.get('url'):
            # no attachments from non-permitted domains
            if permitted(attach['url'],federationList):
                attachment.append([attach['name'],attach['url']])

    return {
        "sensitive": postObject.get('sensitive') or False,
        "inreplyto": inReplyTo,
        "summary": postObject.get('summary') or '',
        "html": content,
        "plaintext": cleanHtml(content),
        "attachment": attachment,
        "mentions": mentions,
        "emoji": emoji,
        "conversation": conversation
    }

def getUserPosts(session,wfRequest,maxPosts,maxMentions,maxEmoji,maxAttachments,federationList) -> {}:
    """Returns the posts within the outbox of a person

    session: session used to fetch the actor and the feed
    wfRequest: webfinger result identifying the person
    maxPosts: stop after this many Create activities have been read
    maxMentions: posts mentioning more names than this are skipped
    maxEmoji: posts with more permitted emoji than this are skipped
    maxAttachments: accepted for interface compatibility but not
        currently enforced
    federationList: domains used to filter emoji, replies,
        conversations and attachments via permitted()

    Returns a dictionary indexed by the published date of each post.
    Only 'Create' activities are considered, and objects without a
    published date or content are skipped
    """
    userPosts = {}
    personBox = getPersonBox(session,wfRequest,'outbox')
    if not personBox:
        return userPosts
    feedUrl,pubKey,personId = personBox
    if not feedUrl:
        return userPosts

    # same Accept header as is used when fetching the actor
    asHeader = {'Accept': 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'}

    i = 0
    for item in parseUserFeed(session,feedUrl,asHeader):
        if item.get('type') != 'Create':
            continue
        postObject = item.get('object')
        # the object may be absent or just a reference rather than a dict
        if not postObject or not isinstance(postObject, dict):
            continue
        published = postObject.get('published')
        if not published:
            continue
        if not userPosts.get(published):
            post = _extractPost(postObject,maxMentions,maxEmoji,federationList)
            if post is None:
                continue
            userPosts[published] = post
        i += 1

        if i == maxPosts:
            break
    return userPosts
2019-06-30 22:56:37 +00:00
def createOutboxDir(username: str,domain: str,baseDir: str) -> str:
    """Creates the outbox directory for a person, if it does not
    already exist, and returns its path

    The directory is baseDir/accounts/<handle>/outbox where the handle
    is username@domain in lower case. Missing parent directories
    (including baseDir/accounts) are created as needed
    """
    handle = username.lower() + '@' + domain.lower()
    outboxDir = baseDir + '/accounts/' + handle + '/outbox'
    # exist_ok avoids a race between testing for and creating the directory
    os.makedirs(outboxDir, exist_ok=True)
    return outboxDir
2019-06-29 10:08:59 +00:00
2019-06-30 22:56:37 +00:00
def createOutboxArchive(username: str,domain: str,baseDir: str) -> str:
    """Creates an archive directory for outbox posts, if it does not
    already exist, and returns its path

    The directory is baseDir/accounts/<handle>/outboxarchive where the
    handle is username@domain in lower case. Missing parent
    directories (including baseDir/accounts) are created as needed
    """
    handle = username.lower() + '@' + domain.lower()
    outboxArchiveDir = baseDir + '/accounts/' + handle + '/outboxarchive'
    # exist_ok avoids a race between testing for and creating the directory
    os.makedirs(outboxArchiveDir, exist_ok=True)
    return outboxArchiveDir
2019-06-30 22:56:37 +00:00
def deleteAllPosts(username: str, domain: str,baseDir: str) -> None:
    """Removes every file and subdirectory from the outbox of a person
    Failures are printed and do not stop the remaining deletions
    """
    outboxDir = createOutboxDir(username,domain,baseDir)
    for entryName in os.listdir(outboxDir):
        entryPath = os.path.join(outboxDir, entryName)
        try:
            if os.path.isfile(entryPath):
                os.unlink(entryPath)
                continue
            if os.path.isdir(entryPath):
                shutil.rmtree(entryPath)
        except Exception as e:
            print(e)
2019-06-29 22:29:18 +00:00
def getStatusNumber() -> (str,str):
    """Returns the status number and published date

    The status number is the number of whole seconds since the unix
    epoch, as a string. The published date is the same UTC time
    formatted as YYYY-MM-DDTHH:MM:SSZ
    """
    # timezone-aware replacement for the deprecated datetime.utcnow()
    currTime = datetime.datetime.now(datetime.timezone.utc)
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    # floor division by one second discards the microseconds
    statusNumber = str((currTime - epoch) // datetime.timedelta(seconds=1))
    published = currTime.strftime("%Y-%m-%dT%H:%M:%SZ")
    return statusNumber,published
2019-06-30 22:56:37 +00:00
def createPostBase(baseDir: str,username: str, domain: str, toUrl: str, ccUrl: str, https: bool, content: str, followersOnly: bool, saveToFile: bool, inReplyTo=None, inReplyToAtomUri=None, subject=None) -> {}:
    """Creates a post as a Create activity wrapping a Note

    baseDir: base directory, only used when saveToFile is set
    username, domain: identify the author of the post
    toUrl: the single recipient placed in the 'to' fields
    ccUrl: the single recipient placed in the 'cc' fields. If it is
        empty or None the 'cc' lists are left empty
    https: use https rather than http within generated urls
    content: html content of the post
    followersOnly: accepted for interface compatibility. The
        addressing is determined entirely by toUrl and ccUrl
    saveToFile: also write the post into the outbox of the author
    inReplyTo, inReplyToAtomUri: optional reference to the post
        being replied to
    subject: optional summary. When given the post is also marked
        as sensitive

    Returns the new post as a dictionary
    """
    prefix = 'https' if https else 'http'
    statusNumber,published = getStatusNumber()
    conversationDate = published.split('T')[0]
    conversationId = statusNumber
    actorUrl = prefix+'://'+domain+'/users/'+username
    newPostId = actorUrl+'/statuses/'+statusNumber

    # a subject acts as a content warning
    sensitive = False
    summary = None
    if subject:
        summary = subject
        sensitive = True

    # avoid emitting [None] or [''] when there is nobody to cc
    ccList = [ccUrl] if ccUrl else []

    newPost = {
        'id': newPostId+'/activity',
        'type': 'Create',
        'actor': actorUrl,
        'published': published,
        'to': [toUrl],
        'cc': list(ccList),
        'object': {'id': newPostId,
                   'type': 'Note',
                   'summary': summary,
                   'inReplyTo': inReplyTo,
                   'published': published,
                   'url': prefix+'://'+domain+'/@'+username+'/'+statusNumber,
                   'attributedTo': actorUrl,
                   'to': [toUrl],
                   'cc': list(ccList),
                   'sensitive': sensitive,
                   'atomUri': newPostId,
                   'inReplyToAtomUri': inReplyToAtomUri,
                   'conversation': 'tag:'+domain+','+conversationDate+':objectId='+conversationId+':objectType=Conversation',
                   'content': content,
                   'contentMap': {
                       'en': content
                   },
                   'attachment': [],
                   'tag': [],
                   # replies collection is not populated yet
                   'replies': {}
        }
    }
    if saveToFile:
        outboxDir = createOutboxDir(username,domain,baseDir)
        # '/' cannot appear within a filename, so it is stored as '#'
        filename = outboxDir+'/'+newPostId.replace('/','#')+'.json'
        with open(filename, 'w') as fp:
            commentjson.dump(newPost, fp, indent=4, sort_keys=False)
    return newPost
2019-06-29 10:08:59 +00:00
2019-06-30 10:14:02 +00:00
def createPublicPost(username: str, domain: str, https: bool, content: str, followersOnly: bool, saveToFile: bool, inReplyTo=None, inReplyToAtomUri=None, subject=None, baseDir: str=None) -> {}:
    """Public post to the outbox

    The post is addressed to the ActivityStreams public collection
    and cc'd to the followers collection of the author.

    baseDir: base directory handed to createPostBase, which uses it
        when saveToFile is set. It is a trailing optional parameter
        so that existing callers keep working; when omitted the
        current working directory is used
        NOTE(review): confirm that the working directory is the
        intended default for existing callers

    The remaining parameters are passed through to createPostBase.
    Returns the new post as a dictionary
    """
    if baseDir is None:
        baseDir = os.getcwd()
    prefix = 'https' if https else 'http'
    followersUrl = prefix+'://'+domain+'/users/'+username+'/followers'
    # baseDir must come first, otherwise every argument is shifted by one
    return createPostBase(baseDir, username, domain, 'https://www.w3.org/ns/activitystreams#Public', followersUrl, https, content, followersOnly, saveToFile, inReplyTo, inReplyToAtomUri, subject)
2019-06-30 22:56:37 +00:00
def threadSendPost(session,postJsonObject,federationList,inboxUrl: str,baseDir: str,signatureHeader,postLog) -> None:
    """Sends a post with exponential backoff

    Up to 20 attempts are made with postJson. After each failed
    attempt the thread sleeps, starting at 60 seconds and doubling
    every time.

    On success a line containing the published date of the post and
    the result is appended to postLog, the log is trimmed to its 64
    most recent entries and the whole log is written to
    baseDir/post.log
    """
    # imported here because the module-level imports do not include time
    import time

    maxLogEntries = 64
    backoffTime = 60
    for _ in range(20):
        postResult = postJson(session,postJsonObject,federationList,inboxUrl,signatureHeader)
        if postResult:
            postLog.append(postJsonObject['published']+' '+str(postResult)+'\n')
            # keep the length of the log finite
            # Don't accumulate massive files on systems with limited resources
            while len(postLog) > maxLogEntries:
                postLog.pop(0)
            # save the log file
            filename = baseDir+'/post.log'
            with open(filename, "w") as logFile:
                for line in postLog:
                    print(line, file=logFile)
            # our work here is done
            return
        time.sleep(backoffTime)
        backoffTime *= 2
2019-06-30 22:56:37 +00:00
def sendPost(session,baseDir,username: str, domain: str, port: int, toUsername: str, toDomain: str, toPort: int, cc: str, https: bool, content: str, followersOnly: bool, saveToFile: bool, federationList, sendThreads, postLog, inReplyTo=None, inReplyToAtomUri=None, subject=None) -> int:
    """Post to another inbox

    The recipient is resolved via webfinger, the post is created and
    signed with the private key of the sender, and delivery is handed
    to a background thread running threadSendPost.

    sendThreads: list of delivery threads. When it holds more than 10
        the oldest are killed and removed before the new one is added
    postLog: list of log lines shared with the delivery threads

    Returns 0 if the post was queued for sending, otherwise
        1 the webfinger lookup failed
        2 the recipient has no inbox
        3 the recipient has no public key
        4 the recipient has no id
        5 the sender has no private key
    """
    prefix = 'https' if https else 'http'

    if toPort!=80 and toPort!=443:
        toDomain = toDomain+':'+str(toPort)

    handle = prefix+'://'+toDomain+'/@'+toUsername

    # lookup the inbox for the To handle
    wfRequest = webfingerHandle(session,handle,https)
    if not wfRequest:
        return 1

    # get the actor inbox for the To handle
    inboxUrl,pubKey,toPersonId = getPersonBox(session,wfRequest,'inbox')
    if not inboxUrl:
        return 2
    if not pubKey:
        return 3
    if not toPersonId:
        return 4

    # baseDir must come first, otherwise every argument is shifted by one
    postJsonObject = createPostBase(baseDir, username, domain, toPersonId, cc, https, content, followersOnly, saveToFile, inReplyTo, inReplyToAtomUri, subject)

    # get the senders private key
    privateKeyPem = getPersonKey(username,domain,baseDir,'private')
    if len(privateKeyPem)==0:
        return 5

    # construct the http header
    # NOTE(review): signPostHeaders is not among the imports visible at
    # the top of this file - confirm that it is brought into scope
    signatureHeader = signPostHeaders(privateKeyPem, username, domain, '/inbox', https, postJsonObject)
    signatureHeader['Content-type'] = 'application/json'

    # Keep the number of threads being used small
    while len(sendThreads)>10:
        sendThreads[0].kill()
        sendThreads.pop(0)
    # the thread works on copies so that later changes made by the
    # caller do not affect a delivery which is still being retried
    thr = threadWithTrace(target=threadSendPost,args=(session,postJsonObject.copy(),federationList,inboxUrl,baseDir,signatureHeader.copy(),postLog),daemon=True)
    sendThreads.append(thr)
    thr.start()
    return 0
2019-06-30 22:56:37 +00:00
def createOutbox(baseDir: str,username: str,domain: str,port: int,https: bool,itemsPerPage: int,headerOnly: bool,pageNumber=None) -> {}:
    """Constructs the outbox feed

    baseDir: base directory containing the accounts
    username, domain, port, https: identify the person and determine
        the urls which are generated. The port is appended to the
        domain unless it is 80 or 443
    itemsPerPage: number of posts on each page (values below 1 are
        treated as 1)
    headerOnly: return the OrderedCollection header rather than a
        page of posts
    pageNumber: page to generate, starting from 1. A numeric string
        is accepted. If it is missing or not a number then page 1
        is used

    Posts are read from the outbox directory, newest filename first.
    Returns either the header (with 'first', 'last', 'totalItems' and,
    where they apply, 'prev' and 'next') or an OrderedCollectionPage
    whose 'orderedItems' holds the posts on the requested page
    """
    prefix = 'https' if https else 'http'

    # the outbox directory is looked up with the bare domain, so this
    # has to happen before the port is appended
    outboxDir = createOutboxDir(username,domain,baseDir)
    if port!=80 and port!=443:
        domain = domain+':'+str(port)
    outboxUrl = prefix+'://'+domain+'/users/'+username+'/outbox'

    # normalise the paging arguments
    try:
        pageNumber = int(pageNumber) if pageNumber else None
    except (TypeError, ValueError):
        pageNumber = None
    itemsPerPage = max(1, itemsPerPage)

    pageStr = '?page=true'
    if pageNumber:
        pageStr = '?page='+str(pageNumber)

    outboxHeader = {'@context': 'https://www.w3.org/ns/activitystreams',
                    'first': outboxUrl+'?page=true',
                    'id': outboxUrl,
                    'last': outboxUrl+'?page=true',
                    'totalItems': 0,
                    'type': 'OrderedCollection'}
    outboxItems = {'@context': 'https://www.w3.org/ns/activitystreams',
                   'id': outboxUrl+pageStr,
                   'orderedItems': [
                   ],
                   'partOf': outboxUrl,
                   'type': 'OrderedCollectionPage'}

    # post filenames sorted in descending order
    postsInOutbox = sorted(os.listdir(outboxDir), reverse=True)
    # number of posts in outbox
    outboxHeader['totalItems'] = len(postsInOutbox)

    if not pageNumber:
        pageNumber = 1

    # Generate the last entry within the header
    if postsInOutbox:
        # round up so that a partially filled final page is included
        lastPage = (len(postsInOutbox)+itemsPerPage-1)//itemsPerPage
        outboxHeader['last'] = outboxUrl+'?page='+str(lastPage)

    # index range (counting readable post files only) of the wanted page
    firstIndex = (pageNumber-1)*itemsPerPage
    nextIndex = firstIndex+itemsPerPage

    # Insert posts
    postIndex = 0
    prevPostFilename = None
    for postFilename in postsInOutbox:
        # get the full path of the post file
        filePath = os.path.join(outboxDir, postFilename)
        try:
            if not os.path.isfile(filePath):
                continue
            if postIndex >= firstIndex:
                if postIndex == firstIndex and prevPostFilename:
                    if '#statuses#' in prevPostFilename:
                        # update the prev entry for the last message id
                        postId = prevPostFilename.split('#statuses#')[1].replace('#activity','')
                        # the id is taken from a filename, so drop the extension
                        if postId.endswith('.json'):
                            postId = postId[:-len('.json')]
                        outboxHeader['prev'] = \
                            outboxUrl+'?min_id='+postId+'&page=true'
                # get the post as json
                with open(filePath, 'r') as fp:
                    p = commentjson.load(fp)
                if postIndex < nextIndex:
                    # insert it into the outbox feed
                    if not headerOnly:
                        outboxItems['orderedItems'].append(p)
                else:
                    # the first post beyond the page supplies the next message ID
                    if '/statuses/' in p['id']:
                        postId = p['id'].split('/statuses/')[1].replace('/activity','')
                        outboxHeader['next'] = \
                            outboxUrl+'?max_id='+postId+'&page=true'
                    break
            # remember the last post filename for use with prev
            prevPostFilename = postFilename
            postIndex += 1
        except Exception as e:
            # best effort: an unreadable post is reported and not counted
            print(e)
    if headerOnly:
        return outboxHeader
    return outboxItems
2019-06-29 13:44:21 +00:00
2019-06-30 22:56:37 +00:00
def archivePosts(username: str,domain: str,baseDir: str,maxPostsInOutbox=256) -> None:
    """Retain a maximum number of posts within the outbox
    Any excess is moved to the archive directory, lowest sorted
    filename first. Nothing is moved if the outbox holds no more than
    maxPostsInOutbox entries
    """
    outboxDir = createOutboxDir(username,domain,baseDir)
    archiveDir = createOutboxArchive(username,domain,baseDir)
    oldestFirst = sorted(os.listdir(outboxDir))
    remaining = len(oldestFirst)
    if remaining <= maxPostsInOutbox:
        return
    for postFilename in oldestFirst:
        sourcePath = os.path.join(outboxDir, postFilename)
        # only plain files are archived
        if not os.path.isfile(sourcePath):
            continue
        os.rename(sourcePath, os.path.join(archiveDir, postFilename))
        # TODO: possibly archive any associated media files
        remaining -= 1
        if remaining <= maxPostsInOutbox:
            break