2019-06-28 18:55:29 +00:00
|
|
|
__filename__ = "posts.py"
|
|
|
|
__author__ = "Bob Mottram"
|
|
|
|
__license__ = "AGPL3+"
|
|
|
|
__version__ = "0.0.1"
|
|
|
|
__maintainer__ = "Bob Mottram"
|
|
|
|
__email__ = "bob@freedombone.net"
|
|
|
|
__status__ = "Production"
|
|
|
|
|
|
|
|
import requests
|
|
|
|
import json
|
2019-06-29 10:08:59 +00:00
|
|
|
import commentjson
|
2019-06-28 18:55:29 +00:00
|
|
|
import html
|
2019-06-29 10:08:59 +00:00
|
|
|
import datetime
|
2019-06-29 11:47:33 +00:00
|
|
|
import os, shutil
|
2019-06-29 10:08:59 +00:00
|
|
|
from pprint import pprint
|
2019-06-28 18:55:29 +00:00
|
|
|
from random import randint
|
|
|
|
from session import getJson
|
|
|
|
try:
|
|
|
|
from BeautifulSoup import BeautifulSoup
|
|
|
|
except ImportError:
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
2019-06-28 20:43:37 +00:00
|
|
|
def permitted(url: str,federationList) -> bool:
    """Is a url from one of the permitted domains?

    url: the address to be checked
    federationList: iterable of permitted domain names. An entry may
    include a port (e.g. "localhost:8080"), in which case the port of
    the url must also match.

    When the url has a network location (http, https and similar) its
    host must be equal to, or a subdomain of, a permitted domain. A plain
    substring test would also accept a permitted domain appearing within
    the path or query of a url on some other host.
    Addresses without a network location (such as "tag:" conversation
    ids) fall back to the substring test.
    Returns False if the url or the list is missing or empty.
    """
    from urllib.parse import urlparse

    if not isinstance(url, str) or not url or not federationList:
        return False

    try:
        parsed = urlparse(url)
        host = (parsed.hostname or '').lower()
        # network location with any "user:password@" prefix removed
        hostAndPort = parsed.netloc.rpartition('@')[2].lower()
    except ValueError:
        # malformed network location, for example a broken IPv6 literal
        return False

    for domain in federationList:
        # an empty entry would otherwise match every url
        if not isinstance(domain, str) or not domain:
            continue
        if not host:
            # no network location to compare against
            if domain in url:
                return True
            continue
        permittedDomain = domain.lower()
        candidate = host
        if ':' in permittedDomain:
            candidate = hostAndPort
        if candidate == permittedDomain or \
           candidate.endswith('.' + permittedDomain):
            return True
    return False
|
|
|
|
|
|
|
|
def cleanHtml(rawHtml: str) -> str:
    """Returns the text of some html, with html entities unescaped
    """
    parsedHtml = BeautifulSoup(rawHtml, 'html.parser')
    return html.unescape(parsedHtml.get_text())
|
|
|
|
|
|
|
|
def getUserUrl(wfRequest) -> str:
    """Returns the href of the first link having the type
    application/activity+json

    wfRequest: dict containing a 'links' list (presumably the result of
    a webfinger lookup - TODO confirm against the caller)
    Returns None if wfRequest is not a dict or no matching link exists.
    """
    # A failed lookup should result in "no url" rather than an exception
    if not isinstance(wfRequest, dict):
        return None
    for link in wfRequest.get('links') or []:
        if not isinstance(link, dict):
            continue
        if link.get('type') != 'application/activity+json':
            continue
        if link.get('href'):
            return link['href']
    return None
|
|
|
|
|
2019-06-28 19:36:39 +00:00
|
|
|
def parseUserFeed(session,feedUrl,asHeader) -> None:
    """Generator yielding each entry of 'orderedItems' from a feed and
    from the pages which it links to

    session, asHeader: passed through unchanged to getJson
    feedUrl: address of the first document to be fetched

    After each document its 'first' link is followed if that key is
    present, otherwise its 'next' link. Iteration stops when there is no
    link, when a document could not be obtained, or when a link leads
    back to an address which was already fetched.
    """
    # Pages are followed with a loop rather than recursion so that a long
    # feed cannot exhaust the recursion limit, and fetched addresses are
    # remembered so that a cyclic 'next' link cannot loop forever
    visitedUrls = set()
    nextUrl = feedUrl
    while nextUrl:
        # only an address can be fetched
        if not isinstance(nextUrl, str):
            break
        if nextUrl in visitedUrls:
            break
        visitedUrls.add(nextUrl)

        feedJson = getJson(session,nextUrl,asHeader,None)
        # debugging output, retained from the original implementation
        pprint(feedJson)
        if not isinstance(feedJson, dict):
            break

        for item in feedJson.get('orderedItems') or []:
            yield item

        if 'first' in feedJson:
            nextUrl = feedJson['first']
        elif 'next' in feedJson:
            nextUrl = feedJson['next']
        else:
            nextUrl = None
|
|
|
|
|
2019-06-28 20:43:37 +00:00
|
|
|
def _postTags(postObject,federationList):
    """Returns (mentions, emoji) collected from the 'tag' list of a post.
    mentions is a list of unique names, emoji maps name to icon url
    """
    mentions=[]
    emoji={}
    for tagItem in postObject.get('tag') or []:
        if not isinstance(tagItem, dict):
            continue
        tagName=tagItem.get('name')
        if not tagName:
            continue
        tagType=str(tagItem.get('type') or '').lower()
        if tagType=='emoji':
            icon=tagItem.get('icon')
            if isinstance(icon, dict) and icon.get('url'):
                # No emoji from non-permitted domains
                if permitted(icon['url'],federationList):
                    emoji[tagName]=icon['url']
        elif tagType=='mention':
            if tagName not in mentions:
                mentions.append(tagName)
    return mentions,emoji


def _postAttachments(postObject,federationList):
    """Returns a list of [name, url] for the attachments of a post
    """
    attachment=[]
    for attach in postObject.get('attachment') or []:
        if not isinstance(attach, dict):
            continue
        if attach.get('name') and attach.get('url'):
            # no attachments from non-permitted domains
            if permitted(attach['url'],federationList):
                attachment.append([attach['name'],attach['url']])
    return attachment


def getUserPosts(session,wfRequest,maxPosts,maxMentions,maxEmoji,maxAttachments,federationList) -> {}:
    """Returns the posts within the outbox of a person as a dict indexed
    by the published time of each post

    session: passed through to getJson and parseUserFeed
    wfRequest: lookup result from which getUserUrl obtains the actor url
    maxPosts: stop after this many posts have been stored
    maxMentions, maxEmoji, maxAttachments: a post having more than this
    number of mentions, emoji or attachments is skipped
    federationList: permitted domains, as used by permitted()

    Only feed items of type 'Create' are considered. Emoji, attachments
    and conversation ids from non-permitted domains are dropped, and
    replies to non-permitted domains are skipped altogether.
    An empty dict is returned if the actor or its outbox is not found.
    """
    userPosts={}
    asHeader = {'Accept': 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'}
    userUrl = getUserUrl(wfRequest)
    if not userUrl:
        return userPosts
    userJson = getJson(session,userUrl,asHeader,None)
    # the actor document may be unavailable
    if not userJson or not userJson.get('outbox'):
        return userPosts
    feedUrl = userJson['outbox']

    i = 0
    for item in parseUserFeed(session,feedUrl,asHeader):
        if not isinstance(item, dict):
            continue
        if item.get('type') != 'Create':
            continue
        postObject = item.get('object')
        # the object could be a bare id string rather than the post itself
        if not isinstance(postObject, dict):
            continue
        # skip incomplete posts instead of failing on the whole feed
        published = postObject.get('published')
        content = postObject.get('content')
        if not published or content is None:
            continue
        if userPosts.get(published):
            continue

        mentions,emoji = _postTags(postObject,federationList)
        if len(mentions)>maxMentions:
            continue
        if len(emoji)>maxEmoji:
            continue

        summary = postObject.get('summary') or ''

        inReplyTo = postObject.get('inReplyTo') or ''
        # No replies to non-permitted domains
        if inReplyTo and not permitted(inReplyTo,federationList):
            continue

        conversation = postObject.get('conversation') or ''
        # no conversations originated in non-permitted domains
        if conversation and not permitted(conversation,federationList):
            conversation = ''

        attachment = _postAttachments(postObject,federationList)
        # This limit was previously accepted but never applied. It is
        # handled in the same way as the mentions and emoji limits.
        if isinstance(maxAttachments, int) and len(attachment)>maxAttachments:
            continue

        sensitive = postObject.get('sensitive') or False

        userPosts[published] = {
            "sensitive": sensitive,
            "inreplyto": inReplyTo,
            "summary": summary,
            "html": content,
            "plaintext": cleanHtml(content),
            "attachment": attachment,
            "mentions": mentions,
            "emoji": emoji,
            "conversation": conversation
        }
        i += 1

        if i == maxPosts:
            break
    return userPosts
|
|
|
|
|
2019-06-29 13:26:55 +00:00
|
|
|
def createOutboxDir(username: str,domain: str) -> str:
    """Creates the outbox directory for a person, if it does not already
    exist, and returns its path

    The directory is accounts/<username>@<domain>/outbox beneath the
    current working directory, with the handle converted to lower case.
    """
    handle=username.lower()+'@'+domain.lower()
    outboxDir=os.getcwd()+'/accounts/'+handle+'/outbox'
    # makedirs also creates the accounts directory when it is missing,
    # and does not fail if another process creates the directory first
    os.makedirs(outboxDir, exist_ok=True)
    return outboxDir
|
2019-06-29 10:08:59 +00:00
|
|
|
|
2019-06-29 13:44:21 +00:00
|
|
|
def createOutboxArchive(username: str,domain: str) -> str:
    """Creates an archive directory for outbox posts, if it does not
    already exist, and returns its path

    The directory is accounts/<username>@<domain>/outboxarchive beneath
    the current working directory, with the handle converted to lower
    case.
    """
    handle=username.lower()+'@'+domain.lower()
    outboxArchiveDir=os.getcwd()+'/accounts/'+handle+'/outboxarchive'
    # makedirs also creates the accounts directory when it is missing,
    # and does not fail if another process creates the directory first
    os.makedirs(outboxArchiveDir, exist_ok=True)
    return outboxArchiveDir
|
|
|
|
|
2019-06-29 11:47:33 +00:00
|
|
|
def deleteAllPosts(username: str, domain: str) -> None:
    """Removes every file and subdirectory from the outbox of a person
    Any failure is printed and the remaining entries are still processed
    """
    outboxDir = createOutboxDir(username,domain)
    for entryName in os.listdir(outboxDir):
        entryPath = os.path.join(outboxDir, entryName)
        try:
            if os.path.isfile(entryPath):
                os.unlink(entryPath)
                continue
            if os.path.isdir(entryPath):
                shutil.rmtree(entryPath)
        except Exception as ex:
            print(ex)
    # TODO update output feed
|
|
|
|
|
2019-06-29 10:41:22 +00:00
|
|
|
def createPublicPost(username: str, domain: str, https: bool, content: str, followersOnly: bool, saveToFile: bool, inReplyTo=None, inReplyToAtomUri=None, subject=None) -> {}:
    """Creates a public post

    username, domain: identify the author, whose actor url is
    <prefix>://<domain>/users/<username>
    https: use https within the generated urls, otherwise http
    content: body of the post
    followersOnly: address the post only to the followers collection of
    the author, rather than to the public collection with the followers
    in cc
    saveToFile: also write the post as json into the outbox directory
    inReplyTo, inReplyToAtomUri: copied into the fields having those names
    subject: optional summary. When given the post is marked as sensitive

    Returns the 'Create' activity as a dict.
    """
    prefix='https'
    if not https:
        prefix='http'
    actorUrl=prefix+'://'+domain+'/users/'+username

    currTime=datetime.datetime.now(datetime.timezone.utc)
    epoch=datetime.datetime(1970,1,1,tzinfo=datetime.timezone.utc)
    # status is the number of whole seconds since epoch
    statusNumber=str((currTime - epoch) // datetime.timedelta(seconds=1))
    published=currTime.strftime("%Y-%m-%dT%H:%M:%SZ")
    conversationDate=currTime.strftime("%Y-%m-%d")
    conversationId=statusNumber

    # The addressing was previously computed and then ignored, so that
    # followers only posts were still sent to the public collection
    postTo=['https://www.w3.org/ns/activitystreams#Public']
    postCC=[actorUrl+'/followers']
    if followersOnly:
        postTo=[actorUrl+'/followers']
        postCC=[]

    newPostId=actorUrl+'/statuses/'+statusNumber

    # summary needs a value even when no subject is given
    summary=None
    sensitive=False
    if subject:
        summary=subject
        sensitive=True

    newPost = {
        'id': newPostId+'/activity',
        'type': 'Create',
        'actor': actorUrl,
        'published': published,
        'to': list(postTo),
        'cc': list(postCC),
        'object': {
            'id': newPostId,
            'type': 'Note',
            'summary': summary,
            'inReplyTo': inReplyTo,
            'published': published,
            'url': prefix+'://'+domain+'/@'+username+'/'+statusNumber,
            'attributedTo': actorUrl,
            'to': list(postTo),
            'cc': list(postCC),
            'sensitive': sensitive,
            'atomUri': newPostId,
            'inReplyToAtomUri': inReplyToAtomUri,
            'conversation': 'tag:'+domain+','+conversationDate+':objectId='+conversationId+':objectType=Conversation',
            'content': content,
            'contentMap': {
                'en': content
            },
            'attachment': [],
            'tag': [],
            # the replies collection is not yet populated
            'replies': {}
        }
    }

    if saveToFile:
        outboxDir = createOutboxDir(username,domain)
        # slashes cannot appear within a filename
        filename=outboxDir+'/'+newPostId.replace('/','#')+'.json'
        with open(filename, 'w') as fp:
            commentjson.dump(newPost, fp, indent=4, sort_keys=False)
    return newPost
|
2019-06-29 10:08:59 +00:00
|
|
|
|
2019-06-29 16:47:37 +00:00
|
|
|
def _outboxPostId(postFilename: str):
    """Returns the status number contained within the filename of a saved
    post, or None if the filename does not contain '#statuses#'
    """
    # saved posts are named after their id with '/' replaced by '#' and
    # with '.json' appended
    baseName = os.path.splitext(postFilename)[0]
    if '#statuses#' not in baseName:
        return None
    return baseName.split('#statuses#')[1].replace('#activity','')


def createOutbox(username: str,domain: str,https: bool,itemsPerPage: int,headerOnly: bool,pageNumber=None) -> {}:
    """Constructs the outbox feed

    username, domain: identify the person whose outbox is constructed
    https: use https within the generated urls, otherwise http
    itemsPerPage: number of posts on each page. Values less than one are
    treated as one
    headerOnly: return the OrderedCollection header rather than a page
    pageNumber: page to be returned, starting at one. A missing value or
    one which is not a number selects the first page, and a string
    containing a number is accepted

    Posts are the files within the outbox directory, with filenames
    sorted in descending order. The header contains the total number of
    posts and the first and last page urls, together with 'prev' and
    'next' links built from the ids of the posts immediately before and
    after the requested page. The page contains the posts themselves
    within 'orderedItems'. A post which can't be read is reported with
    print and left out.
    """
    prefix='https'
    if not https:
        prefix='http'
    outboxUrl=prefix+'://'+domain+'/users/'+username+'/outbox'
    outboxDir = createOutboxDir(username,domain)

    pageStr='?page=true'
    if pageNumber:
        pageStr='?page='+str(pageNumber)
    # The page number could arrive as a string, which would never be
    # equal to an integer page counter
    try:
        pageNumber=int(pageNumber) if pageNumber else 1
    except (TypeError, ValueError):
        pageNumber=1
    if itemsPerPage<1:
        # avoids division by zero and an empty page size
        itemsPerPage=1

    outboxHeader = {'@context': 'https://www.w3.org/ns/activitystreams',
                    'first': outboxUrl+'?page=true',
                    'id': outboxUrl,
                    'last': outboxUrl+'?page=true',
                    'totalItems': 0,
                    'type': 'OrderedCollection'}
    outboxItems = {'@context': 'https://www.w3.org/ns/activitystreams',
                   'id': outboxUrl+pageStr,
                   'orderedItems': [
                   ],
                   'partOf': outboxUrl,
                   'type': 'OrderedCollectionPage'}

    # post filenames sorted in descending order, ignoring anything which
    # is not a file
    postsInOutbox=[postFilename for postFilename in
                   sorted(os.listdir(outboxDir), reverse=True)
                   if os.path.isfile(os.path.join(outboxDir, postFilename))]

    # number of posts in outbox
    noOfPosts=len(postsInOutbox)
    outboxHeader['totalItems']=noOfPosts

    # Generate the last entry within the header. The number of pages is
    # rounded upwards so that a final partial page is included
    if noOfPosts>0:
        lastPage=(noOfPosts+itemsPerPage-1)//itemsPerPage
        outboxHeader['last']=outboxUrl+'?page='+str(lastPage)

    # Select the posts belonging to the requested page
    pagePosts=[]
    firstIndex=(pageNumber-1)*itemsPerPage
    if pageNumber>=1:
        pagePosts=postsInOutbox[firstIndex:firstIndex+itemsPerPage]

    if pagePosts:
        # prev refers to the post immediately before this page
        if firstIndex>0:
            postId=_outboxPostId(postsInOutbox[firstIndex-1])
            if postId:
                outboxHeader['prev']= \
                    outboxUrl+'?min_id='+postId+'&page=true'
        # next refers to the post immediately after this page
        if firstIndex+itemsPerPage<noOfPosts:
            postId=_outboxPostId(postsInOutbox[firstIndex+itemsPerPage])
            if postId:
                outboxHeader['next']= \
                    outboxUrl+'?max_id='+postId+'&page=true'

    if headerOnly:
        return outboxHeader

    # Insert posts
    for postFilename in pagePosts:
        filePath = os.path.join(outboxDir, postFilename)
        try:
            # get the post as json and insert it into the outbox feed
            with open(filePath, 'r') as fp:
                outboxItems['orderedItems'].append(commentjson.load(fp))
        except Exception as e:
            # best effort: an unreadable post should not prevent the rest
            # of the page from being returned
            print(e)
    return outboxItems
|
2019-06-29 13:44:21 +00:00
|
|
|
|
|
|
|
def archivePosts(username: str,domain: str,maxPostsInOutbox=256) -> None:
    """Keeps no more than a maximum number of posts within the outbox
    The surplus is moved into the archive directory, taking filenames
    in ascending sorted order
    """
    outboxDir = createOutboxDir(username,domain)
    archiveDir = createOutboxArchive(username,domain)
    outboxEntries = os.listdir(outboxDir)
    outboxEntries.sort()

    # how many entries exceed the limit
    surplus = len(outboxEntries) - maxPostsInOutbox
    if surplus <= 0:
        return

    for entryName in outboxEntries:
        sourcePath = os.path.join(outboxDir, entryName)
        if not os.path.isfile(sourcePath):
            continue
        os.rename(sourcePath, os.path.join(archiveDir, entryName))
        # TODO: possibly archive any associated media files
        surplus -= 1
        if surplus <= 0:
            break
|