epicyon/utils.py

805 lines
28 KiB
Python
Raw Normal View History

2020-04-04 13:44:49 +00:00
__filename__ = "utils.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.1.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
2019-07-02 09:25:29 +00:00
import os
2019-10-11 18:03:58 +00:00
import time
2019-09-29 18:48:34 +00:00
import shutil
2019-07-02 09:25:29 +00:00
import datetime
2019-11-23 10:20:30 +00:00
import json
2020-02-22 16:00:27 +00:00
from calendar import monthrange
2019-07-02 09:25:29 +00:00
2020-04-04 13:44:49 +00:00
def removeAvatarFromCache(baseDir: str, actorStr: str) -> None:
    """Deletes any cached avatar images for the given actor

    Every supported image extension is checked within
    baseDir/cache/avatars, which avoids duplicate entries
    with differing extensions
    """
    avatarPrefix = baseDir + '/cache/avatars/' + actorStr + '.'
    for imageExt in ('png', 'jpg', 'gif', 'webp'):
        cachedAvatar = avatarPrefix + imageExt
        if not os.path.isfile(cachedAvatar):
            continue
        os.remove(cachedAvatar)
2020-04-04 13:44:49 +00:00
def saveJson(jsonObject: {}, filename: str) -> bool:
    """Saves json to a file

    The object is serialized before the file is opened, so that an
    object which can't be serialized neither truncates an existing
    file nor gets retried. Writing is attempted up to five times,
    pausing for one second between attempts.

    Returns True if the file was written, otherwise False
    """
    try:
        jsonStr = json.dumps(jsonObject)
    except (TypeError, ValueError) as e:
        print('WARN: saveJson unable to serialize ' + str(e))
        return False

    maxTries = 5
    for tries in range(maxTries):
        try:
            with open(filename, 'w') as fp:
                fp.write(jsonStr)
            return True
        except OSError:
            # Only I/O failures are retried. KeyboardInterrupt and
            # SystemExit are deliberately allowed to propagate
            print('WARN: saveJson ' + str(tries))
            if tries < maxTries - 1:
                time.sleep(1)
    return False
2020-04-04 13:44:49 +00:00
def loadJson(filename: str, delaySec=2) -> {}:
    """Makes a few attempts to load a json formatted file

    Up to five attempts are made, waiting delaySec seconds between
    them if delaySec is greater than zero.

    Returns the decoded json, or None if every attempt failed
    """
    maxTries = 5
    for tries in range(maxTries):
        try:
            with open(filename, 'r') as fp:
                return json.loads(fp.read())
        except Exception:
            # Loading is best effort, so any ordinary failure results
            # in a retry. Unlike BaseException this doesn't swallow
            # KeyboardInterrupt or SystemExit
            print('WARN: loadJson exception')
            # there is no point in waiting after the final attempt
            if delaySec > 0 and tries < maxTries - 1:
                time.sleep(delaySec)
    return None
2020-04-04 13:44:49 +00:00
def loadJsonOnionify(filename: str, domain: str, onionDomain: str,
                     delaySec=2) -> {}:
    """Makes a few attempts to load a json formatted file
    This also converts the domain name to the onion domain

    Before decoding, every occurrence of domain within the file text
    is replaced by onionDomain and 'https:' is replaced by 'http:'.
    Up to five attempts are made, waiting delaySec seconds between
    them if delaySec is greater than zero.

    Returns the decoded json, or None if every attempt failed
    """
    maxTries = 5
    for tries in range(maxTries):
        try:
            with open(filename, 'r') as fp:
                data = fp.read()
            if data:
                data = data.replace(domain, onionDomain)
                data = data.replace('https:', 'http:')
            # an empty file fails to decode and so gets retried
            return json.loads(data)
        except Exception:
            # best effort, but without swallowing KeyboardInterrupt
            # or SystemExit as BaseException would
            print('WARN: loadJson exception')
            # there is no point in waiting after the final attempt
            if delaySec > 0 and tries < maxTries - 1:
                time.sleep(delaySec)
    return None
2020-04-04 13:44:49 +00:00
def getStatusNumber() -> (str, str):
    """Returns the status number and published date

    The status number is the number of milliseconds since the epoch
    (taken from the current UTC time) shifted left by 16 bits, with
    the leftover microseconds added as a sequence number.
    See https://github.com/tootsuite/mastodon/blob/
    995f8b389a66ab76ec92d9a240de376f1fc13a38/lib/mastodon/snowflake.rb

    The published date is the same time formatted as
    YYYY-MM-DDTHH:MM:SSZ
    """
    now = datetime.datetime.utcnow()
    sinceEpoch = now - datetime.datetime(1970, 1, 1)
    # the epoch starts at midnight, so the seconds of the difference
    # are the seconds elapsed within the current day
    wholeSeconds = (sinceEpoch.days * 24 * 60 * 60) + sinceEpoch.seconds
    milliseconds = (wholeSeconds * 1000) + (now.microsecond // 1000)
    # use the leftover microseconds as the sequence number
    sequenceId = now.microsecond % 1000
    # shift by 16bits "sequence data"
    statusNumber = str((milliseconds << 16) + sequenceId)
    published = now.strftime("%Y-%m-%dT%H:%M:%SZ")
    return statusNumber, published
2019-07-02 09:25:29 +00:00
2020-03-28 10:33:04 +00:00
def evilIncarnate() -> []:
    """Returns the built-in domain names which isEvil checks against

    Note that the names are returned as a tuple
    """
    blockedDomains = (
        'gab.com',
        'gabfed.com',
        'spinster.xyz',
        'kiwifarms.cc',
        'djitter.com'
    )
    return blockedDomains
2020-03-28 10:33:04 +00:00
2019-09-09 15:53:23 +00:00
def isEvil(domain: str) -> bool:
    """Returns True if the given domain is on the built-in block list

    A domain matches if it is one of the domains returned by
    evilIncarnate, or a subdomain of one of them. The comparison
    ignores case. A plain suffix test isn't used because it would
    also block unrelated domains which merely end with the same
    characters, such as notgab.com

    Anything which isn't a string is reported and treated as evil
    """
    if not isinstance(domain, str):
        print('WARN: Malformed domain ' + str(domain))
        return True
    # https://www.youtube.com/watch?v=5qw1hcevmdU
    domainLower = domain.lower()
    for concentratedEvil in evilIncarnate():
        if domainLower == concentratedEvil:
            return True
        if domainLower.endswith('.' + concentratedEvil):
            return True
    return False
2020-04-04 13:44:49 +00:00
def createPersonDir(nickname: str, domain: str, baseDir: str,
                    dirname: str) -> str:
    """Create a directory for a person

    The directory is baseDir/accounts/nickname@domain/dirname.
    Any missing parent directories are also created, and it is not
    an error if the directory already exists.

    Returns the path of the directory
    """
    handle = nickname + '@' + domain
    boxDir = baseDir + '/accounts/' + handle + '/' + dirname
    # makedirs avoids the race between testing for the directory and
    # creating it, and also copes with a missing accounts directory
    os.makedirs(boxDir, exist_ok=True)
    return boxDir
2020-04-04 13:44:49 +00:00
def createOutboxDir(nickname: str, domain: str, baseDir: str) -> str:
    """Creates the outbox directory for a person, via createPersonDir,
    and returns whatever that returns
    """
    outboxDir = createPersonDir(nickname, domain, baseDir, 'outbox')
    return outboxDir
2019-07-04 10:02:56 +00:00
2020-04-04 13:44:49 +00:00
def createInboxQueueDir(nickname: str, domain: str, baseDir: str) -> str:
    """Creates the inbox queue directory for a person, via
    createPersonDir, and returns whatever that returns
    """
    queueDir = createPersonDir(nickname, domain, baseDir, 'queue')
    return queueDir
2019-07-02 10:39:55 +00:00
def domainPermitted(domain: str, federationList: []) -> bool:
    """Returns True if the given domain is permitted to federate

    Every domain is permitted if the federation list is empty or
    None. Otherwise the domain, with any port number removed, has to
    appear within the list
    """
    # truthiness rather than len() so that None is also accepted,
    # which matches the test made by urlPermitted
    if not federationList:
        return True
    if ':' in domain:
        domain = domain.split(':')[0]
    return domain in federationList
2020-04-04 13:44:49 +00:00
def urlPermitted(url: str, federationList: [], capability: str) -> bool:
    """Returns True if the given url is permitted

    The domain is extracted from the url before it is tested,
    because isEvil compares against the end of a domain name, which
    a url with a path never matches. For the same reason the
    federation list is compared with the domain of the url rather
    than searched for anywhere within the url, where it could match
    a path or query string.

    A url is permitted if its domain is not evil and either the
    federation list is empty, or the domain is within the list or is
    a subdomain of one of its entries. The capability parameter is
    currently unused
    """
    from urllib.parse import urlparse

    if not isinstance(url, str):
        print('WARN: Malformed url ' + str(url))
        return False
    try:
        # a bare domain without a scheme needs a leading '//' for
        # its host name to be recognised
        parsed = urlparse(url if '://' in url else '//' + url)
        urlDomain = parsed.hostname
    except ValueError:
        return False
    if not urlDomain:
        # there is no domain to compare with the federation list
        return not federationList
    if isEvil(urlDomain):
        return False
    if not federationList:
        return True
    for domain in federationList:
        domain = domain.lower()
        if urlDomain == domain or urlDomain.endswith('.' + domain):
            return True
    return False
2019-07-06 15:17:21 +00:00
2020-04-04 13:44:49 +00:00
def getDisplayName(baseDir: str, actor: str, personCache: {}) -> str:
    """Returns the display name for the given actor

    Anything from '/statuses/' onwards is removed from the actor.
    The name is taken from the 'actor' entry of the person cache if
    there is one. Otherwise it is taken from the json file for the
    actor within baseDir/cache/actors, if that exists.

    Returns None if the actor is not in the person cache or no name
    was found
    """
    actor = actor.split('/statuses/')[0]
    cachedPerson = personCache.get(actor)
    if not cachedPerson:
        return None

    if cachedPerson.get('actor'):
        displayName = cachedPerson['actor'].get('name')
        return displayName if displayName else None

    # Try to obtain from the cached actors
    cachedActorFilename = \
        baseDir + '/cache/actors/' + actor.replace('/', '#') + '.json'
    if not os.path.isfile(cachedActorFilename):
        return None
    actorJson = loadJson(cachedActorFilename, 1)
    if actorJson and actorJson.get('name'):
        return actorJson['name']
    return None
2020-04-04 13:44:49 +00:00
2019-07-06 15:17:21 +00:00
def getNicknameFromActor(actor: str) -> str:
    """Returns the nickname from an actor url

    The nickname is whatever follows /users/, /profile/ or
    /channel/ (tested in that order) with any '@' characters removed
    and ending at the next '/'. Failing that, urls of the form
    https://domain/@nick are recognised.

    Returns None if the url contains none of these
    """
    for pathMarker in ('/users/', '/profile/', '/channel/'):
        if pathMarker in actor:
            nickStr = actor.split(pathMarker)[1].replace('@', '')
            return nickStr.split('/')[0]
    # https://domain/@nick
    if '/@' in actor:
        return actor.split('/@')[1].split('/')[0]
    return None
2019-07-06 15:17:21 +00:00
2020-04-04 13:44:49 +00:00
def getDomainFromActor(actor: str) -> (str, int):
    """Returns the domain name from an actor url

    The domain is whatever precedes /profile/, /channel/ or /users/
    (tested in that order) with the known url schemes removed. If the
    url contains none of these then the domain ends at the first '/'
    after the scheme.

    Returns the domain and the port number. The port is None if the
    url doesn't contain one, and both are None if the port is not a
    number
    """
    matchedMarker = None
    for pathMarker in ('/profile/', '/channel/', '/users/'):
        if pathMarker in actor:
            matchedMarker = pathMarker
            break

    if matchedMarker:
        domain = actor.split(matchedMarker)[0]
    else:
        domain = actor
    for scheme in ('https://', 'http://', 'i2p://', 'dat://', 'hyper://'):
        domain = domain.replace(scheme, '')
    if not matchedMarker:
        domain = domain.split('/')[0]

    if ':' not in domain:
        return domain, None
    domainParts = domain.split(':')
    if not domainParts[1].isdigit():
        return None, None
    return domainParts[0], int(domainParts[1])
def followPerson(baseDir: str, nickname: str, domain: str,
                 followNickname: str, followDomain: str,
                 federationList: [], debug: bool,
                 followFile='following.txt') -> bool:
    """Adds a person to the follow list

    The handle followNickname@followDomain is prepended to the
    followFile within the account directory of nickname@domain,
    which is created if it doesn't exist. Any line containing the
    handle (without its port number) is removed from the
    unfollowed.txt file of the account.

    Returns True if the handle is now within the follow file, or
    was already there. Returns False if the domain to be followed is
    not permitted, if the account doesn't exist or if the existing
    follow file could not be updated. In the last case the file is
    left alone rather than being overwritten, which would lose every
    existing entry
    """
    if not domainPermitted(followDomain.lower().replace('\n', ''),
                           federationList):
        if debug:
            print('DEBUG: follow of domain ' +
                  followDomain + ' not permitted')
        return False
    if debug:
        print('DEBUG: follow of domain ' + followDomain)

    # account directories are named without any port number
    handle = nickname + '@' + domain.split(':')[0].lower()
    accountDir = baseDir + '/accounts/' + handle
    if not os.path.isdir(accountDir):
        print('WARN: account for ' + handle + ' does not exist')
        return False

    # was this person previously unfollowed?
    unfollowedHandle = followNickname + '@' + followDomain.split(':')[0]
    unfollowedFilename = accountDir + '/unfollowed.txt'
    if os.path.isfile(unfollowedFilename):
        with open(unfollowedFilename, 'r') as f:
            lines = f.readlines()
        keptLines = [line for line in lines
                     if unfollowedHandle not in line]
        if len(keptLines) != len(lines):
            # remove them from the unfollowed file
            with open(unfollowedFilename, 'w') as f:
                f.write(''.join(keptLines))

    # the follow file entry keeps any port number
    handleToFollow = followNickname + '@' + followDomain
    filename = accountDir + '/' + followFile
    if os.path.isfile(filename):
        try:
            with open(filename, 'r+') as fp:
                content = fp.read()
                # compare whole lines, so that following bob@domain
                # isn't skipped because bigbob@domain is followed
                existing = [line.strip() for line in content.splitlines()]
                if handleToFollow in existing:
                    if debug:
                        print('DEBUG: follow already exists')
                    return True
                # prepend to follow file
                fp.seek(0, 0)
                fp.write(handleToFollow + '\n' + content)
            if debug:
                print('DEBUG: follow added')
            return True
        except Exception as e:
            print('WARN: Failed to write entry to follow file ' +
                  filename + ' ' + str(e))
            return False

    if debug:
        print('DEBUG: creating new following file to follow ' +
              handleToFollow)
    with open(filename, 'w') as fp:
        fp.write(handleToFollow + '\n')
    return True
2019-07-11 12:29:31 +00:00
2020-04-04 13:44:49 +00:00
def locatePost(baseDir: str, nickname: str, domain: str,
               postUrl: str, replies=False) -> str:
    """Returns the filename for the given status post url

    The filename is the url with surrounding whitespace and any
    '/activity' removed and with '/' replaced by '#', followed by
    a '.json' extension, or '.replies' if replies is True. It is
    searched for within the inbox, outbox and tlblogs directories of
    the account and then within baseDir/cache/announce/nickname.

    Returns None if the file was not found
    """
    extension = 'replies' if replies else 'json'

    # '/activity' has to be removed before the slashes are replaced,
    # otherwise it has already become '#activity' and never matches
    postUrl = postUrl.strip().replace('/activity', '').replace('/', '#')

    # add the extension
    postUrl = postUrl + '.' + extension

    # search boxes
    accountDir = baseDir + '/accounts/' + nickname + '@' + domain + '/'
    for boxName in ('inbox', 'outbox', 'tlblogs'):
        postFilename = accountDir + boxName + '/' + postUrl
        if os.path.isfile(postFilename):
            return postFilename

    # is it in the announce cache?
    postFilename = baseDir + '/cache/announce/' + nickname + '/' + postUrl
    if os.path.isfile(postFilename):
        return postFilename

    print('WARN: unable to locate ' + nickname + ' ' +
          postUrl)
    return None
2019-07-14 16:37:01 +00:00
2020-04-04 13:44:49 +00:00
def removeAttachment(baseDir: str, httpPrefix: str, domain: str, postJson: {}):
    """Removes the media file for the first attachment of a post

    The filename is the url of the first attachment with
    httpPrefix://domain/ removed, relative to baseDir. That file and
    its .etag file are deleted if they exist, and the attachment
    list of the post is then emptied.

    Nothing happens if the post has no attachment or if the first
    attachment has no url
    """
    if not postJson.get('attachment'):
        return
    attachmentUrl = postJson['attachment'][0].get('url')
    if not attachmentUrl:
        return
    mediaFilename = baseDir + '/' + \
        attachmentUrl.replace(httpPrefix + '://' + domain + '/', '')
    # The url is taken from the post itself, so only delete files
    # which resolve to somewhere inside of baseDir. Otherwise a url
    # such as ../../file could remove an arbitrary file
    baseDirAbs = os.path.abspath(baseDir)
    if os.path.abspath(mediaFilename).startswith(baseDirAbs + os.sep):
        if os.path.isfile(mediaFilename):
            os.remove(mediaFilename)
        etagFilename = mediaFilename + '.etag'
        if os.path.isfile(etagFilename):
            os.remove(etagFilename)
    postJson['attachment'] = []
2019-07-14 16:57:06 +00:00
2020-04-04 13:44:49 +00:00
def removeModerationPostFromIndex(baseDir: str, postUrl: str,
                                  debug: bool) -> None:
    """Removes a url from the moderation index

    The index is baseDir/accounts/moderation.txt. Every line which
    is equal to the url, after any '/activity' has been removed from
    the url, is removed. The file is only rewritten if something was
    removed
    """
    moderationIndexFile = baseDir + '/accounts/moderation.txt'
    if not os.path.isfile(moderationIndexFile):
        return
    postId = postUrl.replace('/activity', '')
    with open(moderationIndexFile, 'r') as f:
        lines = f.readlines()
    keptLines = [line for line in lines
                 if line.strip("\n").strip("\r") != postId]
    if len(keptLines) == len(lines):
        return
    with open(moderationIndexFile, 'w') as f:
        f.writelines(keptLines)
    if debug:
        print('DEBUG: removed ' + postId +
              ' from moderation index')
2019-08-12 18:02:29 +00:00
def isReplyToBlogPost(baseDir: str, nickname: str, domain: str,
                      postJsonObject: {}) -> bool:
    """Is the given post a reply to a blog post?

    Returns True if the inReplyTo url of the post object, with any
    '/activity' removed and with '/' replaced by '#', appears within
    the tlblogs.index file of the account. Returns False if the post
    has no object dictionary, if inReplyTo is missing or is not a
    string, or if there is no index file
    """
    if not postJsonObject.get('object'):
        return False
    if not isinstance(postJsonObject['object'], dict):
        return False
    inReplyTo = postJsonObject['object'].get('inReplyTo')
    if not inReplyTo or not isinstance(inReplyTo, str):
        return False
    blogsIndexFilename = baseDir + '/accounts/' + \
        nickname + '@' + domain + '/tlblogs.index'
    if not os.path.isfile(blogsIndexFilename):
        return False
    postId = inReplyTo.replace('/activity', '').replace('/', '#')
    with open(blogsIndexFilename, 'r') as indexFile:
        return postId in indexFile.read()
2020-04-04 13:44:49 +00:00
def _removePostFromTagIndexes(baseDir: str, postJsonObject: {}) -> None:
    """Removes a post from the index file of each of its hashtags

    The caller has to ensure that postJsonObject['object'] is a
    dictionary. An index file which has no lines left is deleted
    """
    postObject = postJsonObject['object']
    if not (postObject.get('id') and postObject.get('tag')):
        return
    # get the id of the post
    postId = postObject['id'].replace('/activity', '')
    for tag in postObject['tag']:
        if not isinstance(tag, dict):
            continue
        if tag.get('type') != 'Hashtag':
            continue
        if not tag.get('name'):
            continue
        # find the index file for this tag
        tagIndexFilename = baseDir + '/tags/' + tag['name'][1:] + '.txt'
        if not os.path.isfile(tagIndexFilename):
            continue
        # remove postId from the tag index file
        with open(tagIndexFilename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue
        newlines = ''.join(line for line in lines if postId not in line)
        if not newlines.strip():
            # if there are no lines then remove the
            # hashtag file
            os.remove(tagIndexFilename)
        else:
            with open(tagIndexFilename, 'w') as f:
                f.write(newlines)


def deletePost(baseDir: str, httpPrefix: str,
               nickname: str, domain: str, postFilename: str,
               debug: bool) -> None:
    """Recursively deletes a post and its replies and attachments

    If the post can be loaded then nothing is deleted if it is
    bookmarked by the account or is a reply to a blog post.
    Otherwise its attachment, mute file, cached html, moderation
    index entry and hashtag index entries are removed. After that
    every post listed within the replies file is deleted in the same
    way, followed by the replies file and the post file itself
    """
    postJsonObject = loadJson(postFilename, 1)
    if postJsonObject:
        # don't allow deletion of bookmarked posts
        bookmarksIndexFilename = \
            baseDir + '/accounts/' + nickname + '@' + domain + \
            '/bookmarks.index'
        if os.path.isfile(bookmarksIndexFilename):
            bookmarkIndex = postFilename.split('/')[-1] + '\n'
            with open(bookmarksIndexFilename, 'r') as indexFile:
                if bookmarkIndex in indexFile.read():
                    return

        # don't remove replies to blog posts
        if isReplyToBlogPost(baseDir, nickname, domain,
                             postJsonObject):
            return

        # remove any attachment
        removeAttachment(baseDir, httpPrefix, domain, postJsonObject)

        # remove any mute file
        muteFilename = postFilename + '.muted'
        if os.path.isfile(muteFilename):
            os.remove(muteFilename)

        # remove cached html version of the post
        cachedPostFilename = \
            getCachedPostFilename(baseDir, nickname, domain, postJsonObject)
        if cachedPostFilename:
            if os.path.isfile(cachedPostFilename):
                os.remove(cachedPostFilename)

        postObject = postJsonObject.get('object')
        if postObject and isinstance(postObject, dict):
            # remove from moderation index file
            if postObject.get('moderationStatus'):
                if postJsonObject.get('id'):
                    postId = postJsonObject['id'].replace('/activity', '')
                    removeModerationPostFromIndex(baseDir, postId, debug)

            # remove any hashtags index entries
            content = postObject.get('content')
            if content and '#' in content:
                _removePostFromTagIndexes(baseDir, postJsonObject)

    # remove any replies
    repliesFilename = postFilename.replace('.json', '.replies')
    if os.path.isfile(repliesFilename):
        if debug:
            print('DEBUG: removing replies to ' + postFilename)
        with open(repliesFilename, 'r') as f:
            for replyId in f:
                replyFile = locatePost(baseDir, nickname, domain, replyId)
                if replyFile:
                    if os.path.isfile(replyFile):
                        deletePost(baseDir, httpPrefix,
                                   nickname, domain, replyFile, debug)
        # remove the replies file
        os.remove(repliesFilename)
    # finally, remove the post itself
    os.remove(postFilename)
2019-07-27 22:48:34 +00:00
2020-04-04 13:44:49 +00:00
def validNickname(domain: str, nickname: str) -> bool:
    """Returns True if the given nickname is allowed

    A nickname is rejected if it contains a forbidden character,
    if it is the same as the domain or if it is a reserved name
    """
    forbiddenChars = ('.', ' ', '/', '?', ':', ';', '@')
    if any(ch in nickname for ch in forbiddenChars):
        return False
    if nickname == domain:
        return False
    reservedNames = ('inbox', 'dm', 'outbox', 'following',
                     'public', 'followers', 'profile',
                     'channel', 'capabilities', 'calendar',
                     'tlreplies', 'tlmedia', 'tlblogs',
                     'moderation', 'activity', 'undo',
                     'reply', 'replies', 'question', 'like',
                     'likes', 'users', 'statuses',
                     'updates', 'repeat', 'announce',
                     'shares')
    return nickname not in reservedNames
2019-08-08 11:24:26 +00:00
2020-04-04 13:44:49 +00:00
2019-08-08 11:24:26 +00:00
def noOfAccounts(baseDir: str) -> int:
    """Returns the number of accounts on the system

    An account is a directory directly within baseDir/accounts
    whose name contains '@' and does not begin with 'inbox@'.
    Returns zero if there is no accounts directory
    """
    accountCtr = 0
    for subdir, dirs, files in os.walk(baseDir + '/accounts'):
        for account in dirs:
            if '@' in account and not account.startswith('inbox@'):
                accountCtr += 1
        # Only the top level directories are accounts, so don't
        # descend into the contents of every account
        break
    return accountCtr
2019-08-10 11:31:42 +00:00
2020-04-04 13:44:49 +00:00
def noOfActiveAccountsMonthly(baseDir: str, months: int) -> int:
    """Returns the number of accounts which were used within the
    given number of months, where a month is counted as 30 days

    An account is a directory directly within baseDir/accounts
    whose name contains '@' and does not begin with 'inbox@'. The
    time when it was last used is read from its .lastUsed file, as
    a number of seconds since the epoch. Accounts without a valid
    .lastUsed file are not counted
    """
    accountCtr = 0
    currTime = int(time.time())
    monthSeconds = int(60*60*24*30*months)
    for subdir, dirs, files in os.walk(baseDir + '/accounts'):
        for account in dirs:
            if '@' not in account or account.startswith('inbox@'):
                continue
            lastUsedFilename = \
                baseDir + '/accounts/' + account + '/.lastUsed'
            if not os.path.isfile(lastUsedFilename):
                continue
            with open(lastUsedFilename, 'r') as lastUsedFile:
                # a trailing newline would otherwise fail isdigit
                lastUsed = lastUsedFile.read().strip()
            if lastUsed.isdigit():
                timeDiff = (currTime - int(lastUsed))
                if timeDiff < monthSeconds:
                    accountCtr += 1
        # Only the top level directories are accounts, and the
        # filename above assumes that they are at the top level
        break
    return accountCtr
2020-04-04 13:44:49 +00:00
def isPublicPostFromUrl(baseDir: str, nickname: str, domain: str,
                        postUrl: str) -> bool:
    """Returns whether the given url is a public post

    The post is located with locatePost, loaded with loadJson and
    then tested with isPublicPost. False is returned if the post
    can't be located or loaded
    """
    postFilename = locatePost(baseDir, nickname, domain, postUrl)
    if postFilename:
        postJsonObject = loadJson(postFilename, 1)
        if postJsonObject:
            return isPublicPost(postJsonObject)
    return False
2020-04-04 13:44:49 +00:00
2019-08-10 11:31:42 +00:00
def isPublicPost(postJsonObject: {}) -> bool:
    """Returns true if the given post is public

    This means a Create activity whose object is a dictionary
    having at least one 'to' recipient which ends with '#Public'
    """
    if postJsonObject.get('type') != 'Create':
        return False
    postObject = postJsonObject.get('object')
    if not postObject or not isinstance(postObject, dict):
        return False
    recipients = postObject.get('to')
    if not recipients:
        return False
    return any(recipient.endswith('#Public') for recipient in recipients)
2019-09-29 18:48:34 +00:00
2020-04-04 13:44:49 +00:00
2019-09-29 18:48:34 +00:00
def copytree(src: str, dst: str, symlinks=False, ignore=None):
    """Copy a directory

    Every item within the src directory is copied into the dst
    directory, which is created if it doesn't already exist.
    Subdirectories are copied with shutil.copytree, to which the
    symlinks and ignore parameters are passed, and other items are
    copied with shutil.copy2
    """
    # the items can't be copied into a directory which doesn't exist
    os.makedirs(dst, exist_ok=True)
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, symlinks, ignore)
        else:
            shutil.copy2(s, d)
2019-10-19 17:50:05 +00:00
2020-04-04 13:44:49 +00:00
def getCachedPostDirectory(baseDir: str, nickname: str, domain: str) -> str:
    """Returns the directory where the html post cache exists

    This is the postcache directory within the account directory.
    Only the path is built, without testing whether it exists
    """
    accountDir = baseDir + '/accounts/' + nickname + '@' + domain
    return accountDir + '/postcache'
2020-04-04 13:44:49 +00:00
def getCachedPostFilename(baseDir: str, nickname: str, domain: str,
                          postJsonObject: {}) -> str:
    """Returns the html cache filename for the given post

    The filename is the id of the post with any '/activity' removed
    and with '/' replaced by '#', followed by '.html', within the
    html post cache directory of the account.

    Returns None if the cache directory doesn't exist or if the post
    has no id
    """
    cachedPostDir = getCachedPostDirectory(baseDir, nickname, domain)
    if not os.path.isdir(cachedPostDir):
        return None
    if '@' not in cachedPostDir:
        return None
    # a post without an id has no cache filename, which callers
    # already handle in the same way as a missing cache directory
    postId = postJsonObject.get('id')
    if not postId:
        return None
    cachedPostFilename = \
        cachedPostDir + \
        '/' + postId.replace('/activity', '').replace('/', '#')
    return cachedPostFilename + '.html'
2019-11-24 13:46:28 +00:00
2020-04-04 13:44:49 +00:00
def removePostFromCache(postJsonObject: {}, recentPostsCache: {}):
    """ if the post exists in the recent posts cache then remove it

    The cache key is the id of the post up to any '#', with any
    '/activity' removed and with '/' replaced by '#'. The key is
    removed from the index, together with its json and html entries
    """
    if not postJsonObject.get('id') or not recentPostsCache.get('index'):
        return
    postId = postJsonObject['id'].split('#', 1)[0]
    postId = postId.replace('/activity', '').replace('/', '#')
    if postId not in recentPostsCache['index']:
        return
    for section in ('json', 'html'):
        if recentPostsCache[section].get(postId):
            del recentPostsCache[section][postId]
    recentPostsCache['index'].remove(postId)
2020-04-04 13:44:49 +00:00
def updateRecentPostsCache(recentPostsCache: {}, maxRecentPosts: int,
                           postJsonObject: {}, htmlStr: str) -> None:
    """Store recent posts in memory so that they can be quickly recalled

    The cache key is the id of the post up to any '#', with any
    '/activity' removed and with '/' replaced by '#'. The json of
    the post is stored as a string, together with its html. A post
    which is already within the cache is left unchanged. When there
    are more than maxRecentPosts posts the oldest ones are removed.

    Note that 'muted' is set to False on postJsonObject when it is
    added to a cache which already has entries
    """
    if not postJsonObject.get('id'):
        return
    postId = postJsonObject['id']
    if '#' in postId:
        postId = postId.split('#', 1)[0]
    postId = postId.replace('/activity', '').replace('/', '#')
    if recentPostsCache.get('index'):
        if postId in recentPostsCache['index']:
            return
        recentPostsCache['index'].append(postId)
        postJsonObject['muted'] = False
        recentPostsCache['json'][postId] = json.dumps(postJsonObject)
        recentPostsCache['html'][postId] = htmlStr

        while len(recentPostsCache['html']) > maxRecentPosts and \
                recentPostsCache['index']:
            # It's the oldest post which has to be evicted, not the
            # one which was just added
            oldestPostId = recentPostsCache['index'].pop(0)
            recentPostsCache['json'].pop(oldestPostId, None)
            recentPostsCache['html'].pop(oldestPostId, None)
    else:
        recentPostsCache['index'] = [postId]
        recentPostsCache['json'] = {}
        recentPostsCache['html'] = {}
        recentPostsCache['json'][postId] = json.dumps(postJsonObject)
        recentPostsCache['html'][postId] = htmlStr
2020-02-21 10:19:02 +00:00
def fileLastModified(filename: str) -> str:
    """Returns the date when a file was last modified

    The date is returned as UTC in the format YYYY-MM-DDTHH:MM:SSZ
    """
    t = os.path.getmtime(filename)
    # The Z suffix of the format means UTC, so the time has to be
    # converted as UTC rather than as local time
    modifiedTime = datetime.datetime.fromtimestamp(t, datetime.timezone.utc)
    return modifiedTime.strftime("%Y-%m-%dT%H:%M:%SZ")
2020-02-22 16:00:27 +00:00
2020-04-04 13:44:49 +00:00
def daysInMonth(year: int, monthNumber: int) -> int:
    """Returns the number of days in the month

    Months are numbered from 1 to 12 and None is returned for any
    other month number
    """
    if not 1 <= monthNumber <= 12:
        return None
    _, noOfDays = monthrange(year, monthNumber)
    return noOfDays
2020-04-04 13:44:49 +00:00
2020-02-22 16:00:27 +00:00
def mergeDicts(dict1: {}, dict2: {}) -> {}:
    """Merges two dictionaries

    A new dictionary is returned and neither argument is altered.
    If both have the same key then the value from dict2 is used
    """
    merged = dict(dict1)
    merged.update(dict2)
    return merged
2020-02-24 23:14:49 +00:00
2020-04-04 13:44:49 +00:00
2020-02-24 23:14:49 +00:00
def isBlogPost(postJsonObject: {}) -> bool:
    """Is the given post a blog post?

    This means a Create activity whose object is a dictionary of
    type Article which has some content. A post without a type is
    not a blog post
    """
    # get is used, as in isPublicPost, so that a post which has no
    # type returns False rather than raising KeyError
    if postJsonObject.get('type') != 'Create':
        return False
    if not postJsonObject.get('object'):
        return False
    if not isinstance(postJsonObject['object'], dict):
        return False
    if not postJsonObject['object'].get('type'):
        return False
    if not postJsonObject['object'].get('content'):
        return False
    if postJsonObject['object']['type'] != 'Article':
        return False
    return True
2020-04-11 10:19:35 +00:00
def searchBoxPosts(baseDir: str, nickname: str, domain: str,
                   searchStr: str, maxResults: int,
                   boxName='outbox') -> []:
    """Search your posts and return a list of the filenames
    containing matching strings

    The search ignores case. Several words can be separated by '+',
    in which case a file has to contain all of them. Every file
    within the given box of the account is searched, including
    subdirectories, and the search ends once maxResults files have
    been found. Files which can't be read as text are skipped.

    Returns an empty list if the box directory doesn't exist
    """
    path = baseDir + '/accounts/' + nickname + '@' + domain + '/' + boxName
    if not os.path.isdir(path):
        return []
    searchStr = searchStr.lower().strip()

    if '+' in searchStr:
        searchWords = [word.strip() for word in searchStr.split('+')]
        print('SEARCH: ' + str(searchWords))
    else:
        searchWords = [searchStr]

    res = []
    for root, dirs, fnames in os.walk(path):
        for fname in fnames:
            filePath = os.path.join(root, fname)
            try:
                with open(filePath, 'r') as postFile:
                    data = postFile.read().lower()
            except (OSError, UnicodeDecodeError):
                # one unreadable file shouldn't abort the search
                continue
            if not all(keyword in data for keyword in searchWords):
                continue
            res.append(filePath)
            if len(res) >= maxResults:
                return res
    return res
2020-05-04 18:24:30 +00:00
2020-05-04 18:29:30 +00:00
def getFileCaseInsensitive(path: str) -> str:
    """Returns a case specific filename given a case insensitive version of it

    If a file exists with exactly the given name then that is
    returned. Otherwise the directory is searched for a file whose
    name is the same when case is ignored. The path is returned
    unchanged if there is no such file or no such directory
    """
    directory, filename = os.path.split(path)
    directory = directory or '.'
    # Prefer an exact match. Without this a file whose name differs
    # only by case could be returned instead, depending upon the
    # order in which the directory is listed
    exactPath = os.path.join(directory, filename)
    if os.path.isfile(exactPath):
        return exactPath
    if not os.path.isdir(directory):
        return path
    lowerFilename = filename.lower()
    for f in os.listdir(directory):
        newpath = os.path.join(directory, f)
        if os.path.isfile(newpath) and f.lower() == lowerFilename:
            return newpath
    return path