epicyon/media.py

218 lines
6.9 KiB
Python
Raw Normal View History

2020-04-03 16:55:55 +00:00
# Module metadata: file name, author, license, version, maintainer,
# contact email and release status
__filename__ = "media.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.1.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
2019-07-12 19:08:46 +00:00
from blurhash import blurhash_encode as blurencode
from PIL import Image
import numpy
import os
import datetime
2019-12-04 18:52:27 +00:00
from hashlib import sha1
2019-07-12 19:08:46 +00:00
from auth import createPassword
from shutil import copyfile
2019-07-12 19:51:10 +00:00
from shutil import rmtree
2019-07-12 19:55:23 +00:00
from shutil import move
2019-07-12 19:08:46 +00:00
2020-04-03 16:55:55 +00:00
2020-01-15 10:57:09 +00:00
def replaceYouTube(postJsonObject: {}) -> None:
    """Replace YouTube with invidio.us
    This denies Google some, but not all, tracking data

    The post is modified in place. Nothing happens when the post has no
    'object' dict, when that dict has no non-empty string 'content',
    or when the content does not mention www.youtube.com
    """
    # .get avoids a KeyError on posts which have no 'object' at all
    postObject = postJsonObject.get('object')
    if not isinstance(postObject, dict):
        return
    content = postObject.get('content')
    # only text content can be rewritten
    if not content or not isinstance(content, str):
        return
    if 'www.youtube.com' not in content:
        return
    postObject['content'] = content.replace('www.youtube.com', 'invidio.us')
2020-04-03 16:55:55 +00:00
def removeMetaData(imageFilename: str, outputFilename: str) -> None:
    """Copies an image to outputFilename and strips metadata from the copy
    Attempts to do this with pure python didn't work well,
    so better to use a dedicated tool if one is installed

    /usr/bin/exiftool is preferred, with /usr/bin/mogrify as a fallback.
    If neither is present the copy is left as it is.
    """
    # imported here so that this function brings its own dependency
    import subprocess  # nosec

    copyfile(imageFilename, outputFilename)
    if not os.path.isfile(outputFilename):
        print('ERROR: unable to remove metadata from ' + imageFilename)
        return

    if os.path.isfile('/usr/bin/exiftool'):
        print('Removing metadata from ' + outputFilename + ' using exiftool')
        # -overwrite_original: otherwise exiftool keeps the untouched file,
        # metadata included, as "<name>_original" beside the output
        cmd = ['/usr/bin/exiftool', '-all=', '-overwrite_original',
               outputFilename]
    elif os.path.isfile('/usr/bin/mogrify'):
        print('Removing metadata from ' + outputFilename + ' using mogrify')
        cmd = ['/usr/bin/mogrify', '-strip', outputFilename]
    else:
        return

    # An argument list (no shell) means that spaces or shell
    # metacharacters within the filename cannot alter the command
    try:
        subprocess.run(cmd, check=False)  # nosec
    except OSError as ex:
        print('ERROR: unable to remove metadata from ' +
              outputFilename + ' ' + str(ex))
2019-07-24 13:14:23 +00:00
2020-04-03 16:55:55 +00:00
2019-07-12 19:51:10 +00:00
def getImageHash(imageFilename: str) -> str:
    """Returns the blurhash encoding of the given image file
    The image is converted to RGB before being encoded
    """
    # the context manager closes the underlying file once the pixel
    # data has been copied into the array
    with Image.open(imageFilename) as img:
        pixels = numpy.array(img.convert("RGB"))
    return blurencode(pixels)
2019-07-12 19:08:46 +00:00
2020-04-03 16:55:55 +00:00
2019-08-30 18:01:29 +00:00
def isMedia(imageFilename: str) -> bool:
    """Returns true if the filename has a permitted media extension
    A warning is printed for anything else

    The extensions are the same image, video and audio ones which
    getAttachmentMediaType recognises, so that webm and jpeg files
    are not rejected here before they can be classified
    """
    permittedMedia = ('.png', '.jpg', '.jpeg', '.gif', '.webp',
                      '.mp4', '.webm', '.ogv',
                      '.mp3', '.ogg')
    # endswith accepts a tuple of candidate suffixes
    if imageFilename.endswith(permittedMedia):
        return True
    print('WARN: ' + imageFilename + ' is not a permitted media type')
    return False
2020-04-03 16:55:55 +00:00
def createMediaDirs(baseDir: str, mediaPath: str) -> None:
    """Creates the media directory and the given media path
    beneath the base directory

    Directories which already exist are left alone and any missing
    parent directories are also created
    """
    # exist_ok avoids the gap between checking for a directory and
    # creating it, during which another process could create it
    os.makedirs(baseDir + '/media', exist_ok=True)
    os.makedirs(baseDir + '/' + mediaPath, exist_ok=True)
2019-07-12 19:08:46 +00:00
2019-07-12 19:26:54 +00:00
def getMediaPath() -> str:
    """Returns the relative media directory for the current week
    which is 'media/' followed by the number of whole weeks
    since 1st January 1970 (UTC)
    """
    epoch = datetime.datetime(1970, 1, 1)
    elapsed = datetime.datetime.utcnow() - epoch
    weekNumber = elapsed.days // 7
    return 'media/' + str(weekNumber)
2019-08-30 15:50:20 +00:00
def getAttachmentMediaType(filename: str) -> str:
    """Classifies a file by its extension
    Returns 'image', 'video' or 'audio', or None if the
    extension is not recognised
    """
    # checked in this order: images, then video, then audio
    categories = (
        ('image', ('.png', '.jpg', '.jpeg', '.gif', '.webp')),
        ('video', ('.mp4', '.webm', '.ogv')),
        ('audio', ('.mp3', '.ogg'))
    )
    for category, suffixes in categories:
        if filename.endswith(suffixes):
            return category
    return None
2020-04-03 16:55:55 +00:00
2019-12-04 18:52:27 +00:00
def updateEtag(mediaFilename: str) -> None:
    """ calculate the etag, which is a sha1 of the data
    and save it beside the media file as <mediaFilename>.etag

    Nothing is written if the path does not contain /media/, if the
    file does not exist, if it is empty or if it cannot be read.
    This is best effort: I/O failures print a warning rather than
    raising an exception
    """
    # only create etags for media
    if '/media/' not in mediaFilename:
        return

    # check that the media exists
    if not os.path.isfile(mediaFilename):
        return

    # read the binary data
    # Only I/O errors are caught, so that KeyboardInterrupt and
    # SystemExit are no longer swallowed
    try:
        with open(mediaFilename, 'rb') as mediaFile:
            data = mediaFile.read()
    except OSError as ex:
        print('WARN: unable to read ' + mediaFilename +
              ' to calculate etag ' + str(ex))
        return

    if not data:
        return

    # calculate hash
    etag = sha1(data).hexdigest()  # nosec

    # save the hash
    try:
        with open(mediaFilename + '.etag', 'w') as etagFile:
            etagFile.write(etag)
    except OSError as ex:
        print('WARN: unable to write ' + mediaFilename +
              '.etag ' + str(ex))
2020-04-03 16:55:55 +00:00
def attachMedia(baseDir: str, httpPrefix: str, domain: str, port: int,
                postJson: {}, imageFilename: str,
                mediaType: str, description: str,
                useBlurhash: bool) -> {}:
    """Attaches media to a json object post
    The description can be None
    Blurhash is optional, since low power systems may take a long
    time to calculate it

    mediaType is the general type, which has the subtype for the file
    extension appended to it. The post is returned unaltered if the
    file is not a permitted or accepted type. When baseDir is given
    the file is also copied into the current media directory and an
    etag is created for it.
    """
    if not isMedia(imageFilename):
        return postJson

    # subtype to append to the media type for each accepted extension
    subtypeForExtension = {
        'png': 'png', 'jpg': 'jpeg', 'gif': 'gif', 'webp': 'webp',
        'mp4': 'mp4', 'webm': 'webm', 'ogv': 'ogv',
        'mp3': 'mpeg', 'ogg': 'ogg'
    }
    fileExtension = None
    for extension, subtype in subtypeForExtension.items():
        if imageFilename.endswith('.' + extension):
            fileExtension = subtype
    if not fileExtension:
        return postJson

    mediaType = mediaType + '/' + fileExtension
    print('Attached media type: ' + mediaType)

    # the stored file uses the short extension, not the subtype
    if fileExtension == 'jpeg':
        fileExtension = 'jpg'
    if mediaType == 'audio/mpeg':
        fileExtension = 'mp3'

    # include the port within the domain unless it is a default one
    nonDefaultPort = bool(port) and port not in (80, 443)
    if nonDefaultPort and ':' not in domain:
        domain = domain + ':' + str(port)

    weekPath = getMediaPath()
    mediaPath = weekPath + '/' + createPassword(32) + '.' + fileExtension
    if baseDir:
        createMediaDirs(baseDir, weekPath)
        mediaFilename = baseDir + '/' + mediaPath

    isImage = mediaType.startswith('image/')
    attachmentJson = {
        'mediaType': mediaType,
        'name': description,
        'type': 'Document',
        'url': httpPrefix + '://' + domain + '/' + mediaPath
    }
    if isImage:
        # NOTE(review): 'focialPoint' looks like a misspelling of
        # 'focalPoint'. Kept as is, since other code may read this key
        attachmentJson['focialPoint'] = [0.0, 0.0]
    if isImage and useBlurhash:
        attachmentJson['blurhash'] = getImageHash(imageFilename)
    postJson['attachment'] = [attachmentJson]

    # without a base directory there is nowhere to store the file
    if not baseDir:
        return postJson

    if isImage:
        removeMetaData(imageFilename, mediaFilename)
    else:
        copyfile(imageFilename, mediaFilename)
    updateEtag(mediaFilename)
    return postJson
2020-04-03 16:55:55 +00:00
def archiveMedia(baseDir: str, archiveDirectory: str, maxWeeks=4) -> None:
    """Any media older than the given number of weeks gets archived

    Week directories beneath baseDir/media are named by the number of
    weeks since 1st January 1970. Those older than maxWeeks are moved
    into archiveDirectory/media, or deleted if no archive directory
    is given. Entries which are not directories, or whose name is not
    a week number, are ignored.
    """
    currTime = datetime.datetime.utcnow()
    weeksSinceEpoch = (currTime - datetime.datetime(1970, 1, 1)).days // 7
    minWeek = weeksSinceEpoch - maxWeeks

    if archiveDirectory:
        os.makedirs(archiveDirectory + '/media', exist_ok=True)

    mediaDir = baseDir + '/media'
    if not os.path.isdir(mediaDir):
        return

    # Only the immediate children are week directories. Recursing with
    # os.walk would also visit directories nested inside them, while
    # the tree is being moved or deleted
    for weekDir in os.listdir(mediaDir):
        weekPath = os.path.join(mediaDir, weekDir)
        if not os.path.isdir(weekPath):
            continue
        try:
            weekNumber = int(weekDir)
        except ValueError:
            # not a week directory
            continue
        if weekNumber >= minWeek:
            continue
        if archiveDirectory:
            move(weekPath, archiveDirectory + '/media')
        else:
            # archive to /dev/null
            rmtree(weekPath)