epicyon/media.py

432 lines
15 KiB
Python
Raw Normal View History

2020-04-03 16:55:55 +00:00
__filename__ = "media.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2021-01-26 10:07:42 +00:00
__version__ = "1.2.0"
2020-04-03 16:55:55 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2020-04-03 16:55:55 +00:00
__status__ = "Production"
2021-06-15 15:08:12 +00:00
__module_group__ = "Timeline"
2019-07-12 19:08:46 +00:00
import os
2021-08-13 19:09:38 +00:00
import time
2019-07-12 19:08:46 +00:00
import datetime
import subprocess
2021-10-11 17:20:16 +00:00
import random
2021-05-09 12:17:55 +00:00
from random import randint
2019-12-04 18:52:27 +00:00
from hashlib import sha1
2019-07-12 19:08:46 +00:00
from auth import createPassword
from utils import getBaseContentFromPost
2020-12-16 11:04:46 +00:00
from utils import getFullDomain
2020-11-21 11:54:29 +00:00
from utils import getImageExtensions
from utils import getVideoExtensions
from utils import getAudioExtensions
from utils import getMediaExtensions
2021-07-06 10:00:19 +00:00
from utils import hasObjectDict
2021-07-13 21:59:53 +00:00
from utils import acctDir
2019-07-12 19:08:46 +00:00
from shutil import copyfile
2019-07-12 19:51:10 +00:00
from shutil import rmtree
2019-07-12 19:55:23 +00:00
from shutil import move
from city import spoofGeolocation
2019-07-12 19:08:46 +00:00
2020-04-03 16:55:55 +00:00
2021-10-11 17:20:16 +00:00
def _getBlurHash() -> str:
    """Returns a blurhash picked at random from a small canned set.
    Nothing is calculated from an image here. Choosing one of a few
    ready made values is much less computationally intensive, especially
    for large images, while still giving the timeline some visual variety
    """
    cannedHashes = (
        "UfGuaW01%gRi%MM{azofozo0V@xuozn#ofs.",
        "UFD]o8-;9FIU~qD%j[%M-;j[ofWB?bt7IURj",
        "UyO|v_1#im=s%y#U%OxDwRt3W9R-ogjHj[WX",
        "U96vAQt6H;WBt7ofWBa#MbWBo#j[byaze-oe",
        "UJKA.q01M|IV%LM|RjNGIVj[f6oLjrofaeof",
        "U9MPjn]?~Cxut~.PS1%1xXIo0fEer_$*^jxG",
        "UtLENXWCRjju~qayaeaz00j[ofayIVkCkCfQ",
        "UHGbeg-pbzWZ.ANI$wsQ$H-;E9W?0Nx]?FjE",
        "UcHU%#4n_ND%?bxatRWBIU%MazxtNaRjs:of",
        "ULR:TsWr~6xZofWWf6s-~6oK9eR,oes-WXNJ",
        "U77VQB-:MaMx%L%MogRkMwkCxuoIS*WYjEsl",
        "U%Nm{8R+%MxuE1t6WBNG-=RjoIt6~Vj]RkR*",
        "UCM7u;?boft7oft7ayj[~qt7WBoft7oft7Rj"
    )
    return random.choice(cannedHashes)
2021-12-25 22:09:19 +00:00
def _replaceSiloDomain(post_json_object: {},
                       siloDomain: str, replacementDomain: str,
                       system_language: str) -> None:
    """Swaps a silo domain for a replacement domain within the content
    of a post. The post is altered in place. Nothing happens if there
    is no replacement domain, if the post has no object dict or content,
    or if the silo domain does not appear within the content
    """
    if not replacementDomain or not hasObjectDict(post_json_object):
        return
    if not post_json_object['object'].get('content'):
        return
    originalText = getBaseContentFromPost(post_json_object, system_language)
    if siloDomain not in originalText:
        return
    updatedText = originalText.replace(siloDomain, replacementDomain)
    postObject = post_json_object['object']
    postObject['content'] = updatedText
    # keep the language specific copy of the content in step
    if postObject.get('contentMap'):
        postObject['contentMap'][system_language] = updatedText
2020-04-03 16:55:55 +00:00
2021-12-25 22:09:19 +00:00
def replaceYouTube(post_json_object: {}, replacementDomain: str,
                   system_language: str) -> None:
    """Replaces the YouTube domain within the content of a post with
    the given replacement domain, altering the post in place.
    This denies Google some, but not all, tracking data
    """
    youtubeDomain = 'www.youtube.com'
    _replaceSiloDomain(post_json_object, youtubeDomain,
                       replacementDomain, system_language)
2021-09-18 17:20:01 +00:00
2021-12-25 22:09:19 +00:00
def replaceTwitter(post_json_object: {}, replacementDomain: str,
                   system_language: str) -> None:
    """Replaces the Twitter domain within the content of a post with
    the given replacement domain, altering the post in place.
    This allows you to view twitter posts without having a twitter account
    """
    twitterDomain = 'twitter.com'
    _replaceSiloDomain(post_json_object, twitterDomain,
                       replacementDomain, system_language)
2021-09-18 17:08:14 +00:00
2021-05-09 12:17:55 +00:00
def _removeMetaData(imageFilename: str, outputFilename: str) -> None:
    """Copies imageFilename to outputFilename and then removes the
    metadata from the copy.
    Attempts to do this with pure python didn't work well,
    so better to use a dedicated tool if one is installed.
    exiftool is preferred, with mogrify as the fallback. If neither
    is installed then the copy is left with its metadata intact
    """
    copyfile(imageFilename, outputFilename)
    if not os.path.isfile(outputFilename):
        print('ERROR: unable to remove metadata from ' + imageFilename)
        return

    if os.path.isfile('/usr/bin/exiftool'):
        toolName = 'exiftool'
        cmd = ['exiftool', '-all=', outputFilename]
    elif os.path.isfile('/usr/bin/mogrify'):
        toolName = 'mogrify'
        cmd = ['/usr/bin/mogrify', '-strip', outputFilename]
    else:
        return

    print('Removing metadata from ' + outputFilename + ' using ' + toolName)
    # The command is given as an argument list rather than a shell string
    # so that filenames containing spaces or shell metacharacters are
    # passed to the tool verbatim and are never interpreted by a shell
    try:
        subprocess.run(cmd, check=False)  # nosec
    except OSError:
        print('EX: _removeMetaData unable to run ' + toolName)
2019-07-24 13:14:23 +00:00
2020-04-03 16:55:55 +00:00
2021-12-25 16:17:53 +00:00
def _spoofMetaData(base_dir: str, nickname: str, domain: str,
                   outputFilename: str, spoofCity: str,
                   content_license_url: str) -> None:
    """Spoof image metadata using a decoy model for a given city.
    The metadata is written into outputFilename using exiftool. The
    decoy location and camera details come from spoofGeolocation, which
    is given a seed stored within the account directory as 'decoyseed'.
    If exiftool is not installed then an error is printed and the
    image is left unaltered
    """
    if not os.path.isfile(outputFilename):
        print('ERROR: unable to spoof metadata within ' + outputFilename)
        return

    # get the random seed used to generate a unique pattern for this account
    decoySeedFilename = acctDir(base_dir, nickname, domain) + '/decoyseed'
    decoySeed = None
    if os.path.isfile(decoySeedFilename):
        try:
            with open(decoySeedFilename, 'r') as fp:
                decoySeed = int(fp.read())
        except (OSError, ValueError):
            # an unreadable or corrupt seed is replaced by a new one below
            print('EX: unable to read ' + decoySeedFilename)
    if decoySeed is None:
        decoySeed = randint(10000, 10000000000000000)
        try:
            with open(decoySeedFilename, 'w+') as fp:
                fp.write(str(decoySeed))
        except OSError:
            print('EX: unable to write ' + decoySeedFilename)

    if not os.path.isfile('/usr/bin/exiftool'):
        print('ERROR: exiftool is not installed')
        return

    print('Spoofing metadata in ' + outputFilename + ' using exiftool')
    # the claimed creation time is between 2 and 120 minutes in the past
    currTimeAdjusted = \
        datetime.datetime.utcnow() - \
        datetime.timedelta(minutes=randint(2, 120))
    published = currTimeAdjusted.strftime("%Y:%m:%d %H:%M:%S+00:00")
    (latitude, longitude, latitudeRef, longitudeRef,
     camMake, camModel, camSerialNumber) = \
        spoofGeolocation(base_dir, spoofCity, currTimeAdjusted,
                         decoySeed, None, None)

    # An argument list is used rather than a shell string, so that the
    # nickname, domain, license url and filename are passed to exiftool
    # verbatim and can never be interpreted by a shell
    cmd = [
        'exiftool',
        '-artist=@' + nickname + '@' + domain,
        '-Make=' + camMake,
        '-Model=' + camModel,
        '-Comment=' + str(camSerialNumber),
        '-DateTimeOriginal=' + published,
        '-FileModifyDate=' + published,
        '-CreateDate=' + published,
        '-GPSLongitudeRef=' + longitudeRef,
        '-GPSAltitude=0',
        '-GPSLongitude=' + str(longitude),
        '-GPSLatitudeRef=' + latitudeRef,
        '-GPSLatitude=' + str(latitude),
        '-copyright=' + content_license_url,
        # same argument order as before, including this second,
        # empty Comment assignment
        '-Comment=',
        outputFilename
    ]
    try:
        result = subprocess.run(cmd, check=False)  # nosec
    except OSError:
        print('ERROR: exiftool failed to run')
        return
    if result.returncode != 0:
        print('ERROR: exiftool failed to run')
2021-08-13 20:18:36 +00:00
def convertImageToLowBandwidth(imageFilename: str) -> None:
    """Converts an image to a low bandwidth version.
    /usr/bin/convert is used to produce a noisy, dithered monochrome
    version of the image within a temporary file having a '.low'
    extension, which then replaces the original image. If the conversion
    does not produce a file then the original image is left as it was
    """
    low_bandwidthFilename = imageFilename + '.low'
    # remove any previous conversion
    if os.path.isfile(low_bandwidthFilename):
        try:
            os.remove(low_bandwidthFilename)
        except OSError:
            print('EX: convertImageToLowBandwidth unable to delete ' +
                  low_bandwidthFilename)

    # An argument list is used rather than a shell string, so that
    # filenames containing spaces or shell metacharacters are handled
    cmd = [
        '/usr/bin/convert', '+noise', 'Multiplicative',
        '-evaluate', 'median', '10%', '-dither', 'Floyd-Steinberg',
        '-monochrome', imageFilename, low_bandwidthFilename
    ]
    print('Low bandwidth image conversion: ' + ' '.join(cmd))
    try:
        returnCode = subprocess.call(cmd)  # nosec
    except OSError:
        print('EX: convertImageToLowBandwidth unable to run ' + cmd[0])
        returnCode = -1

    # The call above only returns once the conversion has finished.
    # If it reported success then allow some time for the file to
    # appear, but don't wait around for a conversion which failed
    if returnCode == 0:
        ctr = 0
        while not os.path.isfile(low_bandwidthFilename):
            print('Waiting for low bandwidth image conversion ' + str(ctr))
            time.sleep(0.2)
            ctr += 1
            if ctr > 100:
                print('WARN: timed out waiting for ' +
                      'low bandwidth image conversion')
                break

    if not os.path.isfile(low_bandwidthFilename):
        print('Low bandwidth converted image not found: ' +
              low_bandwidthFilename)
        return

    # replace the original in a single step, so that the image can't be
    # lost between deleting the original and renaming the conversion
    try:
        os.replace(low_bandwidthFilename, imageFilename)
    except OSError:
        print('EX: convertImageToLowBandwidth unable to replace ' +
              imageFilename)
    else:
        print('Image converted to low bandwidth ' + imageFilename)
2021-12-25 16:17:53 +00:00
def processMetaData(base_dir: str, nickname: str, domain: str,
                    imageFilename: str, outputFilename: str,
                    city: str, content_license_url: str) -> None:
    """Handles the metadata of an image. The image is copied to
    outputFilename with its metadata removed, and then an attempt
    is made to add spoofed metadata to that copy
    """
    # strip whatever metadata the image arrived with
    _removeMetaData(imageFilename=imageFilename,
                    outputFilename=outputFilename)

    # now add some spoofed data to misdirect surveillance capitalists
    spoofCity = city
    _spoofMetaData(base_dir, nickname, domain, outputFilename,
                   spoofCity, content_license_url)
2021-05-09 19:11:05 +00:00
2021-05-09 12:17:55 +00:00
def _isMedia(imageFilename: str) -> bool:
    """Returns true if the given file exists and its filename ends
    with one of the permitted media extensions
    """
    if not os.path.isfile(imageFilename):
        print('WARN: Media file does not exist ' + imageFilename)
        return False
    if any(imageFilename.endswith('.' + ext)
           for ext in getMediaExtensions()):
        return True
    print('WARN: ' + imageFilename + ' is not a permitted media type')
    return False
2020-04-03 16:55:55 +00:00
2021-12-25 16:17:53 +00:00
def createMediaDirs(base_dir: str, mediaPath: str) -> None:
    """Creates the media directory within base_dir, together with the
    directory for the given media path, which is relative to base_dir.
    Directories which already exist are left alone and any missing
    parent directories are also created
    """
    # exist_ok avoids failing if something else creates the directory
    # between checking for it and making it
    for dirName in (base_dir + '/media', base_dir + '/' + mediaPath):
        os.makedirs(dirName, exist_ok=True)
2020-04-03 16:55:55 +00:00
2019-07-12 19:08:46 +00:00
2019-07-12 19:26:54 +00:00
def getMediaPath() -> str:
    """Returns the relative path of the directory for media attached
    during the current week, in the form media/<weeks since epoch>,
    where the number of whole weeks is calculated in UTC
    """
    # timezone aware datetimes are used because datetime.utcnow is
    # deprecated in newer versions of python. The result is the same
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    currTime = datetime.datetime.now(datetime.timezone.utc)
    weeksSinceEpoch = (currTime - epoch).days // 7
    return 'media/' + str(weeksSinceEpoch)
2019-08-30 15:50:20 +00:00
def getAttachmentMediaType(filename: str) -> str:
    """Returns the type of media for the given file, based upon the
    extension at the end of its filename. This will be
    image, video or audio, or None if no extension is recognised
    """
    # each list of extensions is only obtained if the ones before it
    # did not produce a match
    extensionGetters = (
        ('image', getImageExtensions),
        ('video', getVideoExtensions),
        ('audio', getAudioExtensions)
    )
    for mediaType, getExtensions in extensionGetters:
        for ext in getExtensions():
            if filename.endswith('.' + ext):
                return mediaType
    return None
2020-04-03 16:55:55 +00:00
def _updateEtag(mediaFilename: str) -> None:
    """Calculates the etag for a media file, which is a sha1 of its
    data, and saves it as hexadecimal text within a file having the
    same name with '.etag' appended. Nothing is saved for paths which
    don't contain /media/, or for files which are missing or empty
    """
    # etags are only created for media files which exist
    if '/media/' not in mediaFilename or \
       not os.path.isfile(mediaFilename):
        return

    # load the binary data
    mediaBytes = None
    try:
        with open(mediaFilename, 'rb') as fp:
            mediaBytes = fp.read()
    except OSError:
        print('EX: _updateEtag unable to read ' + str(mediaFilename))
    if not mediaBytes:
        return

    # hash the data and store the result alongside the media file
    digest = sha1(mediaBytes).hexdigest()  # nosec
    try:
        with open(mediaFilename + '.etag', 'w+') as fp:
            fp.write(digest)
    except OSError:
        print('EX: _updateEtag unable to write ' +
              str(mediaFilename) + '.etag')
2019-12-04 18:52:27 +00:00
2020-04-03 16:55:55 +00:00
2021-12-25 17:09:22 +00:00
def attachMedia(base_dir: str, http_prefix: str,
                nickname: str, domain: str, port: int,
                postJson: {}, imageFilename: str,
                mediaType: str, description: str,
                city: str, low_bandwidth: bool,
                content_license_url: str) -> {}:
    """Attaches media to a json object post and returns the post.
    mediaType is the general type, to which a subtype derived from the
    file extension gets appended. The description can be None.
    If base_dir is given then the media is also stored beneath it,
    within a directory for the current week and with a random filename.
    The post is returned unaltered if the file is not permitted media
    """
    if not _isMedia(imageFilename):
        return postJson

    # extensions whose mime subtype differs from the extension itself
    mimeSubtypes = {
        'jpg': 'jpeg',
        'mp3': 'mpeg'
    }
    fileExtension = None
    for ext in getMediaExtensions():
        if imageFilename.endswith('.' + ext):
            fileExtension = mimeSubtypes.get(ext, ext)
    if not fileExtension:
        return postJson
    mediaType = mediaType + '/' + fileExtension
    print('Attached media type: ' + mediaType)

    # go back from the mime subtype to the extension used for the file
    if fileExtension == 'jpeg':
        fileExtension = 'jpg'
    if mediaType == 'audio/mpeg':
        fileExtension = 'mp3'

    # NOTE(review): from here on domain is whatever getFullDomain returns
    # for the domain and port, and this is also what gets passed to
    # processMetaData below - confirm that this is what it expects
    domain = getFullDomain(domain, port)

    weekPath = getMediaPath()
    mediaPath = weekPath + '/' + createPassword(32) + '.' + fileExtension
    if base_dir:
        createMediaDirs(base_dir, weekPath)
        mediaFilename = base_dir + '/' + mediaPath

    isImage = mediaType.startswith('image/')

    # the url uses a different path prefix from the one on disk
    urlPath = \
        mediaPath.replace('media/', 'system/media_attachments/files/', 1)
    attachmentJson = {
        'mediaType': mediaType,
        'name': description,
        'type': 'Document',
        'url': http_prefix + '://' + domain + '/' + urlPath
    }
    if isImage:
        attachmentJson['blurhash'] = _getBlurHash()
        # find the dimensions of the image and add them as metadata
        imageWidth, imageHeight = getImageDimensions(imageFilename)
        if imageWidth and imageHeight:
            attachmentJson['width'] = imageWidth
            attachmentJson['height'] = imageHeight
    postJson['attachment'] = [attachmentJson]

    # without a base directory there is nowhere to store the media
    if not base_dir:
        return postJson

    if isImage:
        if low_bandwidth:
            convertImageToLowBandwidth(imageFilename)
        processMetaData(base_dir, nickname, domain,
                        imageFilename, mediaFilename, city,
                        content_license_url)
    else:
        copyfile(imageFilename, mediaFilename)
    _updateEtag(mediaFilename)

    return postJson
2020-04-03 16:55:55 +00:00
2021-12-25 23:41:17 +00:00
def archiveMedia(base_dir: str, archive_directory: str, maxWeeks: int) -> None:
    """Any media older than the given number of weeks gets archived.
    Media is held within directories named after the number of weeks
    since the epoch. Directories for weeks older than maxWeeks ago are
    moved into the media directory within archive_directory, or are
    deleted if no archive directory is given. Directories whose names
    are not numbers are ignored. Nothing happens if maxWeeks is zero
    """
    if maxWeeks == 0:
        return

    # timezone aware datetimes are used because datetime.utcnow is
    # deprecated in newer versions of python. The result is the same
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    currTime = datetime.datetime.now(datetime.timezone.utc)
    weeksSinceEpoch = (currTime - epoch).days // 7
    minWeek = weeksSinceEpoch - maxWeeks

    mediaDir = base_dir + '/media'
    archiveMediaDir = None
    if archive_directory:
        archiveMediaDir = archive_directory + '/media'
        os.makedirs(archiveMediaDir, exist_ok=True)

    for _subdir, dirs, _files in os.walk(mediaDir):
        for weekDir in dirs:
            # skip anything which isn't a week number, rather than
            # crashing when trying to convert its name
            try:
                weekNumber = int(weekDir)
            except ValueError:
                continue
            if weekNumber >= minWeek:
                continue
            weekPath = os.path.join(mediaDir, weekDir)
            try:
                if archiveMediaDir:
                    move(weekPath, archiveMediaDir)
                else:
                    # archive to /dev/null
                    rmtree(weekPath)
            except OSError:
                # one problematic directory should not prevent
                # the remaining weeks from being archived
                print('EX: archiveMedia unable to archive ' + weekPath)
        # only the directories immediately within media are of interest
        break
2021-06-07 17:55:25 +00:00
def pathIsVideo(path: str) -> bool:
    """Returns true if the given path ends with a video file extension,
    which is either .ogv or .mp4
    """
    return path.endswith(('.ogv', '.mp4'))
def pathIsAudio(path: str) -> bool:
    """Returns true if the given path ends with an audio file extension,
    which is either .ogg or .mp3
    """
    return path.endswith(('.ogg', '.mp3'))
def getImageDimensions(imageFilename: str) -> (int, int):
    """Returns the dimensions of an image file as width, height.
    The identify command is used to obtain the dimensions. If it
    can't be run, or if its output can't be understood, then
    None, None is returned. For images where identify reports more
    than one frame the dimensions of the first frame are returned
    """
    try:
        result = subprocess.run(['identify', '-format', '"%wx%h"',
                                 imageFilename], stdout=subprocess.PIPE)
    except (OSError, subprocess.SubprocessError, TypeError, ValueError):
        # only failures to run the command are caught, so that
        # KeyboardInterrupt and SystemExit are not swallowed
        print('EX: getImageDimensions unable to run identify command')
        return None, None
    if not result or not result.stdout:
        return None, None
    outputStr = result.stdout.decode('utf-8', errors='replace')

    # No shell is involved, so the quotes within the format are part of
    # the output. Each frame produces its own quoted WIDTHxHEIGHT and
    # these are not otherwise separated, so use the quotes to pick out
    # the first one rather than running the frames together
    for frameStr in outputStr.split('"'):
        frameStr = frameStr.strip()
        if not frameStr:
            continue
        widthStr, separator, heightStr = frameStr.partition('x')
        if not separator:
            return None, None
        if not widthStr.isdigit() or not heightStr.isdigit():
            return None, None
        return int(widthStr), int(heightStr)
    return None, None