epicyon/media.py

417 lines
15 KiB
Python
Raw Normal View History

2020-04-03 16:55:55 +00:00
__filename__ = "media.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2021-01-26 10:07:42 +00:00
__version__ = "1.2.0"
2020-04-03 16:55:55 +00:00
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
2019-07-12 19:08:46 +00:00
import os
import datetime
2021-05-10 10:46:45 +00:00
import random
import math
2021-05-09 12:17:55 +00:00
from random import randint
2019-12-04 18:52:27 +00:00
from hashlib import sha1
2019-07-12 19:08:46 +00:00
from auth import createPassword
2020-12-16 11:04:46 +00:00
from utils import getFullDomain
2020-11-21 11:54:29 +00:00
from utils import getImageExtensions
from utils import getVideoExtensions
from utils import getAudioExtensions
from utils import getMediaExtensions
2019-07-12 19:08:46 +00:00
from shutil import copyfile
2019-07-12 19:51:10 +00:00
from shutil import rmtree
2019-07-12 19:55:23 +00:00
from shutil import move
2019-07-12 19:08:46 +00:00
2020-04-03 16:55:55 +00:00
def replaceYouTube(postJsonObject: {}, replacementDomain: str) -> None:
    """Replace YouTube with a replacement domain
    This denies Google some, but not all, tracking data

    Every occurrence of 'www.youtube.com' within the text content of
    the post's 'object' is replaced, in place, by replacementDomain.
    Nothing is changed if no replacement domain is given, if the post
    has no 'object' dict, or if that object has no text content.
    """
    if not replacementDomain:
        return
    if not isinstance(postJsonObject, dict):
        return
    # get() rather than indexing, so that a post without an 'object'
    # field is ignored instead of raising KeyError
    postObject = postJsonObject.get('object')
    if not isinstance(postObject, dict):
        return
    content = postObject.get('content')
    # only text content can contain a domain to be replaced
    if not content or not isinstance(content, str):
        return
    if 'www.youtube.com' not in content:
        return
    postObject['content'] = \
        content.replace('www.youtube.com', replacementDomain)
2020-04-03 16:55:55 +00:00
2021-05-09 12:17:55 +00:00
def _removeMetaData(imageFilename: str, outputFilename: str) -> None:
    """Copies an image to outputFilename and strips the metadata
    from the copy

    Attempts to do this with pure python didn't work well,
    so better to use a dedicated tool if one is installed.
    exiftool is used if it exists within /usr/bin, otherwise mogrify.
    If neither is installed then the copy is left unmodified.
    """
    import subprocess  # nosec

    copyfile(imageFilename, outputFilename)
    if not os.path.isfile(outputFilename):
        print('ERROR: unable to remove metadata from ' + imageFilename)
        return
    if os.path.isfile('/usr/bin/exiftool'):
        print('Removing metadata from ' + outputFilename + ' using exiftool')
        # run the same binary whose existence was just checked
        cmd = ['/usr/bin/exiftool', '-all=', outputFilename]
    elif os.path.isfile('/usr/bin/mogrify'):
        print('Removing metadata from ' + outputFilename + ' using mogrify')
        cmd = ['/usr/bin/mogrify', '-strip', outputFilename]
    else:
        return
    # An argument list is used, without a shell, so that spaces or shell
    # metacharacters within the filename can't alter the command.
    # As before, the exit status of the tool is not checked.
    try:
        subprocess.run(cmd, check=False)  # nosec
    except OSError as ex:
        print('ERROR: unable to remove metadata from ' +
              outputFilename + ' ' + str(ex))
2019-07-24 13:14:23 +00:00
2020-04-03 16:55:55 +00:00
2021-05-10 11:19:42 +00:00
def _getCityPulse(currTimeOfDay, decoySeed: int) -> (float, float):
    """The data decoy

    Simulates the expected average pattern of movement of a person
    who lives and works in a city, commuting in and out of the central
    district. A random generator seeded with decoySeed gives each
    seed its own repeatable pattern, while the unseeded generator adds
    variation to the start and end of the working day and the
    occasional completely random position.

    Returns a polar coordinate:
    distance from the city centre in the range 0.0 - 1.0,
    followed by the angle in radians
    """
    seededRandom = random.Random(decoySeed)
    hourJitter = 3
    cityCenterActivities = ("work", "shop", "play", "party")

    dayOfWeek = currTimeOfDay.weekday()
    isWorkingDay = dayOfWeek < 5
    # the active part of the day starts and ends at slightly varying hours
    dayStartHour = 7 + randint(0, hourJitter)
    dayEndHour = 17 + randint(0, hourJitter)

    # what is the decoy doing at this time on this day?
    if currTimeOfDay.hour <= dayStartHour:
        activity, activityIndex = "sleep", 0
    elif currTimeOfDay.hour <= dayEndHour:
        if isWorkingDay:
            activity, activityIndex = "work", 1
        elif dayOfWeek == 5:
            activity, activityIndex = "shop", 2
        else:
            activity, activityIndex = "play", 3
    elif isWorkingDay:
        activity, activityIndex = "evening", 4
    else:
        activity, activityIndex = "party", 5

    angleFraction = \
        seededRandom.randint(0, 100000 - 5 + activityIndex) / 100000
    angleRadians = angleFraction * 2 * math.pi

    # some people are quite random, others have more predictable habits
    wildcardChance = seededRandom.randint(1, 3)
    if randint(0, 100) < wildcardChance:
        # occasionally throw in a wildcard to keep the
        # machine learning guessing
        distanceFromCityCenter = randint(0, 100000) / 100000
        angleRadians = (randint(0, 100000) / 100000) * 2 * math.pi
        return distanceFromCityCenter, angleRadians

    # what constitutes the central district is fuzzy
    districtFuzz = (seededRandom.randint(0, 100000) / 100000) * 0.1
    centralRadius = 0.3 + districtFuzz
    positionFraction = seededRandom.randint(0, 100000) / 100000
    if activity in cityCenterActivities:
        # if we are busy then we're somewhere in the city center
        distanceFromCityCenter = positionFraction * centralRadius
    else:
        # otherwise we're in the burbs
        distanceFromCityCenter = centralRadius + \
            ((1.0 - centralRadius) * positionFraction)
    return distanceFromCityCenter, angleRadians
2021-05-09 19:11:05 +00:00
def spoofGeolocation(baseDir: str,
                     city: str, currTime, decoySeed: int,
                     citiesList: []) -> (float, float, str, str):
    """Given a city and the current time spoofs the location
    for an image
    returns latitude, longitude, N/S, E/W

    The city is looked up within citiesList or, if that is empty,
    within custom_locations.txt (falling back to locations.txt) inside
    baseDir. Each entry is expected to have colon separated fields, with
    the latitude as the second field and the longitude as the third.
    An 'S' within the latitude or a 'W' within the longitude indicates
    the hemisphere, otherwise north and east are assumed.
    The first entry containing the city name (case insensitive) is used.

    A default location is returned if no city is given, no locations
    are available, or no usable entry matches the city.
    """
    cityRadius = 0.1
    variance = 0.001
    defaultLocation = (51.8744, 0.368333, 'N', 'W')

    # without a city name there is nothing to search for. An empty
    # name would otherwise match every entry and None would raise
    if not city:
        return defaultLocation

    if citiesList:
        cities = citiesList
    else:
        locationsFilename = baseDir + '/custom_locations.txt'
        if not os.path.isfile(locationsFilename):
            locationsFilename = baseDir + '/locations.txt'
        if not os.path.isfile(locationsFilename):
            return defaultLocation
        with open(locationsFilename, "r") as f:
            cities = f.readlines()

    city = city.lower()
    for cityName in cities:
        if city not in cityName.lower():
            continue
        # skip malformed entries rather than failing on them
        fields = cityName.split(':')
        if len(fields) < 3:
            continue
        latitude = fields[1]
        longitude = fields[2]
        latdirection = 'N'
        longdirection = 'E'
        if 'S' in latitude:
            latdirection = 'S'
            latitude = latitude.replace('S', '')
        if 'W' in longitude:
            longdirection = 'W'
            longitude = longitude.replace('W', '')
        try:
            latitude = float(latitude)
            longitude = float(longitude)
        except ValueError:
            continue
        # get the time of day at the city
        # (roughly one hour for each 15 degrees of longitude)
        approxTimeZone = int(longitude / 15.0)
        if longdirection == 'E':
            approxTimeZone = -approxTimeZone
        currTimeAdjusted = currTime - \
            datetime.timedelta(hours=approxTimeZone)
        # patterns of activity change in the city over time
        (distanceFromCityCenter, angleRadians) = \
            _getCityPulse(currTimeAdjusted, decoySeed)
        # Get the position within the city, with some randomness added
        latitude += \
            distanceFromCityCenter * cityRadius * math.cos(angleRadians)
        # add a small amount of variance around the location
        fraction = randint(0, 100000) / 100000
        latitude += (fraction * fraction * variance) - (variance / 2.0)

        longitude += \
            distanceFromCityCenter * cityRadius * math.sin(angleRadians)
        # add a small amount of variance around the location
        fraction = randint(0, 100000) / 100000
        longitude += (fraction * fraction * variance) - (variance / 2.0)

        # gps locations aren't transcendental, so round to a fixed
        # number of decimal places
        latitude = int(latitude * 10000) / 10000.0
        longitude = int(longitude * 10000) / 10000.0
        return latitude, longitude, latdirection, longdirection

    return defaultLocation
2021-05-10 10:46:45 +00:00
def _spoofMetaData(baseDir: str, nickname: str, domain: str,
                   outputFilename: str, spoofCity: str) -> None:
    """Spoof image metadata using a decoy model for a given city

    Writes an artist, creation dates within the last couple of hours
    and a GPS position somewhere around spoofCity into outputFilename
    using exiftool. The decoy seed for the account is read from, or
    created within, the account directory so that the same account
    keeps the same decoy pattern.
    Does nothing other than print an error if the file does not exist
    or exiftool is not installed.
    """
    import subprocess  # nosec

    if not os.path.isfile(outputFilename):
        print('ERROR: unable to spoof metadata within ' + outputFilename)
        return

    # get the random seed used to generate a unique pattern for this account
    decoySeedFilename = \
        baseDir + '/accounts/' + nickname + '@' + domain + '/decoyseed'
    decoySeed = None
    if os.path.isfile(decoySeedFilename):
        try:
            with open(decoySeedFilename, 'r') as fp:
                decoySeed = int(fp.read())
        except (OSError, ValueError) as ex:
            # an unreadable or corrupt seed gets replaced below
            print('WARN: unable to read decoy seed ' +
                  decoySeedFilename + ' ' + str(ex))
    if decoySeed is None:
        decoySeed = randint(10000, 10000000000000000)
        try:
            with open(decoySeedFilename, 'w+') as fp:
                fp.write(str(decoySeed))
        except OSError as ex:
            # best effort: carry on with the unsaved seed
            print('WARN: unable to save decoy seed ' +
                  decoySeedFilename + ' ' + str(ex))

    if not os.path.isfile('/usr/bin/exiftool'):
        print('ERROR: exiftool is not installed')
        return

    print('Spoofing metadata in ' + outputFilename + ' using exiftool')
    currTimeAdjusted = \
        datetime.datetime.utcnow() - \
        datetime.timedelta(minutes=randint(2, 120))
    published = currTimeAdjusted.strftime("%Y:%m:%d %H:%M:%S+00:00")
    (latitude, longitude, latitudeRef, longitudeRef) = \
        spoofGeolocation(baseDir, spoofCity, currTimeAdjusted,
                         decoySeed, None)
    # An argument list is used, without a shell, so that the nickname
    # and filename can't be interpreted as shell syntax.
    # As before, the exit status of exiftool is not checked.
    cmd = [
        '/usr/bin/exiftool',
        '-artist=' + nickname,
        '-DateTimeOriginal=' + published,
        '-FileModifyDate=' + published,
        '-CreateDate=' + published,
        '-GPSLongitudeRef=' + longitudeRef,
        '-GPSAltitude=0',
        '-GPSLongitude=' + str(longitude),
        '-GPSLatitudeRef=' + latitudeRef,
        '-GPSLatitude=' + str(latitude),
        '-Comment=',
        outputFilename
    ]
    try:
        subprocess.run(cmd, check=False)  # nosec
    except OSError as ex:
        print('ERROR: unable to spoof metadata within ' +
              outputFilename + ' ' + str(ex))
2021-05-10 10:46:45 +00:00
def processMetaData(baseDir: str, nickname: str, domain: str,
                    imageFilename: str, outputFilename: str,
                    city: str) -> None:
    """Handles the metadata of an image being attached to a post.
    The image is copied to outputFilename with its metadata removed,
    then spoofed metadata is added to the copy where that's possible
    """
    # strip whatever metadata the original image contained
    _removeMetaData(imageFilename=imageFilename,
                    outputFilename=outputFilename)
    # then misdirect surveillance capitalists with some spoofed data
    _spoofMetaData(baseDir=baseDir, nickname=nickname, domain=domain,
                   outputFilename=outputFilename, spoofCity=city)
2021-05-09 19:11:05 +00:00
2021-05-09 12:17:55 +00:00
def _isMedia(imageFilename: str) -> bool:
    """Returns true if the given filename is an existing file
    having one of the permitted media extensions
    """
    if os.path.isfile(imageFilename):
        # endswith accepts a tuple of candidate suffixes
        permittedSuffixes = \
            tuple('.' + ext for ext in getMediaExtensions())
        if imageFilename.endswith(permittedSuffixes):
            return True
        print('WARN: ' + imageFilename + ' is not a permitted media type')
    else:
        print('WARN: Media file does not exist ' + imageFilename)
    return False
2020-04-03 16:55:55 +00:00
def createMediaDirs(baseDir: str, mediaPath: str) -> None:
    """Creates the media directory within baseDir, together with the
    mediaPath subdirectory, if they don't already exist

    mediaPath is relative to baseDir, for example 'media/2680'
    """
    # makedirs with exist_ok avoids failing if another thread or process
    # creates the directory between an existence check and its creation,
    # and also copes with media paths more than one level deep
    os.makedirs(baseDir + '/media', exist_ok=True)
    os.makedirs(baseDir + '/' + mediaPath, exist_ok=True)
2019-07-12 19:08:46 +00:00
2019-07-12 19:26:54 +00:00
def getMediaPath() -> str:
    """Returns the relative path of the directory in which media for
    the current week is stored: 'media/' followed by the number of
    whole weeks (UTC) since 1st January 1970
    """
    utc = datetime.timezone.utc
    unixEpoch = datetime.datetime(1970, 1, 1, tzinfo=utc)
    elapsed = datetime.datetime.now(utc) - unixEpoch
    weekNumber = elapsed.days // 7
    return 'media/' + str(weekNumber)
2019-08-30 15:50:20 +00:00
def getAttachmentMediaType(filename: str) -> str:
    """Returns the kind of media which the given filename contains,
    judged by its extension: 'image', 'video' or 'audio'.
    None is returned if the extension isn't recognised
    """
    # checked in this order, with the extensions for each kind only
    # being looked up if the previous kinds didn't match
    mediaKinds = (
        ('image', getImageExtensions),
        ('video', getVideoExtensions),
        ('audio', getAudioExtensions)
    )
    for kindName, getExtensions in mediaKinds:
        for ext in getExtensions():
            if filename.endswith('.' + ext):
                return kindName
    return None
2020-04-03 16:55:55 +00:00
def _updateEtag(mediaFilename: str) -> None:
    """Calculates the etag, which is a sha1 of the data, and saves it
    alongside the media as a file with the same name plus '.etag'

    Nothing is saved if the filename isn't within a media directory,
    if the file doesn't exist, or if it's empty or can't be read
    """
    # only create etags for media
    if '/media/' not in mediaFilename:
        return

    # check that the media exists
    if not os.path.isfile(mediaFilename):
        return

    # read the binary data
    try:
        with open(mediaFilename, 'rb') as mediaFile:
            data = mediaFile.read()
    except OSError as ex:
        # creating the etag is best effort, so report and carry on
        print('WARN: unable to read media for etag ' +
              mediaFilename + ' ' + str(ex))
        return

    if not data:
        return

    # calculate hash
    etag = sha1(data).hexdigest()  # nosec

    # save the hash
    try:
        with open(mediaFilename + '.etag', 'w+') as etagFile:
            etagFile.write(etag)
    except OSError as ex:
        print('WARN: unable to save etag for ' +
              mediaFilename + ' ' + str(ex))
2020-04-03 16:55:55 +00:00
2021-05-09 12:17:55 +00:00
def attachMedia(baseDir: str, httpPrefix: str,
                nickname: str, domain: str, port: int,
                postJson: {}, imageFilename: str,
                mediaType: str, description: str,
                city: str) -> {}:
    """Attaches media to a json object post
    The description can be None

    mediaType is the general kind of media (eg. 'image') to which a
    subtype taken from the file extension gets appended.
    If baseDir is given then the media is also copied into the media
    directory for the current week, under a random filename, and an
    etag is created for it. Images have their metadata removed and
    spoofed (using the given city) rather than being copied directly.

    Returns postJson, which is unmodified if the file isn't a
    permitted type of media
    """
    if not _isMedia(imageFilename):
        return postJson

    # the media subtype comes from the extension of the file
    fileExtension = None
    acceptedTypes = getMediaExtensions()
    for mType in acceptedTypes:
        if imageFilename.endswith('.' + mType):
            if mType == 'jpg':
                mType = 'jpeg'
            if mType == 'mp3':
                mType = 'mpeg'
            fileExtension = mType
    if not fileExtension:
        return postJson
    mediaType = mediaType + '/' + fileExtension
    print('Attached media type: ' + mediaType)

    # the extension of the stored file, which differs from the
    # media subtype for jpeg images and mp3 audio
    if fileExtension == 'jpeg':
        fileExtension = 'jpg'
    if mediaType == 'audio/mpeg':
        fileExtension = 'mp3'

    domain = getFullDomain(domain, port)

    mPath = getMediaPath()
    mediaPath = mPath + '/' + createPassword(32) + '.' + fileExtension
    if baseDir:
        createMediaDirs(baseDir, mPath)
        mediaFilename = baseDir + '/' + mediaPath

    attachmentJson = {
        'mediaType': mediaType,
        'name': description,
        'type': 'Document',
        'url': httpPrefix + '://' + domain + '/' + mediaPath
    }
    if mediaType.startswith('image/'):
        # this key was previously misspelled as 'focialPoint'
        attachmentJson['focalPoint'] = [0.0, 0.0]
    postJson['attachment'] = [attachmentJson]

    if baseDir:
        if mediaType.startswith('image/'):
            # NOTE(review): domain includes the port at this point if
            # getFullDomain added one - confirm that this matches the
            # account directory naming used for the decoy seed
            processMetaData(baseDir, nickname, domain,
                            imageFilename, mediaFilename, city)
        else:
            copyfile(imageFilename, mediaFilename)
        _updateEtag(mediaFilename)

    return postJson
2020-04-03 16:55:55 +00:00
def archiveMedia(baseDir: str, archiveDirectory: str, maxWeeks=4) -> None:
    """Any media older than the given number of weeks gets archived

    Media is stored within baseDir/media in directories named after
    the number of weeks since 1st January 1970. Directories older than
    maxWeeks are moved into archiveDirectory/media, or are deleted
    if no archive directory is given.
    Directories whose names aren't week numbers are left alone.
    Setting maxWeeks to zero disables archiving.
    """
    if maxWeeks == 0:
        return

    utc = datetime.timezone.utc
    elapsed = \
        datetime.datetime.now(utc) - datetime.datetime(1970, 1, 1, tzinfo=utc)
    weeksSinceEpoch = elapsed.days // 7
    minWeek = weeksSinceEpoch - maxWeeks

    if archiveDirectory:
        # also creates the archive directory itself, and any missing
        # parent directories
        os.makedirs(archiveDirectory + '/media', exist_ok=True)

    mediaDir = baseDir + '/media'
    # only the directories immediately within the media directory
    # are of interest. There are none if it doesn't exist
    _, weekDirs, _ = next(os.walk(mediaDir), (mediaDir, [], []))
    for weekDir in weekDirs:
        try:
            weekNumber = int(weekDir)
        except ValueError:
            # not a week directory, so not ours to archive
            continue
        if weekNumber >= minWeek:
            continue
        if archiveDirectory:
            move(os.path.join(mediaDir, weekDir),
                 archiveDirectory + '/media')
        else:
            # archive to /dev/null
            rmtree(os.path.join(mediaDir, weekDir))