epicyon/media.py

# Module metadata: file name, authorship, licence, version, maintainer
# contact details, release status and the module group this file is
# assigned to
__filename__ = "media.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.2.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Timeline"

import os
import time
import datetime
import subprocess
import random
from random import randint
from hashlib import sha1
from auth import createPassword
from utils import get_base_content_from_post
from utils import get_full_domain
from utils import get_image_extensions
from utils import get_video_extensions
from utils import get_audio_extensions
from utils import get_media_extensions
from utils import has_object_dict
from utils import acct_dir
from shutil import copyfile
from shutil import rmtree
from shutil import move
from city import spoofGeolocation


def _getBlurHash() -> str:
    """You may laugh, but this is a lot less computationally intensive,
    especially on large images, while still providing some visual variety
    in the timeline
    """
    hashes = [
        "UfGuaW01%gRi%MM{azofozo0V@xuozn#ofs.",
        "UFD]o8-;9FIU~qD%j[%M-;j[ofWB?bt7IURj",
        "UyO|v_1#im=s%y#U%OxDwRt3W9R-ogjHj[WX",
        "U96vAQt6H;WBt7ofWBa#MbWBo#j[byaze-oe",
        "UJKA.q01M|IV%LM|RjNGIVj[f6oLjrofaeof",
        "U9MPjn]?~Cxut~.PS1%1xXIo0fEer_$*^jxG",
        "UtLENXWCRjju~qayaeaz00j[ofayIVkCkCfQ",
        "UHGbeg-pbzWZ.ANI$wsQ$H-;E9W?0Nx]?FjE",
        "UcHU%#4n_ND%?bxatRWBIU%MazxtNaRjs:of",
        "ULR:TsWr~6xZofWWf6s-~6oK9eR,oes-WXNJ",
        "U77VQB-:MaMx%L%MogRkMwkCxuoIS*WYjEsl",
        "U%Nm{8R+%MxuE1t6WBNG-=RjoIt6~Vj]RkR*",
        "UCM7u;?boft7oft7ayj[~qt7WBoft7oft7Rj"
    ]
    return random.choice(hashes)


def _replaceSiloDomain(post_json_object: {},
                       siloDomain: str, replacementDomain: str,
                       system_language: str) -> None:
    """Replace a silo domain with a replacement domain
    """
    if not replacementDomain:
        return
    if not has_object_dict(post_json_object):
        return
    if not post_json_object['object'].get('content'):
        return
    contentStr = get_base_content_from_post(post_json_object, system_language)
    if siloDomain not in contentStr:
        return
    contentStr = contentStr.replace(siloDomain, replacementDomain)
    post_json_object['object']['content'] = contentStr
    if post_json_object['object'].get('contentMap'):
        post_json_object['object']['contentMap'][system_language] = contentStr


def replaceYouTube(post_json_object: {}, replacementDomain: str,
                   system_language: str) -> None:
    """Replaces occurrences of www.youtube.com within the content of
    the post with the given replacement domain.
    This denies Google some, but not all, tracking data
    """
    youtubeDomain = 'www.youtube.com'
    _replaceSiloDomain(post_json_object, youtubeDomain,
                       replacementDomain, system_language)


def replaceTwitter(post_json_object: {}, replacementDomain: str,
                   system_language: str) -> None:
    """Replaces occurrences of twitter.com within the content of
    the post with the given replacement domain.
    This allows you to view twitter posts without having a twitter account
    """
    twitterDomain = 'twitter.com'
    _replaceSiloDomain(post_json_object, twitterDomain,
                       replacementDomain, system_language)


def _removeMetaData(image_filename: str, outputFilename: str) -> None:
    """Attempts to do this with pure python didn't work well,
    so better to use a dedicated tool if one is installed
    """
    copyfile(image_filename, outputFilename)
    if not os.path.isfile(outputFilename):
        print('ERROR: unable to remove metadata from ' + image_filename)
        return
    if os.path.isfile('/usr/bin/exiftool'):
        print('Removing metadata from ' + outputFilename + ' using exiftool')
        os.system('exiftool -all= ' + outputFilename)  # nosec
    elif os.path.isfile('/usr/bin/mogrify'):
        print('Removing metadata from ' + outputFilename + ' using mogrify')
        os.system('/usr/bin/mogrify -strip ' + outputFilename)  # nosec


def _spoofMetaData(base_dir: str, nickname: str, domain: str,
                   outputFilename: str, spoofCity: str,
                   content_license_url: str) -> None:
    """Spoof image metadata using a decoy model for a given city
    """
    if not os.path.isfile(outputFilename):
        print('ERROR: unable to spoof metadata within ' + outputFilename)
        return

    # get the random seed used to generate a unique pattern for this account
    decoySeedFilename = acct_dir(base_dir, nickname, domain) + '/decoyseed'
    decoySeed = 63725
    if os.path.isfile(decoySeedFilename):
        with open(decoySeedFilename, 'r') as fp:
            decoySeed = int(fp.read())
    else:
        decoySeed = randint(10000, 10000000000000000)
        try:
            with open(decoySeedFilename, 'w+') as fp:
                fp.write(str(decoySeed))
        except OSError:
            print('EX: unable to write ' + decoySeedFilename)

    if os.path.isfile('/usr/bin/exiftool'):
        print('Spoofing metadata in ' + outputFilename + ' using exiftool')
        curr_timeAdjusted = \
            datetime.datetime.utcnow() - \
            datetime.timedelta(minutes=randint(2, 120))
        published = curr_timeAdjusted.strftime("%Y:%m:%d %H:%M:%S+00:00")
        (latitude, longitude, latitudeRef, longitudeRef,
         camMake, camModel, camSerialNumber) = \
            spoofGeolocation(base_dir, spoofCity, curr_timeAdjusted,
                             decoySeed, None, None)
        if os.system('exiftool -artist=@"' + nickname + '@' + domain + '" ' +
                     '-Make="' + camMake + '" ' +
                     '-Model="' + camModel + '" ' +
                     '-Comment="' + str(camSerialNumber) + '" ' +
                     '-DateTimeOriginal="' + published + '" ' +
                     '-FileModifyDate="' + published + '" ' +
                     '-CreateDate="' + published + '" ' +
                     '-GPSLongitudeRef=' + longitudeRef + ' ' +
                     '-GPSAltitude=0 ' +
                     '-GPSLongitude=' + str(longitude) + ' ' +
                     '-GPSLatitudeRef=' + latitudeRef + ' ' +
                     '-GPSLatitude=' + str(latitude) + ' ' +
                     '-copyright="' + content_license_url + '" ' +
                     '-Comment="" ' +
                     outputFilename) != 0:  # nosec
            print('ERROR: exiftool failed to run')
    else:
        print('ERROR: exiftool is not installed')
        return


def convertImageToLowBandwidth(image_filename: str) -> None:
    """Converts an image to a low bandwidth version.
    The conversion is written to image_filename + '.low' by
    /usr/bin/convert and, if that file was produced, it then replaces
    the original image. If the conversion fails then the original image
    is left as it was
    """
    low_bandwidthFilename = image_filename + '.low'
    if os.path.isfile(low_bandwidthFilename):
        try:
            os.remove(low_bandwidthFilename)
        except OSError:
            print('EX: convertImageToLowBandwidth unable to delete ' +
                  low_bandwidthFilename)

    # The command is an argument list rather than a shell string, so that
    # spaces or shell metacharacters within the filename can't break or
    # alter the command
    cmd = [
        '/usr/bin/convert', '+noise', 'Multiplicative',
        '-evaluate', 'median', '10%', '-dither', 'Floyd-Steinberg',
        '-monochrome', image_filename, low_bandwidthFilename
    ]
    print('Low bandwidth image conversion: ' + ' '.join(cmd))
    try:
        # this blocks until convert has exited, so there is no need to
        # poll for the output file afterwards. Previously a failed
        # conversion resulted in a stall of about twenty seconds
        subprocess.call(cmd)  # nosec
    except OSError:
        print('EX: convertImageToLowBandwidth unable to run ' + cmd[0])

    if not os.path.isfile(low_bandwidthFilename):
        print('Low bandwidth converted image not found: ' +
              low_bandwidthFilename)
        return

    try:
        # replaces the original in a single step, so that the image
        # is not lost if the replacement fails
        os.replace(low_bandwidthFilename, image_filename)
    except OSError:
        print('EX: convertImageToLowBandwidth unable to replace ' +
              image_filename)
        return
    if os.path.isfile(image_filename):
        print('Image converted to low bandwidth ' + image_filename)


def processMetaData(base_dir: str, nickname: str, domain: str,
                    image_filename: str, outputFilename: str,
                    city: str, content_license_url: str) -> None:
    """Handles the metadata of an image being attached. The image is
    copied to outputFilename with its metadata removed, and then
    spoofed metadata is added to the copy if that is possible
    """
    # strip out whatever metadata the image arrived with
    _removeMetaData(image_filename=image_filename,
                    outputFilename=outputFilename)

    # then add decoy data to misdirect surveillance capitalists
    _spoofMetaData(base_dir=base_dir, nickname=nickname, domain=domain,
                   outputFilename=outputFilename, spoofCity=city,
                   content_license_url=content_license_url)


def _isMedia(image_filename: str) -> bool:
    """Is the given file a media file?
    """
    if not os.path.isfile(image_filename):
        print('WARN: Media file does not exist ' + image_filename)
        return False
    permittedMedia = get_media_extensions()
    for m in permittedMedia:
        if image_filename.endswith('.' + m):
            return True
    print('WARN: ' + image_filename + ' is not a permitted media type')
    return False


def createMediaDirs(base_dir: str, mediaPath: str) -> None:
    """Ensures that the media directory, and the directory mediaPath
    given relative to base_dir, both exist.
    Any missing parent directories are also created, and it is not
    an error if the directories already exist. This avoids a failure
    when two uploads try to create the same directory at the same time
    """
    os.makedirs(base_dir + '/media', exist_ok=True)
    os.makedirs(base_dir + '/' + mediaPath, exist_ok=True)


def getMediaPath() -> str:
    """Returns the relative directory used for media uploaded during
    the current week, which is 'media/' followed by the number of
    whole weeks between 1st January 1970 and the current UTC time
    """
    unixEpoch = datetime.datetime(1970, 1, 1)
    daysElapsed = (datetime.datetime.utcnow() - unixEpoch).days
    return 'media/' + str(daysElapsed // 7)


def getAttachmentMediaType(filename: str) -> str:
    """Returns the kind of media which the given filename refers to,
    going by its extension: 'image', 'video' or 'audio'.
    None is returned if the extension is not recognised
    """
    # checked in this order, with each list of extensions only being
    # obtained if the previous kinds did not match
    extensionGetters = (
        ('image', get_image_extensions),
        ('video', get_video_extensions),
        ('audio', get_audio_extensions)
    )
    for mediaKind, getExtensions in extensionGetters:
        for extension in getExtensions():
            if filename.endswith('.' + extension):
                return mediaKind
    return None


def _updateEtag(mediaFilename: str) -> None:
    """ calculate the etag, which is a sha1 of the data
    """
    # only create etags for media
    if '/media/' not in mediaFilename:
        return

    # check that the media exists
    if not os.path.isfile(mediaFilename):
        return

    # read the binary data
    data = None
    try:
        with open(mediaFilename, 'rb') as mediaFile:
            data = mediaFile.read()
    except OSError:
        print('EX: _updateEtag unable to read ' + str(mediaFilename))

    if not data:
        return
    # calculate hash
    etag = sha1(data).hexdigest()  # nosec
    # save the hash
    try:
        with open(mediaFilename + '.etag', 'w+') as etagFile:
            etagFile.write(etag)
    except OSError:
        print('EX: _updateEtag unable to write ' +
              str(mediaFilename) + '.etag')


def attachMedia(base_dir: str, http_prefix: str,
                nickname: str, domain: str, port: int,
                postJson: {}, image_filename: str,
                mediaType: str, description: str,
                city: str, low_bandwidth: bool,
                content_license_url: str) -> {}:
    """Adds a single media attachment to a json object post and
    returns the post. If base_dir is given then the file is also stored
    within the media directory for the current week, with images having
    their metadata processed, and an etag is created for it.
    The description can be None.
    The post is returned unaltered if the file does not exist or does
    not have a permitted media extension
    """
    if not _isMedia(image_filename):
        return postJson

    # The mime subtype comes from the filename extension. If more than
    # one permitted extension matches then the last match is used
    mimeSubtype = None
    for extension in get_media_extensions():
        if not image_filename.endswith('.' + extension):
            continue
        if extension == 'jpg':
            mimeSubtype = 'jpeg'
        elif extension == 'mp3':
            mimeSubtype = 'mpeg'
        else:
            mimeSubtype = extension
    if not mimeSubtype:
        return postJson
    mediaType = mediaType + '/' + mimeSubtype
    print('Attached media type: ' + mediaType)

    # the stored file uses the short form of the extension
    if mimeSubtype == 'jpeg':
        fileExtension = 'jpg'
    else:
        fileExtension = mimeSubtype
    if mediaType == 'audio/mpeg':
        fileExtension = 'mp3'

    domain = get_full_domain(domain, port)

    # the file is stored under a random name within this week's directory
    weekPath = getMediaPath()
    mediaPath = weekPath + '/' + createPassword(32) + '.' + fileExtension
    if base_dir:
        createMediaDirs(base_dir, weekPath)
        mediaFilename = base_dir + '/' + mediaPath

    isImage = mediaType.startswith('image/')

    # the url uses a different path prefix from the one on disk
    attachmentUrl = \
        http_prefix + '://' + domain + '/' + \
        mediaPath.replace('media/', 'system/media_attachments/files/', 1)
    attachmentJson = {
        'mediaType': mediaType,
        'name': description,
        'type': 'Document',
        'url': attachmentUrl
    }
    if isImage:
        attachmentJson['blurhash'] = _getBlurHash()
        # include the image dimensions, if they can be found
        imageWidth, imageHeight = getImageDimensions(image_filename)
        if imageWidth and imageHeight:
            attachmentJson['width'] = imageWidth
            attachmentJson['height'] = imageHeight

    postJson['attachment'] = [attachmentJson]

    if not base_dir:
        return postJson

    # store the media file
    if isImage:
        if low_bandwidth:
            convertImageToLowBandwidth(image_filename)
        processMetaData(base_dir, nickname, domain,
                        image_filename, mediaFilename, city,
                        content_license_url)
    else:
        copyfile(image_filename, mediaFilename)
    _updateEtag(mediaFilename)

    return postJson


def archiveMedia(base_dir: str, archive_directory: str, maxWeeks: int) -> None:
    """Any media older than the given number of weeks gets archived.
    The directories directly beneath base_dir/media are named by week
    number. Those older than maxWeeks are moved into
    archive_directory/media, or are deleted if no archive directory is
    given. A maxWeeks of zero means that nothing is archived.
    Directories whose names are not numbers are left alone
    """
    if maxWeeks == 0:
        return

    curr_time = datetime.datetime.utcnow()
    weeksSinceEpoch = int((curr_time - datetime.datetime(1970, 1, 1)).days/7)
    minWeek = weeksSinceEpoch - maxWeeks

    if archive_directory:
        if not os.path.isdir(archive_directory):
            os.mkdir(archive_directory)
        if not os.path.isdir(archive_directory + '/media'):
            os.mkdir(archive_directory + '/media')

    mediaDir = base_dir + '/media'
    # only the top level of the media directory is of interest.
    # If it doesn't exist then there are no week directories
    _, weekDirs, _ = next(os.walk(mediaDir), (None, [], []))
    for weekDir in weekDirs:
        try:
            weekNumber = int(weekDir)
        except ValueError:
            # a directory which isn't a week number should not
            # abort the archiving of the others
            continue
        if weekNumber >= minWeek:
            continue
        weekPath = os.path.join(mediaDir, weekDir)
        if archive_directory:
            move(weekPath, archive_directory + '/media')
        else:
            # archive to /dev/null
            rmtree(weekPath, ignore_errors=False)


def pathIsVideo(path: str) -> bool:
    """Returns True if the path ends with a video extension,
    which is either .ogv or .mp4
    """
    videoSuffixes = ('.ogv', '.mp4')
    return path.endswith(videoSuffixes)


def pathIsAudio(path: str) -> bool:
    """Returns True if the path ends with an audio extension,
    which is either .ogg or .mp3
    """
    audioSuffixes = ('.ogg', '.mp3')
    return path.endswith(audioSuffixes)


def getImageDimensions(image_filename: str) -> (int, int):
    """Returns the dimensions of an image file as (width, height),
    obtained by running the identify command.
    (None, None) is returned if identify could not be run or if its
    output could not be understood
    """
    try:
        result = subprocess.run(['identify', '-format', '"%wx%h"',
                                 image_filename], stdout=subprocess.PIPE)
    except (OSError, ValueError, subprocess.SubprocessError):
        print('EX: getImageDimensions unable to run identify command')
        return None, None

    outputStr = result.stdout.decode('utf-8', errors='ignore')
    # The format is applied to each frame of the image, so that something
    # having multiple frames, such as an animated gif, produces
    # "WxH""WxH"... Only the first quoted entry is used, since running
    # the entries together would give the wrong height
    for dimensionsStr in outputStr.split('"'):
        dimensionsStr = dimensionsStr.strip()
        if not dimensionsStr:
            continue
        widthStr, separator, heightStr = dimensionsStr.partition('x')
        if not separator:
            return None, None
        if not widthStr.isdigit() or not heightStr.isdigit():
            return None, None
        return int(widthStr), int(heightStr)
    return None, None