epicyon/media.py

861 lines
28 KiB
Python
Raw Normal View History

2020-04-03 16:55:55 +00:00
__filename__ = "media.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-01-21 19:01:20 +00:00
__version__ = "1.5.0"
2020-04-03 16:55:55 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2020-04-03 16:55:55 +00:00
__status__ = "Production"
2021-06-15 15:08:12 +00:00
__module_group__ = "Timeline"
2019-07-12 19:08:46 +00:00
import os
2021-08-13 19:09:38 +00:00
import time
2019-07-12 19:08:46 +00:00
import datetime
import subprocess
2021-10-11 17:20:16 +00:00
import random
2021-05-09 12:17:55 +00:00
from random import randint
2019-12-04 18:52:27 +00:00
from hashlib import sha1
2021-12-28 21:36:27 +00:00
from auth import create_password
2023-11-20 22:27:58 +00:00
from utils import date_epoch
from utils import date_utcnow
from utils import safe_system_string
2021-12-26 11:29:40 +00:00
from utils import get_base_content_from_post
2021-12-26 12:45:03 +00:00
from utils import get_full_domain
2021-12-26 14:26:16 +00:00
from utils import get_image_extensions
2021-12-26 14:20:09 +00:00
from utils import get_video_extensions
2021-12-26 14:24:03 +00:00
from utils import get_audio_extensions
2021-12-26 14:39:49 +00:00
from utils import get_media_extensions
2021-12-26 10:57:03 +00:00
from utils import has_object_dict
2021-12-26 12:02:29 +00:00
from utils import acct_dir
from utils import get_watermark_file
2019-07-12 19:08:46 +00:00
from shutil import copyfile
2019-07-12 19:51:10 +00:00
from shutil import rmtree
2019-07-12 19:55:23 +00:00
from shutil import move
2021-12-29 21:55:09 +00:00
from city import spoof_geolocation
2019-07-12 19:08:46 +00:00
2020-04-03 16:55:55 +00:00
2022-05-20 17:01:12 +00:00
# music file ID3 v1 genres
# Maps the numeric genre code to its display name. It is used by
# get_music_metadata to translate a purely numeric Genre tag into text.
# NOTE: entries are listed interleaved (n, n + 96) rather than in
# ascending key order; lookups are by key so the order does not matter
# for the code in this file
music_genre = {
    0: "Blues",
    96: "Big Band",
    1: "Classic Rock",
    97: "Chorus",
    2: "Country",
    98: "Easy Listening",
    3: "Dance",
    99: "Acoustic",
    4: "Disco",
    100: "Humour",
    5: "Funk",
    101: "Speech",
    6: "Grunge",
    102: "Chanson",
    7: "Hip Hop",
    103: "Opera",
    8: "Jazz",
    104: "Chamber Music",
    9: "Metal",
    105: "Sonata",
    10: "New Age",
    106: "Symphony",
    11: "Oldies",
    107: "Booty Bass",
    12: "Other",
    108: "Primus",
    13: "Pop",
    109: "Porn Groove",
    14: "RnB",
    110: "Satire",
    15: "Rap",
    111: "Slow Jam",
    16: "Reggae",
    112: "Club",
    17: "Rock",
    113: "Tango",
    18: "Techno",
    114: "Samba",
    19: "Industrial",
    115: "Folklore",
    20: "Alternative",
    116: "Ballad",
    21: "Ska",
    117: "Power Ballad",
    22: "Death Metal",
    118: "Rhythmic Soul",
    23: "Pranks",
    119: "Freestyle",
    24: "Soundtrack",
    120: "Duet",
    25: "Euro-Techno",
    121: "Punk Rock",
    26: "Ambient",
    122: "Drum Solo",
    27: "Trip Hop",
    123: "A Cappella",
    28: "Vocal",
    124: "Euro House",
    29: "Jazz Funk",
    125: "Dance Hall",
    30: "Fusion",
    126: "Goa",
    31: "Trance",
    127: "Drum and Bass",
    32: "Classical",
    128: "Club House",
    33: "Instrumental",
    129: "Hardcore",
    34: "Acid",
    130: "Terror",
    35: "House",
    131: "Indie",
    36: "Game",
    132: "BritPop",
    37: "Sound Clip",
    133: "Negerpunk",
    38: "Gospel",
    134: "Polsk Punk",
    39: "Noise",
    135: "Beat",
    40: "AlternRock",
    136: "Christian Gangsta Rap",
    41: "Bass",
    137: "Heavy Metal",
    42: "Soul",
    138: "Black Metal",
    43: "Punk",
    139: "Crossover",
    44: "Space",
    140: "Contemporary Christian",
    45: "Meditative",
    141: "Christian Rock",
    46: "Instrumental Pop",
    142: "Merengue",
    47: "Instrumental Rock",
    143: "Salsa",
    48: "Ethnic",
    144: "Thrash Metal",
    49: "Gothic",
    145: "Anime",
    50: "Darkwave",
    146: "JPop",
    51: "Techno Industrial",
    147: "Synthpop",
    52: "Electronic",
    148: "Abstract",
    53: "Pop Folk",
    149: "Art Rock",
    54: "Eurodance",
    150: "Baroque",
    55: "Dream",
    151: "Bhangra",
    56: "Southern Rock",
    152: "Big Beat",
    57: "Comedy",
    153: "Breakbeat",
    58: "Cult",
    154: "Chillout",
    59: "Gangsta Rap",
    155: "Downtempo",
    60: "Top 40",
    156: "Dub",
    61: "Christian Rap",
    157: "EBM",
    62: "Pop Funk",
    158: "Eclectic",
    63: "Jungle",
    159: "Electro",
    64: "Native American",
    160: "Electroclash",
    65: "Cabaret",
    161: "Emo",
    66: "New Wave",
    162: "Experimental",
    67: "Psychedelic",
    163: "Garage",
    68: "Rave",
    164: "Global",
    69: "Showtunes",
    165: "IDM",
    70: "Trailer",
    166: "Illbient",
    71: "Lo Fi",
    167: "Industro Goth",
    72: "Tribal",
    168: "Jam Band",
    73: "Acid Punk",
    169: "Krautrock",
    74: "Acid Jazz",
    170: "Leftfield",
    75: "Polka",
    171: "Lounge",
    76: "Retro",
    172: "Math Rock",
    77: "Musical",
    173: "New Romantic",
    78: "Rock and Roll",
    174: "Nu-Breakz",
    79: "Hard Rock",
    175: "Post Punk",
    80: "Folk",
    176: "Post Rock",
    81: "Folk Rock",
    177: "Psytrance",
    82: "National Folk",
    178: "Shoegaze",
    83: "Swing",
    179: "Space Rock",
    84: "Fast Fusion",
    180: "Trop Rock",
    85: "Bebob",
    181: "World Music",
    86: "Latin",
    182: "Neoclassical",
    87: "Revival",
    183: "Audiobook",
    88: "Celtic",
    184: "Audio Theatre",
    89: "Bluegrass",
    185: "Neue Deutsche Welle",
    90: "Avantgarde",
    186: "Podcast",
    91: "Gothic Rock",
    187: "Indie Rock",
    92: "Progressive Rock",
    188: "G Funk",
    93: "Psychedelic Rock",
    189: "Dubstep",
    94: "Symphonic Rock",
    190: "Garage Rock",
    95: "Slow Rock",
    191: "Psybient"
}
2021-12-29 21:55:09 +00:00
def _get_blur_hash() -> str:
    """Returns a blurhash picked at random from a small fixed set.
    Nothing is calculated from the image itself. You may laugh, but
    this is a lot less computationally intensive, especially on large
    images, while still providing some visual variety in the timeline
    """
    precomputed_hashes = (
        "UfGuaW01%gRi%MM{azofozo0V@xuozn#ofs.",
        "UFD]o8-;9FIU~qD%j[%M-;j[ofWB?bt7IURj",
        "UyO|v_1#im=s%y#U%OxDwRt3W9R-ogjHj[WX",
        "U96vAQt6H;WBt7ofWBa#MbWBo#j[byaze-oe",
        "UJKA.q01M|IV%LM|RjNGIVj[f6oLjrofaeof",
        "U9MPjn]?~Cxut~.PS1%1xXIo0fEer_$*^jxG",
        "UtLENXWCRjju~qayaeaz00j[ofayIVkCkCfQ",
        "UHGbeg-pbzWZ.ANI$wsQ$H-;E9W?0Nx]?FjE",
        "UcHU%#4n_ND%?bxatRWBIU%MazxtNaRjs:of",
        "ULR:TsWr~6xZofWWf6s-~6oK9eR,oes-WXNJ",
        "U77VQB-:MaMx%L%MogRkMwkCxuoIS*WYjEsl",
        "U%Nm{8R+%MxuE1t6WBNG-=RjoIt6~Vj]RkR*",
        "UCM7u;?boft7oft7ayj[~qt7WBoft7oft7Rj"
    )
    return random.choice(precomputed_hashes)
2021-12-29 21:55:09 +00:00
def _replace_silo_domain(post_json_object: {},
                         silo_domain: str, replacement_domain: str,
                         system_language: str) -> None:
    """Rewrites links to a silo domain within the content of a post
    so that they point at the replacement domain instead.
    The post is modified in place. Nothing happens if there is no
    replacement domain or if the post has no content
    """
    if not replacement_domain:
        return
    if not has_object_dict(post_json_object):
        return
    if not post_json_object['object'].get('content'):
        return

    text = get_base_content_from_post(post_json_object, system_language)
    # the domain may appear directly after a slash (https://domain)
    # or after a dot (www.domain)
    after_slash = '/' + silo_domain
    after_dot = '.' + silo_domain
    if after_slash not in text and after_dot not in text:
        return

    text = text.replace(after_slash, '/' + replacement_domain)
    text = text.replace(after_dot, '.' + replacement_domain)

    post_obj = post_json_object['object']
    post_obj['content'] = text
    if post_obj.get('contentMap'):
        post_obj['contentMap'][system_language] = text
2020-04-03 16:55:55 +00:00
2022-01-03 10:27:55 +00:00
def replace_you_tube(post_json_object: {}, replacement_domain: str,
                     system_language: str) -> None:
    """Swaps youtube.com links within a post for links to the given
    replacement domain.
    This denies Google some, but not all, tracking data
    """
    silo_domain = 'youtube.com'
    _replace_silo_domain(post_json_object, silo_domain,
                         replacement_domain, system_language)
2021-09-18 17:20:01 +00:00
2022-01-03 10:27:55 +00:00
def replace_twitter(post_json_object: {}, replacement_domain: str,
                    system_language: str) -> None:
    """Swaps twitter links (x.com and twitter.com) within a post for
    links to the given replacement domain.
    This allows you to view twitter posts without having a twitter account
    """
    for silo_domain in ('x.com', 'twitter.com'):
        _replace_silo_domain(post_json_object, silo_domain,
                             replacement_domain, system_language)
2021-09-18 17:08:14 +00:00
2022-01-03 10:27:55 +00:00
def _remove_meta_data(image_filename: str, output_filename: str) -> None:
    """Copies the image to output_filename and then strips the metadata
    from that copy.
    Attempts to do this with pure python didn't work well, so a dedicated
    tool is used if one is installed: exiftool, or otherwise mogrify
    """
    copyfile(image_filename, output_filename)
    if not os.path.isfile(output_filename):
        print('ERROR: unable to remove metadata from ' + image_filename)
        return

    # (installed location, name shown in the log, command prefix)
    # in order of preference. Only the first tool found is run
    strip_tools = (
        ('/usr/bin/exiftool', 'exiftool', 'exiftool -all= '),
        ('/usr/bin/mogrify', 'mogrify', '/usr/bin/mogrify -strip ')
    )
    for tool_path, tool_name, cmd_prefix in strip_tools:
        if not os.path.isfile(tool_path):
            continue
        print('Removing metadata from ' + output_filename +
              ' using ' + tool_name)
        os.system(cmd_prefix + safe_system_string(output_filename))  # nosec
        break
2019-07-24 13:14:23 +00:00
2020-04-03 16:55:55 +00:00
2021-12-29 21:55:09 +00:00
def _spoof_meta_data(base_dir: str, nickname: str, domain: str,
                     output_filename: str, spoof_city: str,
                     content_license_url: str) -> None:
    """Spoof image metadata using a decoy model for a given city

    base_dir, nickname and domain locate the account directory, in
    which a per-account 'decoyseed' file is kept.
    output_filename is the image whose metadata gets written.
    spoof_city is passed to spoof_geolocation to choose the location.
    content_license_url is written as the copyright field.

    Nothing is returned. If the image does not exist or exiftool is
    not installed then an error is printed and nothing is changed
    """
    if not os.path.isfile(output_filename):
        print('ERROR: unable to spoof metadata within ' + output_filename)
        return

    # get the random seed used to generate a unique pattern for this account
    decoy_seed_filename = acct_dir(base_dir, nickname, domain) + '/decoyseed'
    # fallback seed, used if the seed file exists but can't be read
    decoy_seed = 63725
    if os.path.isfile(decoy_seed_filename):
        try:
            with open(decoy_seed_filename, 'r', encoding='utf-8') as fp_seed:
                decoy_seed = int(fp_seed.read())
        except (OSError, ValueError):
            # ValueError happens when the file is empty or does not
            # contain a number. Keep the fallback seed in that case
            print('EX: _spoof_meta_data unable to read ' + decoy_seed_filename)
    else:
        # first use for this account: create and store a new seed
        decoy_seed = randint(10000, 10000000000000000)
        try:
            with open(decoy_seed_filename, 'w+',
                      encoding='utf-8') as fp_seed:
                fp_seed.write(str(decoy_seed))
        except OSError:
            print('EX: _spoof_meta_data unable to write ' +
                  decoy_seed_filename)

    if os.path.isfile('/usr/bin/exiftool'):
        print('Spoofing metadata in ' + output_filename + ' using exiftool')
        # pretend that the photo was taken between 2 minutes
        # and 2 hours ago
        curr_time_adjusted = \
            date_utcnow() - \
            datetime.timedelta(minutes=randint(2, 120))
        published = curr_time_adjusted.strftime("%Y:%m:%d %H:%M:%S+00:00")
        (latitude, longitude, latitude_ref, longitude_ref,
         cam_make, cam_model, cam_serial_number) = \
            spoof_geolocation(base_dir, spoof_city, curr_time_adjusted,
                              decoy_seed, None, None)
        safe_handle = safe_system_string(nickname + '@' + domain)
        safe_license_url = safe_system_string(content_license_url)
        # the filename is sanitised in the same way as within
        # _remove_meta_data, because it becomes part of a shell command
        cmd = \
            'exiftool -artist=@"' + safe_handle + '" ' + \
            '-Make="' + cam_make + '" ' + \
            '-Model="' + cam_model + '" ' + \
            '-Comment="' + str(cam_serial_number) + '" ' + \
            '-DateTimeOriginal="' + published + '" ' + \
            '-FileModifyDate="' + published + '" ' + \
            '-CreateDate="' + published + '" ' + \
            '-GPSLongitudeRef=' + longitude_ref + ' ' + \
            '-GPSAltitude=0 ' + \
            '-GPSLongitude=' + str(longitude) + ' ' + \
            '-GPSLatitudeRef=' + latitude_ref + ' ' + \
            '-GPSLatitude=' + str(latitude) + ' ' + \
            '-copyright="' + safe_license_url + '" ' + \
            '-Comment="" ' + \
            safe_system_string(output_filename)
        if os.system(cmd) != 0:  # nosec
            print('ERROR: exiftool failed to run')
    else:
        print('ERROR: exiftool is not installed')
def get_music_metadata(filename: str) -> {}:
    """Runs exiftool on a music file and returns a dict of the tags
    which were found, keyed by the lowercase field name:
    title, artist, genre, track, album, length, band.
    An empty dict is returned if exiftool fails or produces no output
    """
    exif_result = None
    safe_filename = safe_system_string(filename)
    try:
        exif_result = subprocess.run(['exiftool', '-v3', safe_filename],
                                     stdout=subprocess.PIPE)
    except BaseException as ex:
        print('EX: get_music_metadata failed ' + str(ex))
    if not exif_result or not exif_result.stdout:
        return {}
    try:
        verbose_text = exif_result.stdout.decode('utf-8')
    except BaseException:
        print('EX: get_music_metadata unable to decode output')
        return {}

    wanted_fields = (
        'Title', 'Artist', 'Genre', 'Track', 'Album', 'Length', 'Band'
    )
    tags = {}
    for line in verbose_text.split('\n'):
        for field in wanted_fields:
            marker = field + ' = '
            if marker not in line:
                continue
            value = line.split(marker)[1]
            # drop anything from the first '>' onwards
            if '>' in value:
                value = value.split('>')[0].strip()
            # drop any words which contain a colon
            if ':' in value and ' ' in value:
                kept_words = \
                    [wrd for wrd in value.split(' ') if ':' not in wrd]
                value = ' '.join(kept_words).strip()
            # a numeric genre is converted into its name, if known
            if field == 'Genre' and value.isdigit():
                genre_name = music_genre.get(int(value))
                if genre_name:
                    value = genre_name
            tags[field.lower()] = value
    return tags
2021-12-28 21:36:27 +00:00
def convert_image_to_low_bandwidth(image_filename: str) -> None:
    """Replaces the given image with a dithered monochrome version
    of itself, made with /usr/bin/convert. The conversion is written
    to a temporary '.low' file which then takes the place of the
    original
    """
    low_filename = image_filename + '.low'
    # get rid of any previous conversion
    if os.path.isfile(low_filename):
        try:
            os.remove(low_filename)
        except OSError:
            print('EX: convert_image_to_low_bandwidth unable to delete ' +
                  low_filename)

    cmd = '/usr/bin/convert +noise Multiplicative ' \
        '-evaluate median 10% -dither Floyd-Steinberg ' \
        '-monochrome ' + safe_system_string(image_filename) + \
        ' ' + safe_system_string(low_filename)
    print('Low bandwidth image conversion: ' + cmd)
    subprocess.call(cmd, shell=True)

    # wait for conversion to happen, checking up to 101 times
    # with a pause of 0.2 seconds after each unsuccessful check
    for attempt in range(101):
        if os.path.isfile(low_filename):
            break
        print('Waiting for low bandwidth image conversion ' + str(attempt))
        time.sleep(0.2)
    else:
        # only reached when the loop was never broken out of
        print('WARN: timed out waiting for low bandwidth image conversion')

    if not os.path.isfile(low_filename):
        print('Low bandwidth converted image not found: ' +
              low_filename)
        return

    # the converted image takes the place of the original
    try:
        os.remove(image_filename)
    except OSError:
        print('EX: convert_image_to_low_bandwidth unable to delete ' +
              image_filename)
    os.rename(low_filename, image_filename)
    if os.path.isfile(image_filename):
        print('Image converted to low bandwidth ' + image_filename)
2021-12-28 21:36:27 +00:00
def process_meta_data(base_dir: str, nickname: str, domain: str,
                      image_filename: str, output_filename: str,
                      city: str, content_license_url: str) -> None:
    """Handles image metadata in two steps. The image is copied to
    output_filename with its metadata removed, and then spoofed
    metadata for the given city is added to that copy if possible
    """
    # step one: a copy of the image without its metadata
    _remove_meta_data(image_filename, output_filename)

    # step two: add some spoofed data to misdirect surveillance capitalists
    _spoof_meta_data(base_dir, nickname, domain,
                     output_filename, city, content_license_url)
2021-05-09 19:11:05 +00:00
2021-05-09 12:17:55 +00:00
2021-12-29 21:55:09 +00:00
def _is_media(image_filename: str) -> bool:
    """Returns true if the given file exists and its filename ends
    with one of the permitted media extensions
    """
    if not os.path.isfile(image_filename):
        print('WARN: Media file does not exist ' + image_filename)
        return False

    permitted_suffixes = \
        tuple('.' + ext for ext in get_media_extensions())
    if image_filename.endswith(permitted_suffixes):
        return True

    print('WARN: ' + image_filename + ' is not a permitted media type')
    return False
2020-04-03 16:55:55 +00:00
2022-01-03 10:27:55 +00:00
def create_media_dirs(base_dir: str, media_path: str) -> None:
    """Creates stored media directories

    base_dir is the directory under which media is stored.
    media_path is a path relative to base_dir, such as the one
    returned by get_media_path.

    Directories which already exist are left alone, so this is safe
    to call repeatedly and from concurrent requests. Any missing
    parent directories are also created
    """
    # exist_ok avoids the race between checking for a directory and
    # creating it, which could otherwise raise FileExistsError
    os.makedirs(base_dir + '/media', exist_ok=True)
    os.makedirs(base_dir + '/' + media_path, exist_ok=True)
2020-04-03 16:55:55 +00:00
2019-07-12 19:08:46 +00:00
2021-12-29 21:55:09 +00:00
def get_media_path() -> str:
    """Returns the relative path within which media is currently being
    stored. This is 'media/' followed by the number of whole weeks
    between the date_epoch() time and now
    """
    time_since_epoch = date_utcnow() - date_epoch()
    week_number = int(time_since_epoch.days / 7)
    return 'media/' + str(week_number)
2020-04-03 16:55:55 +00:00
2019-08-30 15:50:20 +00:00
2021-12-29 21:55:09 +00:00
def get_attachment_media_type(filename: str) -> str:
    """Returns the kind of media which the given filename has,
    going by its extension: 'image', 'video' or 'audio'.
    None is returned if the extension is not recognised
    """
    # checked in this order, and each list of extensions is
    # only obtained if the previous kinds did not match
    media_kinds = (
        ('image', get_image_extensions),
        ('video', get_video_extensions),
        ('audio', get_audio_extensions)
    )
    for kind, list_extensions in media_kinds:
        if any(filename.endswith('.' + ext) for ext in list_extensions()):
            return kind
    return None
2019-08-30 15:50:20 +00:00
2020-04-03 16:55:55 +00:00
2022-01-03 10:27:55 +00:00
def _update_etag(media_filename: str) -> None:
    """Calculates the etag for a media file, which is a sha1 of its
    data, and saves it alongside the file with an .etag extension
    """
    # etags are only created for files which exist within a media path
    if '/media/' not in media_filename or \
       not os.path.isfile(media_filename):
        return

    # read the binary data
    try:
        with open(media_filename, 'rb') as fp_media:
            contents = fp_media.read()
    except OSError:
        print('EX: _update_etag unable to read ' + str(media_filename))
        return
    # nothing is saved for an empty file
    if not contents:
        return

    digest = sha1(contents).hexdigest()  # nosec

    # save the hash
    etag_filename = media_filename + '.etag'
    try:
        with open(etag_filename, 'w+', encoding='utf-8') as fp_etag:
            fp_etag.write(digest)
    except OSError:
        print('EX: _update_etag unable to write ' +
              str(media_filename) + '.etag')
2019-12-04 18:52:27 +00:00
2020-04-03 16:55:55 +00:00
2023-02-18 22:32:50 +00:00
def _store_video_transcript(video_transcript: str,
                            media_filename: str) -> bool:
    """Saves a video transcript next to the media file, using the
    media filename with .vtt added.
    Returns true if the transcript was saved, or false if the text
    does not look like WebVTT or the file could not be written
    """
    video_transcript = video_transcript.strip()

    # some text which any acceptable transcript needs to contain
    required_text = ('-->', ':', '- ')
    looks_like_vtt = \
        video_transcript.startswith('WEBVTT') and \
        all(txt in video_transcript for txt in required_text)
    if not looks_like_vtt:
        print('WARN: does not look like a video transcript ' +
              video_transcript)
        return False

    vtt_filename = media_filename + '.vtt'
    try:
        with open(vtt_filename, 'w+', encoding='utf-8') as fp_vtt:
            fp_vtt.write(video_transcript)
    except OSError:
        print('EX: unable to save video transcript ' + media_filename + '.vtt')
        return False
    return True
2021-12-28 21:36:27 +00:00
def attach_media(base_dir: str, http_prefix: str,
                 nickname: str, domain: str, port: int,
                 post_json: {}, image_filename: str,
                 media_type: str, description: str,
                 video_transcript: str,
                 city: str, low_bandwidth: bool,
                 content_license_url: str,
                 creator: str,
                 system_language: str) -> {}:
    """Attaches media to a json object post
    The description can be None

    media_type is the first part of the mime type (the part before
    the slash). The second part is taken from the extension of
    image_filename.
    If base_dir is given then the media file is also stored beneath it:
    images have their metadata removed and spoofed (and are first
    converted if low_bandwidth is set), other media is copied as is,
    and an etag is created. Without a base_dir nothing is written,
    including any video transcript.
    Returns post_json, which is unaltered if the file is not a
    permitted type of media
    """
    if not _is_media(image_filename):
        return post_json

    # get the second part of the mime type from the file extension
    file_extension = None
    accepted_types = get_media_extensions()
    for mtype in accepted_types:
        if image_filename.endswith('.' + mtype):
            if mtype == 'jpg':
                mtype = 'jpeg'
            if mtype == 'mp3':
                mtype = 'mpeg'
            file_extension = mtype
    if not file_extension:
        return post_json
    media_type = media_type + '/' + file_extension
    print('Attached media type: ' + media_type)

    # the extension which the stored file will have
    if file_extension == 'jpeg':
        file_extension = 'jpg'
    if media_type == 'audio/mpeg':
        file_extension = 'mp3'
    if media_type in ('audio/speex', 'audio/x-speex'):
        file_extension = 'spx'

    domain = get_full_domain(domain, port)

    mpath = get_media_path()
    media_path = mpath + '/' + create_password(32) + '.' + file_extension
    # this remains None when there is no base directory,
    # in which case nothing gets written to storage
    media_filename = None
    if base_dir:
        create_media_dirs(base_dir, mpath)
        media_filename = base_dir + '/' + media_path

    # the path within the url differs from the path within storage
    media_path = \
        media_path.replace('media/', 'system/media_attachments/files/', 1)
    attachment_json = {
        'mediaType': media_type,
        'name': description,
        'type': 'Document',
        'url': http_prefix + '://' + domain + '/' + media_path
    }
    if content_license_url or creator:
        attachment_json['@context'] = [
            'https://www.w3.org/ns/activitystreams',
            {'schema': 'https://schema.org#'}
        ]
        if content_license_url:
            attachment_json['schema:license'] = content_license_url
            attachment_json['license'] = content_license_url
        if creator:
            attachment_json['schema:creator'] = creator
            attachment_json['attribution'] = [creator]
    if media_type.startswith('image/'):
        attachment_json['blurhash'] = _get_blur_hash()
        # find the dimensions of the image and add them as metadata
        attach_image_width, attach_image_height = \
            get_image_dimensions(image_filename)
        if attach_image_width and attach_image_height:
            attachment_json['width'] = attach_image_width
            attachment_json['height'] = attach_image_height

    post_json['attachment'] = [attachment_json]

    # create video transcript
    # This needs the stored media filename, so it is skipped
    # if there is no base directory
    if media_filename and video_transcript and 'video' in media_type:
        if _store_video_transcript(video_transcript, media_filename):
            video_transcript_json = {
                'mediaType': 'text/vtt',
                'name': system_language,
                'type': 'Document',
                'url': http_prefix + '://' + domain + '/' + media_path + '.vtt'
            }
            post_json['attachment'].append(video_transcript_json)

    if base_dir:
        if media_type.startswith('image/'):
            if low_bandwidth:
                convert_image_to_low_bandwidth(image_filename)
            process_meta_data(base_dir, nickname, domain,
                              image_filename, media_filename, city,
                              content_license_url)
        else:
            copyfile(image_filename, media_filename)
        _update_etag(media_filename)

    return post_json
2019-07-12 19:08:46 +00:00
2020-04-03 16:55:55 +00:00
2021-12-29 21:55:09 +00:00
def archive_media(base_dir: str, archive_directory: str,
                  max_weeks: int) -> None:
    """Any media older than the given number of weeks gets archived

    Media is stored within directories beneath base_dir/media, each
    named with a week number. Directories with a week number more than
    max_weeks before the current week are moved into
    archive_directory/media, or deleted if there is no archive
    directory. Directories whose names are not numbers are left alone.
    Nothing happens if max_weeks is zero
    """
    if max_weeks == 0:
        return

    curr_time = date_utcnow()
    weeks_since_epoch = int((curr_time - date_epoch()).days/7)
    min_week = weeks_since_epoch - max_weeks

    if archive_directory:
        if not os.path.isdir(archive_directory):
            os.mkdir(archive_directory)
        if not os.path.isdir(archive_directory + '/media'):
            os.mkdir(archive_directory + '/media')

    for _, dirs, _ in os.walk(base_dir + '/media'):
        for week_dir in dirs:
            # skip anything which isn't named with a week number,
            # rather than failing on it and archiving nothing
            try:
                week_number = int(week_dir)
            except ValueError:
                continue
            if week_number >= min_week:
                continue
            if archive_directory:
                move(os.path.join(base_dir + '/media', week_dir),
                     archive_directory + '/media')
            else:
                # archive to /dev/null
                rmtree(os.path.join(base_dir + '/media', week_dir),
                       ignore_errors=False, onexc=None)
        # only the top level of the media directory is examined
        break
2021-06-07 17:55:25 +00:00
2021-12-28 21:36:27 +00:00
def path_is_video(path: str) -> bool:
    """Returns true if the given path ends with a video file extension
    """
    return any(path.endswith('.' + ext)
               for ext in get_video_extensions())
2023-02-18 22:53:33 +00:00
def path_is_transcript(path: str) -> bool:
    """Returns true if the given path has the .vtt extension
    of a WebVTT video transcript
    """
    return path.endswith('.vtt')
2021-12-28 21:36:27 +00:00
def path_is_audio(path: str) -> bool:
    """Returns true if the given path ends with an audio file extension
    """
    return any(path.endswith('.' + ext)
               for ext in get_audio_extensions())
2021-12-29 21:55:09 +00:00
def get_image_dimensions(image_filename: str) -> (int, int):
    """Returns the dimensions of an image file

    The width and height are obtained by running the identify command.
    For an image with more than one frame, such as an animation,
    the dimensions of the first frame are returned.
    Returns None, None if the dimensions could not be obtained
    """
    safe_image_filename = safe_system_string(image_filename)
    try:
        result = subprocess.run(['identify', '-format', '"%wx%h"',
                                 safe_image_filename],
                                stdout=subprocess.PIPE)
    except (OSError, ValueError):
        # OSError covers the identify command not being installed
        print('EX: get_image_dimensions unable to run identify command')
        return None, None
    if not result or not result.stdout:
        return None, None
    try:
        output_str = result.stdout.decode('utf-8')
    except UnicodeDecodeError:
        print('EX: get_image_dimensions unable to decode identify output')
        return None, None

    # identify writes one quoted "WIDTHxHEIGHT" for every frame, with
    # nothing in between. Splitting on the quotes separates the frames,
    # and only the first one is used
    frames = [frame.strip() for frame in output_str.split('"')
              if frame.strip()]
    if not frames:
        return None, None
    width_str, separator, height_str = frames[0].partition('x')
    if not separator:
        return None, None
    if not width_str.isdigit():
        return None, None
    if not height_str.isdigit():
        return None, None
    return int(width_str), int(height_str)
def apply_watermark_to_image(base_dir: str, nickname: str, domain: str,
                             post_image_filename: str,
                             watermark_width_percent: int,
                             watermark_position: str,
                             watermark_opacity: int) -> bool:
    """Applies a watermark to the given image

    The image at post_image_filename is replaced by a watermarked
    version of itself, made with /usr/bin/composite.
    watermark_width_percent is the width of the watermark as a
    percentage of the width of the image.
    watermark_position is a compass direction such as 'northeast',
    or 'random'. Anything unrecognised is treated as 'east'.
    watermark_opacity is a percentage.
    The width and opacity are each varied by up to 5 either way on
    every call, and then kept within the range 0 to 100.

    Returns true if the image was watermarked. Returns false if
    watermarking is not enabled for the account, if a required file
    or tool is missing, or if the conversion failed
    """
    if not os.path.isfile(post_image_filename):
        return False
    if not os.path.isfile('/usr/bin/composite'):
        return False
    watermark_enabled_filename = \
        acct_dir(base_dir, nickname, domain) + '/.watermarkEnabled'
    if not os.path.isfile(watermark_enabled_filename):
        return False

    _, watermark_filename = get_watermark_file(base_dir, nickname, domain)
    if not watermark_filename:
        # does a default watermark filename exist?
        default_watermark_file = base_dir + '/manual/manual-watermark-ai.png'
        if os.path.isfile(default_watermark_file):
            watermark_filename = default_watermark_file
    if not watermark_filename:
        return False
    if not os.path.isfile(watermark_filename):
        return False

    # scale the watermark so that it is a fixed percentage of the image width
    post_image_width, _ = \
        get_image_dimensions(post_image_filename)
    if not post_image_width:
        return False
    # the dimensions of the watermark itself are needed here, so that
    # its own aspect ratio is kept when it is scaled
    watermark_image_width, watermark_image_height = \
        get_image_dimensions(watermark_filename)
    if not watermark_image_width or not watermark_image_height:
        return False

    watermark_width_percent += randint(-5, 5)
    if watermark_width_percent < 0:
        watermark_width_percent = 0
    if watermark_width_percent > 100:
        watermark_width_percent = 100
    scaled_watermark_image_width = \
        int(post_image_width * watermark_width_percent / 100)
    scaled_watermark_image_height = \
        int(watermark_image_height *
            scaled_watermark_image_width / watermark_image_width)

    watermark_position = watermark_position.lower()
    if watermark_position not in ('north', 'south',
                                  'east', 'west',
                                  'northeast', 'northwest',
                                  'southeast', 'southwest',
                                  'random'):
        watermark_position = 'east'
    # choose a random position for the watermark
    if watermark_position == 'random':
        watermark_position = \
            random.choice(['north', 'south',
                           'east', 'west',
                           'northeast', 'northwest',
                           'southeast', 'southwest'])

    watermark_opacity += randint(-5, 5)
    if watermark_opacity < 0:
        watermark_opacity = 0
    if watermark_opacity > 100:
        watermark_opacity = 100

    # the result is written to a separate file, which then
    # takes the place of the original image
    cmd = \
        '/usr/bin/composite ' + \
        '-geometry ' + str(scaled_watermark_image_width) + 'x' + \
        str(scaled_watermark_image_height) + '+30+5 ' + \
        '-watermark ' + str(watermark_opacity) + '% ' + \
        '-gravity ' + watermark_position + ' ' + \
        safe_system_string(watermark_filename) + ' ' + \
        safe_system_string(post_image_filename) + ' ' + \
        safe_system_string(post_image_filename + '.watermarked')
    subprocess.call(cmd, shell=True)
    if not os.path.isfile(post_image_filename + '.watermarked'):
        return False

    try:
        os.remove(post_image_filename)
    except OSError:
        print('EX: _apply_watermark_to_image unable to remove ' +
              post_image_filename)
        return False

    try:
        os.rename(post_image_filename + '.watermarked', post_image_filename)
    except OSError:
        print('EX: _apply_watermark_to_image unable to rename ' +
              post_image_filename + '.watermarked')
        return False
    return True