2020-04-03 16:55:55 +00:00
|
|
|
__filename__ = "media.py"
|
|
|
|
__author__ = "Bob Mottram"
|
|
|
|
__license__ = "AGPL3+"
|
2022-02-03 13:58:20 +00:00
|
|
|
__version__ = "1.3.0"
|
2020-04-03 16:55:55 +00:00
|
|
|
__maintainer__ = "Bob Mottram"
|
2021-09-10 16:14:50 +00:00
|
|
|
__email__ = "bob@libreserver.org"
|
2020-04-03 16:55:55 +00:00
|
|
|
__status__ = "Production"
|
2021-06-15 15:08:12 +00:00
|
|
|
__module_group__ = "Timeline"
|
2019-07-12 19:08:46 +00:00
|
|
|
|
|
|
|
import os
|
2021-08-13 19:09:38 +00:00
|
|
|
import time
|
2019-07-12 19:08:46 +00:00
|
|
|
import datetime
|
2021-07-01 11:02:11 +00:00
|
|
|
import subprocess
|
2021-10-11 17:20:16 +00:00
|
|
|
import random
|
2021-05-09 12:17:55 +00:00
|
|
|
from random import randint
|
2019-12-04 18:52:27 +00:00
|
|
|
from hashlib import sha1
|
2021-12-28 21:36:27 +00:00
|
|
|
from auth import create_password
|
2022-07-22 10:54:57 +00:00
|
|
|
from utils import safe_system_string
|
2021-12-26 11:29:40 +00:00
|
|
|
from utils import get_base_content_from_post
|
2021-12-26 12:45:03 +00:00
|
|
|
from utils import get_full_domain
|
2021-12-26 14:26:16 +00:00
|
|
|
from utils import get_image_extensions
|
2021-12-26 14:20:09 +00:00
|
|
|
from utils import get_video_extensions
|
2021-12-26 14:24:03 +00:00
|
|
|
from utils import get_audio_extensions
|
2021-12-26 14:39:49 +00:00
|
|
|
from utils import get_media_extensions
|
2021-12-26 10:57:03 +00:00
|
|
|
from utils import has_object_dict
|
2021-12-26 12:02:29 +00:00
|
|
|
from utils import acct_dir
|
2019-07-12 19:08:46 +00:00
|
|
|
from shutil import copyfile
|
2019-07-12 19:51:10 +00:00
|
|
|
from shutil import rmtree
|
2019-07-12 19:55:23 +00:00
|
|
|
from shutil import move
|
2021-12-29 21:55:09 +00:00
|
|
|
from city import spoof_geolocation
|
2019-07-12 19:08:46 +00:00
|
|
|
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2022-05-20 17:01:12 +00:00
|
|
|
# music file ID3 v1 genres
# Maps the numeric genre code to the name of the genre.
# get_music_metadata uses this table to turn a Genre field which
# consists only of digits into a readable genre name.
music_genre = {
    0: "Blues",
    96: "Big Band",
    1: "Classic Rock",
    97: "Chorus",
    2: "Country",
    98: "Easy Listening",
    3: "Dance",
    99: "Acoustic",
    4: "Disco",
    100: "Humour",
    5: "Funk",
    101: "Speech",
    6: "Grunge",
    102: "Chanson",
    7: "Hip Hop",
    103: "Opera",
    8: "Jazz",
    104: "Chamber Music",
    9: "Metal",
    105: "Sonata",
    10: "New Age",
    106: "Symphony",
    11: "Oldies",
    107: "Booty Bass",
    12: "Other",
    108: "Primus",
    13: "Pop",
    109: "Porn Groove",
    14: "RnB",
    110: "Satire",
    15: "Rap",
    111: "Slow Jam",
    16: "Reggae",
    112: "Club",
    17: "Rock",
    113: "Tango",
    18: "Techno",
    114: "Samba",
    19: "Industrial",
    115: "Folklore",
    20: "Alternative",
    116: "Ballad",
    21: "Ska",
    117: "Power Ballad",
    22: "Death Metal",
    118: "Rhythmic Soul",
    23: "Pranks",
    119: "Freestyle",
    24: "Soundtrack",
    120: "Duet",
    25: "Euro-Techno",
    121: "Punk Rock",
    26: "Ambient",
    122: "Drum Solo",
    27: "Trip Hop",
    123: "A Cappella",
    28: "Vocal",
    124: "Euro House",
    29: "Jazz Funk",
    125: "Dance Hall",
    30: "Fusion",
    126: "Goa",
    31: "Trance",
    127: "Drum and Bass",
    32: "Classical",
    128: "Club House",
    33: "Instrumental",
    129: "Hardcore",
    34: "Acid",
    130: "Terror",
    35: "House",
    131: "Indie",
    36: "Game",
    132: "BritPop",
    37: "Sound Clip",
    133: "Negerpunk",
    38: "Gospel",
    134: "Polsk Punk",
    39: "Noise",
    135: "Beat",
    40: "AlternRock",
    136: "Christian Gangsta Rap",
    41: "Bass",
    137: "Heavy Metal",
    42: "Soul",
    138: "Black Metal",
    43: "Punk",
    139: "Crossover",
    44: "Space",
    140: "Contemporary Christian",
    45: "Meditative",
    141: "Christian Rock",
    46: "Instrumental Pop",
    142: "Merengue",
    47: "Instrumental Rock",
    143: "Salsa",
    48: "Ethnic",
    144: "Thrash Metal",
    49: "Gothic",
    145: "Anime",
    50: "Darkwave",
    146: "JPop",
    51: "Techno Industrial",
    147: "Synthpop",
    52: "Electronic",
    148: "Abstract",
    53: "Pop Folk",
    149: "Art Rock",
    54: "Eurodance",
    150: "Baroque",
    55: "Dream",
    151: "Bhangra",
    56: "Southern Rock",
    152: "Big Beat",
    57: "Comedy",
    153: "Breakbeat",
    58: "Cult",
    154: "Chillout",
    59: "Gangsta Rap",
    155: "Downtempo",
    60: "Top 40",
    156: "Dub",
    61: "Christian Rap",
    157: "EBM",
    62: "Pop Funk",
    158: "Eclectic",
    63: "Jungle",
    159: "Electro",
    64: "Native American",
    160: "Electroclash",
    65: "Cabaret",
    161: "Emo",
    66: "New Wave",
    162: "Experimental",
    67: "Psychedelic",
    163: "Garage",
    68: "Rave",
    164: "Global",
    69: "Showtunes",
    165: "IDM",
    70: "Trailer",
    166: "Illbient",
    71: "Lo Fi",
    167: "Industro Goth",
    72: "Tribal",
    168: "Jam Band",
    73: "Acid Punk",
    169: "Krautrock",
    74: "Acid Jazz",
    170: "Leftfield",
    75: "Polka",
    171: "Lounge",
    76: "Retro",
    172: "Math Rock",
    77: "Musical",
    173: "New Romantic",
    78: "Rock and Roll",
    174: "Nu-Breakz",
    79: "Hard Rock",
    175: "Post Punk",
    80: "Folk",
    176: "Post Rock",
    81: "Folk Rock",
    177: "Psytrance",
    82: "National Folk",
    178: "Shoegaze",
    83: "Swing",
    179: "Space Rock",
    84: "Fast Fusion",
    180: "Trop Rock",
    85: "Bebob",
    181: "World Music",
    86: "Latin",
    182: "Neoclassical",
    87: "Revival",
    183: "Audiobook",
    88: "Celtic",
    184: "Audio Theatre",
    89: "Bluegrass",
    185: "Neue Deutsche Welle",
    90: "Avantgarde",
    186: "Podcast",
    91: "Gothic Rock",
    187: "Indie Rock",
    92: "Progressive Rock",
    188: "G Funk",
    93: "Psychedelic Rock",
    189: "Dubstep",
    94: "Symphonic Rock",
    190: "Garage Rock",
    95: "Slow Rock",
    191: "Psybient"
}
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def _get_blur_hash() -> str:
|
2021-10-11 17:20:16 +00:00
|
|
|
"""You may laugh, but this is a lot less computationally intensive,
|
|
|
|
especially on large images, while still providing some visual variety
|
|
|
|
in the timeline
|
|
|
|
"""
|
|
|
|
hashes = [
|
|
|
|
"UfGuaW01%gRi%MM{azofozo0V@xuozn#ofs.",
|
|
|
|
"UFD]o8-;9FIU~qD%j[%M-;j[ofWB?bt7IURj",
|
|
|
|
"UyO|v_1#im=s%y#U%OxDwRt3W9R-ogjHj[WX",
|
|
|
|
"U96vAQt6H;WBt7ofWBa#MbWBo#j[byaze-oe",
|
|
|
|
"UJKA.q01M|IV%LM|RjNGIVj[f6oLjrofaeof",
|
|
|
|
"U9MPjn]?~Cxut~.PS1%1xXIo0fEer_$*^jxG",
|
|
|
|
"UtLENXWCRjju~qayaeaz00j[ofayIVkCkCfQ",
|
|
|
|
"UHGbeg-pbzWZ.ANI$wsQ$H-;E9W?0Nx]?FjE",
|
|
|
|
"UcHU%#4n_ND%?bxatRWBIU%MazxtNaRjs:of",
|
|
|
|
"ULR:TsWr~6xZofWWf6s-~6oK9eR,oes-WXNJ",
|
|
|
|
"U77VQB-:MaMx%L%MogRkMwkCxuoIS*WYjEsl",
|
|
|
|
"U%Nm{8R+%MxuE1t6WBNG-=RjoIt6~Vj]RkR*",
|
|
|
|
"UCM7u;?boft7oft7ayj[~qt7WBoft7oft7Rj"
|
|
|
|
]
|
|
|
|
return random.choice(hashes)
|
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def _replace_silo_domain(post_json_object: {},
|
2022-01-03 10:27:55 +00:00
|
|
|
silo_domain: str, replacement_domain: str,
|
2021-12-29 21:55:09 +00:00
|
|
|
system_language: str) -> None:
|
2021-09-18 17:20:01 +00:00
|
|
|
"""Replace a silo domain with a replacement domain
|
2020-01-15 10:56:39 +00:00
|
|
|
"""
|
2022-01-03 10:27:55 +00:00
|
|
|
if not replacement_domain:
|
2020-08-02 09:51:20 +00:00
|
|
|
return
|
2021-12-26 10:57:03 +00:00
|
|
|
if not has_object_dict(post_json_object):
|
2020-01-15 10:56:39 +00:00
|
|
|
return
|
2021-12-25 22:09:19 +00:00
|
|
|
if not post_json_object['object'].get('content'):
|
2020-01-15 10:56:39 +00:00
|
|
|
return
|
2022-01-03 10:27:55 +00:00
|
|
|
content_str = get_base_content_from_post(post_json_object, system_language)
|
|
|
|
if silo_domain not in content_str:
|
2020-01-15 10:56:39 +00:00
|
|
|
return
|
2022-01-03 10:27:55 +00:00
|
|
|
content_str = content_str.replace(silo_domain, replacement_domain)
|
|
|
|
post_json_object['object']['content'] = content_str
|
2021-12-25 22:09:19 +00:00
|
|
|
if post_json_object['object'].get('contentMap'):
|
2022-01-03 10:27:55 +00:00
|
|
|
post_json_object['object']['contentMap'][system_language] = content_str
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2020-01-15 10:56:39 +00:00
|
|
|
|
2022-01-03 10:27:55 +00:00
|
|
|
def replace_you_tube(post_json_object: {}, replacement_domain: str,
                     system_language: str) -> None:
    """Points YouTube links within a post at a replacement domain
    This denies Google some, but not all, tracking data
    """
    youtube_domain = 'www.youtube.com'
    _replace_silo_domain(post_json_object, youtube_domain,
                         replacement_domain, system_language)
|
2021-09-18 17:20:01 +00:00
|
|
|
|
|
|
|
|
2022-01-03 10:27:55 +00:00
|
|
|
def replace_twitter(post_json_object: {}, replacement_domain: str,
                    system_language: str) -> None:
    """Points Twitter links within a post at a replacement domain
    This allows you to view twitter posts without having a twitter account
    """
    # the mobile subdomain is handled before the bare domain
    _replace_silo_domain(post_json_object, 'mobile.twitter.com',
                         replacement_domain, system_language)
    _replace_silo_domain(post_json_object, 'twitter.com',
                         replacement_domain, system_language)
|
2021-09-18 17:08:14 +00:00
|
|
|
|
|
|
|
|
2022-01-03 10:27:55 +00:00
|
|
|
def _remove_meta_data(image_filename: str, output_filename: str) -> None:
|
2020-01-08 14:53:28 +00:00
|
|
|
"""Attempts to do this with pure python didn't work well,
|
|
|
|
so better to use a dedicated tool if one is installed
|
2020-01-08 14:31:25 +00:00
|
|
|
"""
|
2022-01-03 10:27:55 +00:00
|
|
|
copyfile(image_filename, output_filename)
|
|
|
|
if not os.path.isfile(output_filename):
|
2021-12-26 14:42:21 +00:00
|
|
|
print('ERROR: unable to remove metadata from ' + image_filename)
|
2020-07-08 14:32:11 +00:00
|
|
|
return
|
2020-01-08 14:53:28 +00:00
|
|
|
if os.path.isfile('/usr/bin/exiftool'):
|
2022-01-03 10:27:55 +00:00
|
|
|
print('Removing metadata from ' + output_filename + ' using exiftool')
|
2022-07-22 10:54:57 +00:00
|
|
|
cmd = 'exiftool -all= ' + safe_system_string(output_filename)
|
|
|
|
os.system(cmd) # nosec
|
2020-01-08 14:53:28 +00:00
|
|
|
elif os.path.isfile('/usr/bin/mogrify'):
|
2022-01-03 10:27:55 +00:00
|
|
|
print('Removing metadata from ' + output_filename + ' using mogrify')
|
2022-07-22 10:54:57 +00:00
|
|
|
cmd = \
|
|
|
|
'/usr/bin/mogrify -strip ' + safe_system_string(output_filename)
|
|
|
|
os.system(cmd) # nosec
|
2019-07-24 13:14:23 +00:00
|
|
|
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def _spoof_meta_data(base_dir: str, nickname: str, domain: str,
|
2022-01-03 10:27:55 +00:00
|
|
|
output_filename: str, spoof_city: str,
|
2021-12-29 21:55:09 +00:00
|
|
|
content_license_url: str) -> None:
|
2021-05-10 11:25:03 +00:00
|
|
|
"""Spoof image metadata using a decoy model for a given city
|
2021-05-09 12:17:55 +00:00
|
|
|
"""
|
2022-01-03 10:27:55 +00:00
|
|
|
if not os.path.isfile(output_filename):
|
|
|
|
print('ERROR: unable to spoof metadata within ' + output_filename)
|
2021-05-09 12:17:55 +00:00
|
|
|
return
|
2021-05-10 10:46:45 +00:00
|
|
|
|
|
|
|
# get the random seed used to generate a unique pattern for this account
|
2022-01-03 10:27:55 +00:00
|
|
|
decoy_seed_filename = acct_dir(base_dir, nickname, domain) + '/decoyseed'
|
|
|
|
decoy_seed = 63725
|
|
|
|
if os.path.isfile(decoy_seed_filename):
|
2022-06-10 14:32:48 +00:00
|
|
|
with open(decoy_seed_filename, 'r', encoding='utf-8') as fp_seed:
|
2022-01-03 10:27:55 +00:00
|
|
|
decoy_seed = int(fp_seed.read())
|
2021-05-10 10:46:45 +00:00
|
|
|
else:
|
2022-01-03 10:27:55 +00:00
|
|
|
decoy_seed = randint(10000, 10000000000000000)
|
2021-06-21 22:53:04 +00:00
|
|
|
try:
|
2022-06-10 14:32:48 +00:00
|
|
|
with open(decoy_seed_filename, 'w+',
|
|
|
|
encoding='utf-8') as fp_seed:
|
2022-01-03 10:27:55 +00:00
|
|
|
fp_seed.write(str(decoy_seed))
|
2021-11-25 21:18:53 +00:00
|
|
|
except OSError:
|
2022-01-03 10:27:55 +00:00
|
|
|
print('EX: unable to write ' + decoy_seed_filename)
|
2021-05-10 10:46:45 +00:00
|
|
|
|
2021-05-09 12:17:55 +00:00
|
|
|
if os.path.isfile('/usr/bin/exiftool'):
|
2022-01-03 10:27:55 +00:00
|
|
|
print('Spoofing metadata in ' + output_filename + ' using exiftool')
|
|
|
|
curr_time_adjusted = \
|
2021-05-09 14:05:52 +00:00
|
|
|
datetime.datetime.utcnow() - \
|
|
|
|
datetime.timedelta(minutes=randint(2, 120))
|
2022-01-03 10:27:55 +00:00
|
|
|
published = curr_time_adjusted.strftime("%Y:%m:%d %H:%M:%S+00:00")
|
|
|
|
(latitude, longitude, latitude_ref, longitude_ref,
|
|
|
|
cam_make, cam_model, cam_serial_number) = \
|
|
|
|
spoof_geolocation(base_dir, spoof_city, curr_time_adjusted,
|
|
|
|
decoy_seed, None, None)
|
2022-07-22 10:54:57 +00:00
|
|
|
safe_handle = safe_system_string(nickname + '@' + domain)
|
|
|
|
safe_license_url = safe_system_string(content_license_url)
|
|
|
|
if os.system('exiftool -artist=@"' + safe_handle + '" ' +
|
2022-01-03 10:27:55 +00:00
|
|
|
'-Make="' + cam_make + '" ' +
|
|
|
|
'-Model="' + cam_model + '" ' +
|
|
|
|
'-Comment="' + str(cam_serial_number) + '" ' +
|
2021-08-13 19:56:42 +00:00
|
|
|
'-DateTimeOriginal="' + published + '" ' +
|
|
|
|
'-FileModifyDate="' + published + '" ' +
|
|
|
|
'-CreateDate="' + published + '" ' +
|
2022-01-03 10:27:55 +00:00
|
|
|
'-GPSLongitudeRef=' + longitude_ref + ' ' +
|
2021-08-13 19:56:42 +00:00
|
|
|
'-GPSAltitude=0 ' +
|
|
|
|
'-GPSLongitude=' + str(longitude) + ' ' +
|
2022-01-03 10:27:55 +00:00
|
|
|
'-GPSLatitudeRef=' + latitude_ref + ' ' +
|
2021-08-13 19:56:42 +00:00
|
|
|
'-GPSLatitude=' + str(latitude) + ' ' +
|
2022-07-22 10:54:57 +00:00
|
|
|
'-copyright="' + safe_license_url + '" ' +
|
2021-08-13 19:56:42 +00:00
|
|
|
'-Comment="" ' +
|
2022-01-03 10:27:55 +00:00
|
|
|
output_filename) != 0: # nosec
|
2021-08-13 19:56:42 +00:00
|
|
|
print('ERROR: exiftool failed to run')
|
2021-05-09 12:17:55 +00:00
|
|
|
else:
|
|
|
|
print('ERROR: exiftool is not installed')
|
|
|
|
return
|
|
|
|
|
|
|
|
|
2022-05-20 15:18:02 +00:00
|
|
|
def get_music_metadata(filename: str) -> {}:
    """Returns metadata for a music file
    The file is examined with exiftool -v3 and the Title, Artist,
    Genre, Track, Album, Length and Band fields are picked out of the
    verbose output. The returned dict is keyed by the lower case field
    name. A Genre consisting only of a number is converted into its
    name using the music_genre table.
    An empty dict is returned if exiftool could not be run or if it
    produced no usable output
    """
    safe_filename = safe_system_string(filename)
    try:
        result = subprocess.run(['exiftool', '-v3', safe_filename],
                                stdout=subprocess.PIPE, check=False)
    except (OSError, ValueError, subprocess.SubprocessError) as ex:
        # only failures to run the command are handled here, so that
        # KeyboardInterrupt and SystemExit are no longer swallowed
        print('EX: get_music_metadata failed ' + str(ex))
        return {}
    if not result.stdout:
        return {}
    try:
        id3_lines = result.stdout.decode('utf-8').split('\n')
    except UnicodeDecodeError:
        print('EX: get_music_metadata unable to decode output')
        return {}

    fieldnames = (
        'Title', 'Artist', 'Genre', 'Track', 'Album', 'Length', 'Band'
    )
    music_metadata = {}
    for line in id3_lines:
        for field in fieldnames:
            marker = field + ' = '
            if marker not in line:
                continue
            field_value = line.split(marker)[1]
            # drop anything following a '>' character
            if '>' in field_value:
                field_value = field_value.split('>')[0].strip()
            # within a multi-word value drop any word containing a colon
            if ':' in field_value and ' ' in field_value:
                kept_words = \
                    [wrd for wrd in field_value.split(' ') if ':' not in wrd]
                field_value = ' '.join(kept_words).strip()
            # isdecimal rather than isdigit, because int() rejects some
            # characters for which isdigit is true, such as superscripts
            if field == 'Genre' and field_value.isdecimal():
                genre_name = music_genre.get(int(field_value))
                if genre_name:
                    field_value = genre_name
            music_metadata[field.lower()] = field_value
    return music_metadata
|
|
|
|
|
|
|
|
|
2021-12-28 21:36:27 +00:00
|
|
|
def convert_image_to_low_bandwidth(image_filename: str) -> None:
    """Converts an image to a low bandwidth version
    The image is replaced in place by a noisy, dithered, monochrome
    rendering produced by ImageMagick's convert command. The result is
    first written to image_filename + '.low' and then renamed over the
    original.
    convert is run from an argument list rather than through a shell,
    so that spaces or shell characters within the filename can neither
    break the command nor be interpreted
    """
    low_bandwidth_filename = image_filename + '.low'
    # remove anything left over from a previous conversion
    if os.path.isfile(low_bandwidth_filename):
        try:
            os.remove(low_bandwidth_filename)
        except OSError:
            print('EX: convert_image_to_low_bandwidth unable to delete ' +
                  low_bandwidth_filename)

    cmd = [
        '/usr/bin/convert', '+noise', 'Multiplicative',
        '-evaluate', 'median', '10%', '-dither', 'Floyd-Steinberg',
        '-monochrome', image_filename, low_bandwidth_filename
    ]
    print('Low bandwidth image conversion: ' + ' '.join(cmd))
    try:
        subprocess.call(cmd)  # nosec
    except OSError as ex:
        # convert could not be started at all, so there is nothing
        # to wait for
        print('EX: convert_image_to_low_bandwidth unable to run convert ' +
              str(ex))
        return

    # wait for conversion to happen
    ctr = 0
    while not os.path.isfile(low_bandwidth_filename):
        print('Waiting for low bandwidth image conversion ' + str(ctr))
        time.sleep(0.2)
        ctr += 1
        if ctr > 100:
            print('WARN: timed out waiting for low bandwidth image conversion')
            break

    if not os.path.isfile(low_bandwidth_filename):
        print('Low bandwidth converted image not found: ' +
              low_bandwidth_filename)
        return

    # swap the converted image in for the original
    try:
        os.remove(image_filename)
    except OSError:
        print('EX: convert_image_to_low_bandwidth unable to delete ' +
              image_filename)
    os.rename(low_bandwidth_filename, image_filename)
    if os.path.isfile(image_filename):
        print('Image converted to low bandwidth ' + image_filename)
|
2021-08-13 17:08:50 +00:00
|
|
|
|
|
|
|
|
2021-12-28 21:36:27 +00:00
|
|
|
def process_meta_data(base_dir: str, nickname: str, domain: str,
                      image_filename: str, output_filename: str,
                      city: str, content_license_url: str) -> None:
    """Handles image metadata in two passes. The image is copied to
    output_filename with its metadata removed, then an attempt is made
    to write spoofed metadata for the given city into that copy
    """
    # first remove the metadata
    _remove_meta_data(image_filename=image_filename,
                      output_filename=output_filename)

    # now add some spoofed data to misdirect surveillance capitalists
    _spoof_meta_data(base_dir=base_dir, nickname=nickname, domain=domain,
                     output_filename=output_filename, spoof_city=city,
                     content_license_url=content_license_url)
|
2021-05-09 19:11:05 +00:00
|
|
|
|
2021-05-09 12:17:55 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def _is_media(image_filename: str) -> bool:
|
2021-03-06 23:03:14 +00:00
|
|
|
"""Is the given file a media file?
|
|
|
|
"""
|
2021-12-26 14:42:21 +00:00
|
|
|
if not os.path.isfile(image_filename):
|
|
|
|
print('WARN: Media file does not exist ' + image_filename)
|
2021-03-06 23:03:14 +00:00
|
|
|
return False
|
2022-01-03 10:27:55 +00:00
|
|
|
permitted_media = get_media_extensions()
|
|
|
|
for permit in permitted_media:
|
|
|
|
if image_filename.endswith('.' + permit):
|
2019-08-30 18:01:29 +00:00
|
|
|
return True
|
2021-12-26 14:42:21 +00:00
|
|
|
print('WARN: ' + image_filename + ' is not a permitted media type')
|
2019-07-12 19:08:46 +00:00
|
|
|
return False
|
|
|
|
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2022-01-03 10:27:55 +00:00
|
|
|
def create_media_dirs(base_dir: str, media_path: str) -> None:
    """Creates stored media directories
    Makes sure that both base_dir/media and base_dir/media_path exist.
    Any missing parent directories are created too, and a directory
    which already exists, or which appears while this is running,
    is not treated as an error
    """
    os.makedirs(base_dir + '/media', exist_ok=True)
    os.makedirs(base_dir + '/' + media_path, exist_ok=True)
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2019-07-12 19:08:46 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def get_media_path() -> str:
    """Returns the path for stored media
    This is 'media/' followed by the number of whole weeks between
    1970-01-01 and the current UTC time, so that media ends up grouped
    by week. Timezone aware datetimes are used, because the naive
    datetime.utcnow is deprecated in recent versions of Python
    """
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    curr_time = datetime.datetime.now(datetime.timezone.utc)
    weeks_since_epoch = (curr_time - epoch).days // 7
    return 'media/' + str(weeks_since_epoch)
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2019-08-30 15:50:20 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def get_attachment_media_type(filename: str) -> str:
    """Returns the kind of media which the given filename refers to,
    going by its extension: 'image', 'video' or 'audio'.
    None is returned if the extension matches none of these
    """
    # checked in this order, and each list of extensions is only
    # obtained if the previous kinds did not match
    extension_lookups = (
        ('image', get_image_extensions),
        ('video', get_video_extensions),
        ('audio', get_audio_extensions)
    )
    for kind, get_extensions in extension_lookups:
        for ext in get_extensions():
            if filename.endswith('.' + ext):
                return kind
    return None
|
2019-08-30 15:50:20 +00:00
|
|
|
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2022-01-03 10:27:55 +00:00
|
|
|
def _update_etag(media_filename: str) -> None:
|
2019-12-04 18:52:27 +00:00
|
|
|
""" calculate the etag, which is a sha1 of the data
|
|
|
|
"""
|
|
|
|
# only create etags for media
|
2022-01-03 10:27:55 +00:00
|
|
|
if '/media/' not in media_filename:
|
2019-12-04 18:52:27 +00:00
|
|
|
return
|
|
|
|
|
|
|
|
# check that the media exists
|
2022-01-03 10:27:55 +00:00
|
|
|
if not os.path.isfile(media_filename):
|
2019-12-04 18:52:27 +00:00
|
|
|
return
|
|
|
|
|
|
|
|
# read the binary data
|
2020-04-03 16:55:55 +00:00
|
|
|
data = None
|
2019-12-04 18:52:27 +00:00
|
|
|
try:
|
2022-01-03 10:27:55 +00:00
|
|
|
with open(media_filename, 'rb') as media_file:
|
|
|
|
data = media_file.read()
|
2021-11-25 22:22:54 +00:00
|
|
|
except OSError:
|
2022-01-03 10:27:55 +00:00
|
|
|
print('EX: _update_etag unable to read ' + str(media_filename))
|
2019-12-04 18:52:27 +00:00
|
|
|
|
|
|
|
if not data:
|
|
|
|
return
|
|
|
|
# calculate hash
|
2020-07-08 15:17:00 +00:00
|
|
|
etag = sha1(data).hexdigest() # nosec
|
2019-12-04 18:52:27 +00:00
|
|
|
# save the hash
|
2021-06-21 22:53:04 +00:00
|
|
|
try:
|
2022-06-09 14:46:30 +00:00
|
|
|
with open(media_filename + '.etag', 'w+', encoding='utf-8') as efile:
|
2022-01-03 10:27:55 +00:00
|
|
|
efile.write(etag)
|
2021-11-25 21:18:53 +00:00
|
|
|
except OSError:
|
2021-12-29 21:55:09 +00:00
|
|
|
print('EX: _update_etag unable to write ' +
|
2022-01-03 10:27:55 +00:00
|
|
|
str(media_filename) + '.etag')
|
2019-12-04 18:52:27 +00:00
|
|
|
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2021-12-28 21:36:27 +00:00
|
|
|
def attach_media(base_dir: str, http_prefix: str,
                 nickname: str, domain: str, port: int,
                 post_json: {}, image_filename: str,
                 media_type: str, description: str,
                 city: str, low_bandwidth: bool,
                 content_license_url: str) -> {}:
    """Attaches a media file to a json object post and returns the post
    The post gains an 'attachment' list containing a single Document,
    whose url refers to a randomly named file beneath the media
    directory for the current week. If base_dir is given then the file
    is also stored there, with images going through metadata processing
    and other media being copied as it is.
    The post is returned unchanged if the file is not permitted media.

    The description can be None
    """
    if not _is_media(image_filename):
        return post_json

    # filename extensions whose media subtype has a different name
    subtype_names = {'jpg': 'jpeg', 'mp3': 'mpeg'}
    file_extension = None
    for ext in get_media_extensions():
        if image_filename.endswith('.' + ext):
            file_extension = subtype_names.get(ext, ext)
    if not file_extension:
        return post_json

    media_type += '/' + file_extension
    print('Attached media type: ' + media_type)

    # the stored file goes back to using the short extension
    if file_extension == 'jpeg':
        file_extension = 'jpg'
    elif media_type == 'audio/mpeg':
        file_extension = 'mp3'

    # NOTE(review): from here on domain holds whatever get_full_domain
    # returned for the domain and port, and that is also the value
    # handed to process_meta_data below - confirm that this is intended
    domain = get_full_domain(domain, port)

    week_path = get_media_path()
    media_path = week_path + '/' + create_password(32) + '.' + file_extension
    if base_dir:
        create_media_dirs(base_dir, week_path)
        media_filename = base_dir + '/' + media_path

    # the url uses a different path prefix from the stored file
    media_path = \
        media_path.replace('media/', 'system/media_attachments/files/', 1)
    is_image = media_type.startswith('image/')
    attachment_json = {
        'mediaType': media_type,
        'name': description,
        'type': 'Document',
        'url': http_prefix + '://' + domain + '/' + media_path
    }
    if is_image:
        attachment_json['blurhash'] = _get_blur_hash()
        # find the dimensions of the image and add them as metadata
        attach_image_width, attach_image_height = \
            get_image_dimensions(image_filename)
        if attach_image_width and attach_image_height:
            attachment_json['width'] = attach_image_width
            attachment_json['height'] = attach_image_height

    post_json['attachment'] = [attachment_json]

    if not base_dir:
        return post_json

    if is_image:
        if low_bandwidth:
            convert_image_to_low_bandwidth(image_filename)
        process_meta_data(base_dir, nickname, domain,
                          image_filename, media_filename, city,
                          content_license_url)
    else:
        copyfile(image_filename, media_filename)
    _update_etag(media_filename)

    return post_json
|
2019-07-12 19:08:46 +00:00
|
|
|
|
2020-04-03 16:55:55 +00:00
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def archive_media(base_dir: str, archive_directory: str,
                  max_weeks: int) -> None:
    """Any media older than the given number of weeks gets archived
    The directories directly beneath base_dir/media are named after a
    week number. Those with a number more than max_weeks before the
    current week are moved into archive_directory/media or, if no
    archive directory is given, are deleted.
    Nothing happens if max_weeks is zero. Directories whose name is not
    a number are ignored rather than causing an exception
    """
    if max_weeks == 0:
        return

    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    curr_time = datetime.datetime.now(datetime.timezone.utc)
    weeks_since_epoch = (curr_time - epoch).days // 7
    min_week = weeks_since_epoch - max_weeks

    media_dir = base_dir + '/media'
    archive_media_dir = None
    if archive_directory:
        archive_media_dir = archive_directory + '/media'
        # also creates archive_directory and any missing parents
        os.makedirs(archive_media_dir, exist_ok=True)

    for _, dirs, _ in os.walk(media_dir):
        for week_dir in dirs:
            try:
                week_number = int(week_dir)
            except ValueError:
                # not a week directory, so leave it alone
                continue
            if week_number >= min_week:
                continue
            week_path = os.path.join(media_dir, week_dir)
            if archive_media_dir:
                move(week_path, archive_media_dir)
            else:
                # archive to /dev/null
                rmtree(week_path)
        # only the directories directly beneath media are of interest
        break
|
2021-06-07 17:55:25 +00:00
|
|
|
|
|
|
|
|
2021-12-28 21:36:27 +00:00
|
|
|
def path_is_video(path: str) -> bool:
    """Returns true if the given path has a video file extension,
    which is either .ogv or .mp4
    """
    return path.endswith(('.ogv', '.mp4'))
|
|
|
|
|
|
|
|
|
2021-12-28 21:36:27 +00:00
|
|
|
def path_is_audio(path: str) -> bool:
    """Returns true if the given path has an audio file extension,
    which is one of .ogg, .opus, .flac or .mp3
    """
    return path.endswith(('.ogg', '.opus', '.flac', '.mp3'))
|
2021-07-01 11:02:11 +00:00
|
|
|
|
|
|
|
|
2021-12-29 21:55:09 +00:00
|
|
|
def get_image_dimensions(image_filename: str) -> (int, int):
    """Returns the dimensions of an image file
    The width and height are obtained by running the identify command.
    (None, None) is returned if the command could not be run or if its
    output could not be understood.
    identify produces one quoted "WxH" for every frame within the
    image, so for an image with many frames, such as an animation,
    the dimensions of the first frame are returned
    """
    safe_image_filename = safe_system_string(image_filename)
    try:
        result = subprocess.run(['identify', '-format', '"%wx%h"',
                                 safe_image_filename],
                                stdout=subprocess.PIPE, check=False)
    except (OSError, ValueError, subprocess.SubprocessError):
        print('EX: get_image_dimensions unable to run identify command')
        return None, None
    if not result.stdout:
        return None, None

    # the quotes within the format separate one frame from the next.
    # Simply removing them would join the frames together, such that
    # "10x20""10x20" would be read as a height of 2010
    output_str = result.stdout.decode('utf-8', 'replace')
    frames = [frm for frm in output_str.split('"') if frm.strip()]
    if not frames:
        return None, None

    width_str, separator, height_str = frames[0].partition('x')
    if not separator:
        return None, None
    # isdecimal only accepts characters which int() can convert
    if not width_str.isdecimal() or not height_str.isdecimal():
        return None, None
    return int(width_str), int(height_str)
|