epicyon/webapp_media.py

401 lines
15 KiB
Python
Raw Normal View History

2020-11-09 19:41:01 +00:00
__filename__ = "webapp_media.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-01-21 19:01:20 +00:00
__version__ = "1.5.0"
2020-11-09 19:41:01 +00:00
__maintainer__ = "Bob Mottram"
2021-09-10 16:14:50 +00:00
__email__ = "bob@libreserver.org"
2020-11-09 19:41:01 +00:00
__status__ = "Production"
2021-06-26 11:27:14 +00:00
__module_group__ = "Timeline"
2020-11-09 19:41:01 +00:00
2020-12-24 10:18:34 +00:00
import os
2024-05-12 12:35:26 +00:00
from utils import data_dir
2024-04-10 12:36:17 +00:00
from utils import string_ends_with
2021-12-26 18:10:53 +00:00
from utils import valid_url_prefix
2020-12-24 10:18:34 +00:00
2021-12-29 21:55:09 +00:00
def load_peertube_instances(base_dir: str, peertube_instances: []) -> None:
    """Loads peertube instances from file into the given list

    Reads peertube.txt from the directory returned by data_dir(base_dir),
    treating each line as one instance. Every entry which is not already
    within peertube_instances is appended to it in place.
    Nothing is returned. If the file does not exist or cannot be read
    then the list is left unchanged.
    """
    peertube_instances_filename = data_dir(base_dir) + '/peertube.txt'
    if not os.path.isfile(peertube_instances_filename):
        return

    peertube_str = ''
    try:
        with open(peertube_instances_filename, 'r',
                  encoding='utf-8') as fp_inst:
            peertube_str = fp_inst.read()
    except OSError as exc:
        print('EX: load_peertube_instances unable to read ' +
              peertube_instances_filename + ' ' + str(exc))

    # tolerate windows style line endings
    for line in peertube_str.replace('\r', '').split('\n'):
        url = line.strip()
        # a trailing newline or blank line within the file should not
        # become an empty instance entry
        if not url:
            continue
        if url in peertube_instances:
            continue
        peertube_instances.append(url)
2024-02-26 11:44:50 +00:00
def _embed_text_is_unsafe(text: str) -> bool:
    """Returns true if the given text, taken from the visible text of a
    link, contains quotes, angle brackets or whitespace which could
    break out of a double quoted html attribute
    """
    for char in text:
        if char in '"\'<>' or char.isspace():
            return True
    return False


def _strip_embed_tracking(url: str) -> str:
    """Removes additional query parameters and utm tracking from a video url
    """
    return url.split('&')[0].split('?utm_')[0]


def _lazy_iframe_embed(src: str, width: int, height: int,
                       compact_close: bool = False) -> str:
    """Returns the html for a lazily loaded sandboxed video iframe.
    compact_close omits the newline between the closing span and center
    tags, which is the markup historically used for youtube embeds
    """
    closing = "</span>\n</center>\n"
    if compact_close:
        closing = "</span></center>\n"
    return \
        "<center>\n<span itemprop=\"video\">\n" + \
        "<iframe loading=\"lazy\" decoding=\"async\" " + \
        "src=\"" + src + "\" width=\"" + str(width) + \
        "\" height=\"" + str(height) + \
        "\" frameborder=\"0\" allow=\"fullscreen\" " + \
        "allowfullscreen tabindex=\"10\" " + \
        "sandbox=\"allow-scripts allow-same-origin\">" + \
        "</iframe>\n" + closing


def _add_embedded_video_from_sites(content: str,
                                   peertube_instances: [],
                                   width: int, height: int,
                                   domain: str) -> str:
    """Adds embedded videos

    Looks within the html content for a link to a known video site
    (twitch, vimeo, youtube, invidious, media.ccc.de or peertube) and
    appends an iframe of the given width and height for the first one
    found. peertube_instances is the list of permitted peertube domains,
    with a small default set being used if it is empty. domain is
    supplied as the parent parameter of twitch embeds.
    Returns the content, with any embed appended. If the content already
    contains an iframe then it is returned unchanged.
    """
    if '<iframe' in content:
        return content

    if 'www.twitch.tv/' in content:
        url = content.split('www.twitch.tv/')[1]
        if '<' in url:
            channel = url.split('<')[0]
            # the channel comes from link text rather than from a quoted
            # attribute, so it must not be able to close the src attribute
            if channel and \
               '/' not in channel and \
               '?' not in channel and \
               '=' not in channel and \
               not _embed_text_is_unsafe(channel):
                content += \
                    '<center>\n<span itemprop="video">\n' + \
                    '<iframe src="https://player.twitch.tv/?channel=' + \
                    channel + '&parent=' + domain + '" ' + \
                    'frameborder="0" allowfullscreen="true" ' + \
                    'scrolling="no" height="' + str(height) + \
                    '" width="' + str(width) + \
                    '" sandbox="allow-scripts allow-same-origin">' + \
                    '</iframe></span>\n</center>\n'
                return content

    if '>vimeo.com/' in content:
        url = content.split('>vimeo.com/')[1]
        if '<' in url:
            url = url.split('<')[0]
            # as with twitch, this is link text and so could contain quotes
            if url and not _embed_text_is_unsafe(url):
                content += \
                    _lazy_iframe_embed('https://player.vimeo.com/video/' +
                                       url, width, height)
                return content

    video_site = 'https://www.youtube.com'
    if 'https://m.youtube.com' in content:
        content = content.replace('https://m.youtube.com', video_site)
    if '"' + video_site in content:
        url = content.split('"' + video_site)[1]
        if '"' in url:
            url = url.split('"')[0]
            # the path must begin with a slash, otherwise a lookalike
            # domain such as www.youtube.com.example.net would be embedded
            if url.startswith('/') and \
               '/channel/' not in url and '/playlist' not in url:
                url = url.replace('/watch?v=', '/embed/')
                url = _strip_embed_tracking(url)
                content += \
                    _lazy_iframe_embed(video_site + url, width, height,
                                       True)
                return content

    video_site = 'https://youtu.be/'
    if '"' + video_site in content:
        url = content.split('"' + video_site)[1]
        if '"' in url:
            url = url.split('"')[0]
            if url and '/channel/' not in url and '/playlist' not in url:
                url = _strip_embed_tracking('embed/' + url)
                video_site = 'https://www.youtube.com/'
                content += \
                    _lazy_iframe_embed(video_site + url, width, height,
                                       True)
                return content

    invidious_sites = (
        'https://invidious.snopyta.org',
        'https://yewtu.be',
        'https://tube.connect.cafe',
        'https://invidious.kavin.rocks',
        'https://invidiou.site',
        'https://invidious.tube',
        'https://invidious.xyz',
        'https://invidious.zapashcanon.fr',
        'http://c7hqkpkpemu6e7emz5b4vy' +
        'z7idjgdvgaaa3dyimmeojqbgpea3xqjoid.onion',
        'http://axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4' +
        'bzzsg2ii4fv2iid.onion'
    )
    for video_site in invidious_sites:
        if '"' + video_site not in content:
            continue
        url = content.split('"' + video_site)[1]
        if '"' not in url:
            continue
        url = url.split('"')[0]
        # reject an empty path or a lookalike domain
        if not url.startswith('/'):
            continue
        url = url.replace('/watch?v=', '/embed/')
        url = _strip_embed_tracking(url)
        # explicitly turn off autoplay. Any '&' parameters have been
        # removed above, so autoplay can only remain as the sole
        # query parameter
        if '?autoplay=' in url:
            url = url.split('?autoplay=')[0] + '?autoplay=0'
        elif '?' in url:
            url += '&autoplay=0'
        else:
            url += '?autoplay=0'
        content += _lazy_iframe_embed(video_site + url, width, height)
        return content

    video_site = 'https://media.ccc.de'
    if '"' + video_site in content:
        url = content.split('"' + video_site)[1]
        if '"' in url:
            url = url.split('"')[0]
            if url.startswith('/'):
                # keep any fragment, such as a start time, after the
                # oembed path
                video_site_settings = ''
                if '#' in url:
                    video_site_settings = '#' + url.split('#', 1)[1]
                    url = url.split('#')[0]
                if not url.endswith('/oembed'):
                    url = url + '/oembed'
                url += video_site_settings
                content += \
                    _lazy_iframe_embed(video_site + url, width, height)
                return content

    # onion and i2p instances are linked to via http, so the presence
    # of either type of link is checked for
    if '"https://' in content or '"http://' in content:
        if peertube_instances:
            # only create an embedded video for a limited set of
            # peertube sites.
            peertube_sites = peertube_instances
        else:
            # A default minimal set of peertube instances
            # Also see https://peertube_isolation.frama.io/list/ for
            # adversarial instances. Nothing in that list should be
            # in the defaults below.
            peertube_sites = (
                'share.tube',
                'visionon.tv',
                'anarchy.tube',
                'peertube.fr',
                'video.nerdcave.site',
                'kolektiva.media',
                'peertube.social',
                'videos.lescommuns.org',
                'neat.tube'
            )
        for site in peertube_sites:
            site = site.strip()
            if not site:
                continue
            if len(site) < 5:
                continue
            if '.' not in site:
                continue
            if site.startswith('http://'):
                site = site[len('http://'):]
            elif site.startswith('https://'):
                site = site[len('https://'):]
            if site.endswith('.onion') or site.endswith('.i2p'):
                site_url = 'http://' + site
            else:
                site_url = 'https://' + site
            site_str = '"' + site_url
            if site_str not in content:
                continue
            url = content.split(site_str)[1]
            if '"' not in url:
                continue
            url = url.split('"')[0]
            # reject an empty path or a lookalike domain
            if not url.startswith('/'):
                continue
            possible_endings = (
                '/trending', '/home', '/overview',
                '/recently-added', '/local', '/about')
            if string_ends_with(url, possible_endings):
                # ignore various peertube endpoints
                continue
            if '/c/' in url or '/m/' in url:
                # don't try to embed peertube channel page
                continue
            if '?sort=' in url:
                # don't try to embed a sorted list
                continue
            if '/w/' in url:
                if '/videos/' not in url:
                    url = url.replace('/w/', '/videos/embed/')
                else:
                    url = url.replace('/w/', '/embed/')
            url = url.replace('/watch/', '/embed/')

            # embed using the same scheme with which the link was
            # matched, and with a single sandbox attribute
            content += _lazy_iframe_embed(site_url + url, width, height)
            return content
    return content
2021-12-29 21:55:09 +00:00
def _add_embedded_audio(translate: {}, content: str) -> str:
    """Adds embedded audio for mp3/ogg/opus/wav/spx/flac

    Each space separated word within the content which ends with a
    supported audio extension, and which passes valid_url_prefix, has
    an audio element appended for it. translate supplies the fallback
    text shown by browsers without audio support.
    Returns the content with any audio elements appended. Content which
    already contains an audio element is returned unchanged.
    """
    extensions = ('.mp3', '.ogg', '.wav', '.opus', '.spx', '.flac')
    if not any(ext in content for ext in extensions):
        return content
    if '<audio ' in content:
        return content

    # extensions whose mime subtype differs from the extension itself.
    # Browsers may skip a source whose type they do not recognise
    mime_types = {
        '.mp3': 'audio/mpeg',
        '.spx': 'audio/ogg'
    }

    words = content.strip('\n').split(' ')
    for wrd in words:
        wrd = wrd.replace('href="', '').replace('">', '')
        # remove at most one of each trailing punctuation character
        for trailing in ('.', '"', ';', ':'):
            if wrd.endswith(trailing):
                wrd = wrd[:-1]
        # check every supported extension, so that a link is not missed
        # because a different extension appears elsewhere in the content
        extension = None
        for ext in extensions:
            if wrd.endswith(ext):
                extension = ext
                break
        if not extension:
            continue
        if '"' in wrd:
            # a quote would terminate the src attribute early
            continue
        if not valid_url_prefix(wrd):
            continue
        mime_type = \
            mime_types.get(extension, 'audio/' + extension.replace('.', ''))
        content += \
            '<center>\n<span itemprop="audio">' + \
            '<audio controls tabindex="10">\n' + \
            '<source src="' + wrd + '" type="' + mime_type + '">' + \
            translate['Your browser does not support the audio element.'] + \
            '</audio>\n</span>\n</center>\n'
    return content
2021-12-29 21:55:09 +00:00
def _add_embedded_video(translate: {}, content: str) -> str:
    """Adds embedded video for mp4/webm/ogv

    Each space separated word within the content which ends with a
    supported video extension, and which passes valid_url_prefix, has
    a video element appended for it. translate supplies the fallback
    text shown by browsers without video support.
    Returns the content with any video elements appended. Content which
    already contains a video element is returned unchanged.
    """
    extensions = ('.mp4', '.webm', '.ogv')
    if not any(ext in content for ext in extensions):
        return content
    if '<video ' in content:
        return content

    # extensions whose mime subtype differs from the extension itself.
    # Browsers may skip a source whose type they do not recognise
    mime_types = {
        '.ogv': 'video/ogg'
    }

    words = content.strip('\n').split(' ')
    for wrd in words:
        wrd = wrd.replace('href="', '').replace('">', '')
        # remove at most one of each trailing punctuation character
        for trailing in ('.', '"', ';', ':'):
            if wrd.endswith(trailing):
                wrd = wrd[:-1]
        # check every supported extension, so that a link is not missed
        # because a different extension appears elsewhere in the content
        extension = None
        for ext in extensions:
            if wrd.endswith(ext):
                extension = ext
                break
        if not extension:
            continue
        if '"' in wrd:
            # a quote would terminate the src attribute early
            continue
        if not valid_url_prefix(wrd):
            continue
        mime_type = \
            mime_types.get(extension, 'video/' + extension.replace('.', ''))
        content += \
            '<center><span itemprop="video">\n' + \
            '<figure id="videoContainer" ' + \
            'data-fullscreen="false">\n' + \
            ' <video id="video" controls ' + \
            'preload="metadata" tabindex="10">\n' + \
            '<source src="' + wrd + '" type="' + mime_type + '">\n' + \
            translate['Your browser does not support the video element.'] + \
            '</video>\n</figure>\n</span>\n</center>\n'
    return content
2021-12-29 21:55:09 +00:00
def add_embedded_elements(translate: {}, content: str,
                          peertube_instances: [], domain: str) -> str:
    """Adds embedded elements for various media types

    The content is passed in turn through the video site, audio and
    video embedders, and the resulting html is returned. Video site
    embeds are 400 pixels wide and 300 high.
    """
    embed_width = 400
    embed_height = 300
    with_site_video = \
        _add_embedded_video_from_sites(content, peertube_instances,
                                       embed_width, embed_height, domain)
    with_audio = _add_embedded_audio(translate, with_site_video)
    return _add_embedded_video(translate, with_audio)