epicyon/webapp_podcast.py

525 lines
20 KiB
Python
Raw Normal View History

2022-01-11 18:25:13 +00:00
__filename__ = "webapp_podcast.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
2024-01-21 19:01:20 +00:00
__version__ = "1.5.0"
2022-01-11 18:25:13 +00:00
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Web Interface Columns"
import os
2022-01-12 19:40:12 +00:00
import html
2022-05-03 16:38:16 +00:00
import datetime
2022-01-12 20:01:28 +00:00
import urllib.parse
2022-01-11 18:25:13 +00:00
from shutil import copyfile
from utils import resembles_url
from utils import get_nickname_from_actor
from utils import get_domain_from_actor
2024-05-12 12:35:26 +00:00
from utils import data_dir
2023-12-09 14:18:24 +00:00
from utils import get_url_from_post
2022-01-11 18:25:13 +00:00
from utils import get_config_param
from utils import remove_html
from media import path_is_audio
2022-01-14 10:20:37 +00:00
from content import safe_web_text
2022-01-11 18:25:13 +00:00
from webapp_utils import get_broken_link_substitute
from webapp_utils import html_header_with_external_style
from webapp_utils import html_footer
from webapp_utils import html_keyboard_navigation
2023-08-13 09:58:02 +00:00
from session import get_json_valid
2022-05-03 16:38:16 +00:00
from session import get_json
2024-06-05 20:00:23 +00:00
# Limit on the visible text of an extra link shown on the podcast episode
# screen. Link text longer than this is truncated so that it does not
# mess up the display on mobile; the href keeps the complete url.
MAX_LINK_LENGTH = 40
2022-05-03 16:38:16 +00:00
def _html_podcast_chapter_item(link_url: str, chapter: {}) -> str:
    """Returns the html list item for a single podcast chapter,
    or an empty string if the chapter cannot be shown
    """
    if not isinstance(chapter, dict):
        return ''
    if not chapter.get('title'):
        return ''
    if not isinstance(chapter['title'], str):
        return ''
    start_sec = chapter.get('startTime')
    # Test the type rather than the truthiness, because the first chapter
    # of an episode normally starts at zero seconds
    if isinstance(start_sec, bool) or \
       not isinstance(start_sec, (int, float)):
        return ''
    if start_sec < 0:
        return ''
    try:
        start_time_text = str(datetime.timedelta(seconds=start_sec))
    except (OverflowError, ValueError):
        # not a number of seconds which can be displayed (eg. NaN)
        return ''

    # The chapters file is fetched from a remote server, so its text is
    # sanitised in the same way as other feed supplied values in this
    # module
    chapter_title = remove_html(chapter['title'])
    if chapter.get('url'):
        url_str = get_url_from_post(chapter['url'])
        chapter_url = remove_html(url_str)
        chapter_title = \
            '<a href="' + chapter_url + '">' + chapter_title + '</a>'

    # media fragment link which skips to the beginning of the chapter
    skip_url = link_url + '#t=' + str(start_sec)
    start_time_str = \
        '<a href="' + skip_url + '">' + start_time_text + '</a>'

    # the chapter image is optional
    img_str = ''
    if chapter.get('img') and isinstance(chapter['img'], str):
        img_str = \
            ' <img loading="lazy" decoding="async" ' + \
            'src="' + remove_html(chapter['img']) + '" alt="" />\n'

    return ' <li>\n' + \
        ' ' + start_time_str + '\n' + \
        img_str + \
        ' ' + chapter_title + '\n' + \
        ' </li>\n'


def _html_podcast_chapters(link_url: str,
                           session, session_onion, session_i2p,
                           http_prefix: str, domain: str,
                           podcast_properties: {},
                           debug: bool,
                           mitm_servers: []) -> str:
    """Returns html for chapters of a podcast

    link_url is the url of the episode media, used to make links which
    skip to the start of each chapter.
    podcast_properties['chapters'] is expected to be a dict having a
    'url' or 'uri' for the chapters file and its mime 'type'. Only json
    chapters files are downloaded and shown.
    Returns an empty string if there are no chapters which can be shown.
    """
    if not podcast_properties:
        return ''
    key = 'chapters'
    if not podcast_properties.get(key):
        return ''
    chapters_ref = podcast_properties[key]
    if not isinstance(chapters_ref, dict):
        return ''

    if chapters_ref.get('url'):
        url_str = get_url_from_post(chapters_ref['url'])
        chapters_url = remove_html(url_str)
    elif chapters_ref.get('uri'):
        chapters_url = chapters_ref['uri']
    else:
        return ''
    if not isinstance(chapters_url, str):
        return ''

    url_type = chapters_ref.get('type')
    if not url_type or not isinstance(url_type, str):
        return ''
    if 'json' not in url_type:
        return ''

    # Choose the session from the host name of the chapters url.
    # Testing the end of the whole url would never match when the url
    # has a path, such as http://abc.onion/chapters.json
    try:
        chapters_host = urllib.parse.urlparse(chapters_url).hostname or ''
    except ValueError:
        chapters_host = ''
    curr_session = session
    if chapters_host.endswith('.onion') or chapters_url.endswith('.onion'):
        # fall back to the default session if there is no onion session
        curr_session = session_onion or session
    elif chapters_host.endswith('.i2p') or chapters_url.endswith('.i2p'):
        curr_session = session_i2p or session

    as_header = {
        'Accept': url_type
    }
    chapters_json = \
        get_json(None, curr_session, chapters_url,
                 as_header, None, debug, mitm_servers, __version__,
                 http_prefix, domain)
    if not get_json_valid(chapters_json):
        return ''
    if not chapters_json.get('chapters'):
        return ''
    if not isinstance(chapters_json['chapters'], list):
        return ''

    chapters_html = ''
    for chapter in chapters_json['chapters']:
        chapters_html += _html_podcast_chapter_item(link_url, chapter)
    if not chapters_html:
        return ''
    return '<div class="chapters">\n' + \
        ' <ul>\n' + chapters_html + ' </ul>\n</div>\n'
2022-01-11 18:25:13 +00:00
2022-05-03 11:55:16 +00:00
def _html_podcast_transcripts(podcast_properties: {}, translate: {}) -> str:
    """Returns html for transcripts of a podcast

    podcast_properties['transcripts'] is expected to be a list of dicts,
    each having a 'url' or 'uri' for the transcript. One link is produced
    for each usable transcript, with the second and subsequent links
    numbered and separated by line breaks.
    Returns an empty string if there are no transcripts.
    """
    if not podcast_properties:
        return ''
    key = 'transcripts'
    if not podcast_properties.get(key):
        return ''
    if not isinstance(podcast_properties[key], list):
        return ''
    ctr = 1
    html_str = ''
    for transcript in podcast_properties[key]:
        # The url has to be read from the individual list item.
        # The list itself has no get method.
        if not isinstance(transcript, dict):
            continue
        transcript_url = None
        if transcript.get('url'):
            url_str = get_url_from_post(transcript['url'])
            transcript_url = remove_html(url_str)
        elif transcript.get('uri'):
            transcript_url = transcript['uri']
        if not transcript_url:
            continue
        if ctr > 1:
            html_str += '<br>'
        html_str += '<a href="' + transcript_url + '">'
        html_str += translate['Transcript']
        if ctr > 1:
            # only number the links when there is more than one
            html_str += ' ' + str(ctr)
        html_str += '</a>\n'
        ctr += 1
    return html_str
2022-02-12 20:37:15 +00:00
def _html_podcast_social_interactions(podcast_properties: {},
                                      translate: {},
                                      nickname: str) -> str:
    """Returns html for social interactions with a podcast

    Produces a link for replying to the post associated with the episode
    and a link for viewing its comments. The post is taken from the
    'discussion' property, or from 'socialInteract' if that is not
    available. Returns an empty string if no post url can be found.
    """
    if not podcast_properties:
        return ''

    # prefer the discussion property over socialInteract
    interact = podcast_properties.get('discussion')
    if not interact:
        interact = podcast_properties.get('socialInteract')
    if not interact:
        return ''
    if not isinstance(interact, dict):
        return ''

    # url of the post which comments are attached to
    if interact.get('uri'):
        post_url = interact['uri']
    elif interact.get('url'):
        post_url = remove_html(get_url_from_post(interact['url']))
    elif interact.get('text'):
        post_url = interact['text']
    else:
        return ''

    # optional account of the podcast, appended to the reply link
    actor_param = ''
    handle = interact.get('accountId') or interact.get('podcastAccountUrl')
    if handle:
        if handle.startswith('@'):
            handle = handle[1:]
        actor_param = '?actor=' + handle

    link_attributes = '" target="_blank" rel="nofollow noopener noreferrer">'
    sections = [
        '<center>\n',
        ' <a href="/users/', nickname,
        '?replyto=', post_url, actor_param, link_attributes,
        '💬 ', translate['Leave a comment'], '</a>\n',
        ' <span itemprop="comment">\n',
        ' <a href="', post_url, link_attributes,
        translate['View comments'], '</a>\n </span>\n',
        '</center>\n'
    ]
    return ''.join(sections)
2022-01-11 18:25:13 +00:00
def _html_podcast_performers(podcast_properties: {}) -> str:
    """Returns html for performers of a podcast

    podcast_properties['persons'] is expected to be a list of dicts, each
    having a 'text' name and optionally a 'role', 'group', 'href' and
    'img'. Entries which are not dicts, or which have no name, are
    skipped.
    Returns an empty string if there is no list of persons.
    """
    if not podcast_properties:
        return ''
    key = 'persons'
    if not podcast_properties.get(key):
        return ''
    if not isinstance(podcast_properties[key], list):
        return ''

    # list of performers
    podcast_str = '<div class="performers">\n'
    podcast_str += ' <center>\n'
    podcast_str += '<ul>\n'
    for performer in podcast_properties[key]:
        if not isinstance(performer, dict):
            continue
        if not performer.get('text'):
            continue
        if not isinstance(performer['text'], str):
            continue

        # Each feed supplied value is sanitised on its own, before it is
        # wrapped in markup. Sanitising the assembled title instead would
        # presumably also remove the spans added here - TODO confirm
        # against remove_html
        performer_title = \
            '<span itemprop="name">' + \
            remove_html(performer['text']) + '</span>'
        role = performer.get('role')
        if role and isinstance(role, str):
            performer_title += \
                ' (<span itemprop="hasOccupation">' + \
                remove_html(role) + '</span>)'
        group = performer.get('group')
        if group and isinstance(group, str):
            performer_title += ', <i>' + remove_html(group) + '</i>'

        performer_url = ''
        if performer.get('href') and isinstance(performer['href'], str):
            performer_url = remove_html(performer['href'])
        performer_img = ''
        if performer.get('img') and isinstance(performer['img'], str):
            # sanitised in the same way as the href
            performer_img = remove_html(performer['img'])

        podcast_str += ' <li>\n'
        podcast_str += ' <figure>\n'
        podcast_str += ' <span itemprop="creator" ' + \
            'itemscope itemtype="https://schema.org/Person">\n'
        podcast_str += \
            ' <a href="' + performer_url + '" itemprop="url">\n'
        podcast_str += \
            ' <img loading="lazy" decoding="async" ' + \
            'src="' + performer_img + '" alt="" itemprop="image" />\n'
        podcast_str += \
            ' <figcaption>' + performer_title + '</figcaption>\n'
        podcast_str += ' </a>\n'
        podcast_str += ' </span></figure>\n'
        podcast_str += ' </li>\n'

    podcast_str += '</ul>\n'
    # close the center element opened above
    podcast_str += ' </center>\n'
    podcast_str += '</div>\n'
    return podcast_str
def _podcast_soundbite_seconds(value) -> float:
    """Returns the given soundbite time as a number of seconds,
    or None if it is not a finite time of zero or more seconds
    """
    if isinstance(value, bool):
        return None
    if not isinstance(value, (str, int, float)):
        return None
    try:
        seconds = float(value)
    except (ValueError, OverflowError):
        return None
    # this comparison also rejects NaN and infinity
    if not 0 <= seconds < float('inf'):
        return None
    return seconds


def _html_podcast_soundbites(link_url: str, extension: str,
                             podcast_properties: {},
                             translate: {}) -> str:
    """Returns html for podcast soundbites

    podcast_properties['soundbites'] is expected to be a list of dicts,
    each having a 'startTime' and 'duration' in seconds. Whole numbers
    and decimal times such as "73.0" are both accepted, and may be
    strings or numbers. Entries without usable times are skipped.
    An audio player is produced for each soundbite, with link_url given
    a media fragment for the start and end time. extension is the audio
    mime subtype, eg. mpeg
    Returns an empty string if there are no soundbites.
    """
    if not podcast_properties:
        return ''
    if not podcast_properties.get('soundbites'):
        return ''
    if not isinstance(podcast_properties['soundbites'], list):
        return ''

    podcast_str = '<div class="performers">\n'
    podcast_str += ' <center>\n'
    podcast_str += '<ul>\n'
    ctr = 1
    for soundbite in podcast_properties['soundbites']:
        if not isinstance(soundbite, dict):
            continue
        start_sec = _podcast_soundbite_seconds(soundbite.get('startTime'))
        if start_sec is None:
            continue
        duration_sec = _podcast_soundbite_seconds(soundbite.get('duration'))
        if duration_sec is None:
            continue

        # a start time given as a whole number string is used as it is,
        # otherwise the parsed number is shown
        start_time = soundbite['startTime']
        if not (isinstance(start_time, str) and start_time.isdigit()):
            start_time = str(start_sec)
        end_time = str(start_sec + duration_sec)
        preview_url = link_url + '#t=' + start_time + ',' + end_time

        soundbite_title = translate['Preview'] + ' ' + str(ctr)
        podcast_str += ' <li>\n'
        podcast_str += \
            ' <span itemprop="trailer">\n' + \
            ' <audio controls tabindex="10">\n' + \
            ' <p>' + soundbite_title + '</p>\n' + \
            ' <source src="' + preview_url + '" type="audio/' + \
            extension.replace('.', '') + '">' + \
            translate['Your browser does not support the audio element.'] + \
            '</audio>\n </span>\n'
        podcast_str += ' </li>\n'
        ctr += 1

    podcast_str += '</ul>\n'
    # close the center element opened above
    podcast_str += ' </center>\n'
    podcast_str += '</div>\n'
    return podcast_str
2022-07-12 19:03:30 +00:00
def html_podcast_episode(translate: {},
                         base_dir: str, nickname: str, domain: str,
                         newswire_item: [],
                         text_mode_banner: str,
                         session, session_onion, session_i2p,
                         http_prefix: str, debug: bool,
                         mitm_servers: []) -> str:
    """Returns html for a podcast episode, an item from the newswire

    The fields of newswire_item which are used here are:
    [0] episode title, [1] url of the episode media, [4] description,
    [8] podcast properties dict, [9] optional fediverse handle and
    [10] optional list of extra links.
    The screen contains the episode image, a player for the audio or
    video, the title, author, transcripts, description, donate button,
    links, categories, performers, comment links and chapters.
    """
    css_filename = base_dir + '/epicyon-podcast.css'
    if os.path.isfile(base_dir + '/podcast.css'):
        css_filename = base_dir + '/podcast.css'

    # Use the custom background, if there is one and no background has
    # been set. The copy is from the custom image; the default image does
    # not exist at this point so it cannot be the source.
    dir_str = data_dir(base_dir)
    custom_background_filename = dir_str + '/podcast-background-custom.jpg'
    background_filename = dir_str + '/podcast-background.jpg'
    if os.path.isfile(custom_background_filename):
        if not os.path.isfile(background_filename):
            try:
                copyfile(custom_background_filename, background_filename)
            except OSError as ex:
                # a missing background should not prevent the episode
                # from being shown
                print('EX: html_podcast_episode unable to copy ' +
                      custom_background_filename + ' ' + str(ex))

    instance_title = get_config_param(base_dir, 'instanceTitle')
    preload_images = []
    podcast_str = \
        html_header_with_external_style(css_filename, instance_title, None,
                                        preload_images)

    # items without podcast properties are shown with what is available
    podcast_properties = newswire_item[8]
    if not isinstance(podcast_properties, dict):
        podcast_properties = {}

    # episode image, preferring a srcset if there is one
    image_url = ''
    image_src = 'src'
    images = podcast_properties.get('images')
    if isinstance(images, dict) and images.get('srcset'):
        image_url = images['srcset']
        image_src = 'srcset'
    if not image_url and podcast_properties.get('image'):
        image_url = podcast_properties['image']

    link_url = newswire_item[1]

    podcast_str += html_keyboard_navigation(text_mode_banner, {}, {},
                                            None, None, None, False)
    podcast_str += '<br><br>\n'
    podcast_str += \
        '<div class="options" itemscope ' + \
        'itemtype="http://schema.org/PodcastEpisode">\n'
    podcast_str += ' <div class="optionsAvatar">\n'
    podcast_str += ' <center>\n'
    podcast_str += ' <a href="' + link_url + '" itemprop="url">\n'
    podcast_str += ' <span itemprop="image">\n'
    # image_src is the name of the attribute, either src or srcset
    podcast_str += ' <img loading="lazy" decoding="async" ' + \
        image_src + '="' + image_url + \
        '" alt="" ' + get_broken_link_substitute() + '/>\n'
    podcast_str += ' </span></a>\n'
    podcast_str += ' </center>\n'
    podcast_str += ' </div>\n'

    podcast_str += ' <center>\n'

    # get the audio mime subtype, from the url or the declared mime type
    link_mime_type = podcast_properties.get('linkMimeType')
    audio_extension = None
    if path_is_audio(link_url):
        if '.mp3' in link_url:
            audio_extension = 'mpeg'
        elif '.opus' in link_url:
            audio_extension = 'opus'
        elif '.spx' in link_url:
            audio_extension = 'spx'
        elif '.flac' in link_url:
            audio_extension = 'flac'
        elif '.wav' in link_url:
            audio_extension = 'wav'
        else:
            audio_extension = 'ogg'
    elif link_mime_type:
        if 'audio' in link_mime_type:
            # a mime type without a subtype is not usable
            mime_sections = link_mime_type.split('/')
            if len(mime_sections) > 1:
                audio_extension = mime_sections[1]

    if audio_extension:
        # show widgets for soundbites
        podcast_str += _html_podcast_soundbites(link_url, audio_extension,
                                                podcast_properties,
                                                translate)

        # podcast player widget
        podcast_str += \
            ' <span itemprop="audio">\n' + \
            ' <audio controls tabindex="10">\n' + \
            ' <source src="' + link_url + '" type="audio/' + \
            audio_extension.replace('.', '') + '">' + \
            translate['Your browser does not support the audio element.'] + \
            '\n </audio>\n </span>\n'
    elif link_mime_type:
        if '/youtube' in link_mime_type:
            # convert the watch url into an embed url without tracking
            url = link_url.replace('/watch?v=', '/embed/')
            if '&' in url:
                url = url.split('&')[0]
            if '?utm_' in url:
                url = url.split('?utm_')[0]
            podcast_str += \
                ' <span itemprop="video">\n' + \
                " <iframe loading=\"lazy\" decoding=\"async\" src=\"" + \
                url + "\" width=\"400\" height=\"300\" " + \
                "frameborder=\"0\" allow=\"fullscreen\" " + \
                "allowfullscreen " + \
                "sandbox=\"allow-scripts allow-same-origin\">\n" + \
                " </iframe>\n </span>\n"
        elif 'video' in link_mime_type:
            video_mime_type = link_mime_type
            video_msg = 'Your browser does not support the video element.'
            podcast_str += \
                ' <span itemprop="video">\n' + \
                ' <figure id="videoContainer" ' + \
                'data-fullscreen="false">\n' + \
                ' <video id="video" controls preload="metadata" ' + \
                'tabindex="10">\n' + \
                '<source src="' + link_url + '" ' + \
                'type="' + video_mime_type + '">' + \
                translate[video_msg] + \
                '</video>\n </figure>\n </span>\n'

    podcast_title = \
        remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0])))
    if podcast_title:
        podcast_str += \
            '<p><label class="podcast-title">' + \
            '<span itemprop="headline">' + \
            podcast_title + \
            '</span></label></p>\n'

    # the author comes from the feed, so is sanitised like the title
    author = podcast_properties.get('author')
    if author and isinstance(author, str):
        podcast_str += '<p>' + remove_html(author) + '</p>\n'

    transcripts = _html_podcast_transcripts(podcast_properties, translate)
    if transcripts:
        podcast_str += '<p>' + transcripts + '</p>\n'

    if newswire_item[4]:
        podcast_description = \
            html.unescape(urllib.parse.unquote_plus(newswire_item[4]))
        podcast_description = safe_web_text(podcast_description)
        if podcast_description:
            podcast_str += \
                '<p><span itemprop="description">' + \
                podcast_description + '</span></p>\n'

    # donate button
    funding = podcast_properties.get('funding')
    if isinstance(funding, dict) and funding.get('url'):
        url_str = get_url_from_post(funding['url'])
        donate_url = remove_html(url_str)
        podcast_str += \
            '<p><span itemprop="funding"><a href="' + donate_url + \
            '" rel="donation"><button class="donateButton">' + \
            translate['Donate'] + '</button></a></span></p>\n'

    # fediverse handle of the podcast
    fediverse_handle = ''
    if len(newswire_item) > 9:
        fediverse_handle = newswire_item[9]
    podcast_nickname = get_nickname_from_actor(fediverse_handle)
    podcast_domain, _ = get_domain_from_actor(fediverse_handle)
    if podcast_nickname and podcast_domain:
        podcast_str += \
            '<p><a href="' + fediverse_handle + '">@' + \
            podcast_nickname + '@' + podcast_domain + '</a></p>\n'

    # extra links
    extra_links = []
    if len(newswire_item) > 10:
        extra_links = newswire_item[10]
    if extra_links:
        links_text = ''
        for link_str in extra_links:
            link_str = remove_html(link_str)
            if not resembles_url(link_str):
                continue
            # don't repeat links which are already on the screen
            if link_str in podcast_str:
                continue
            if not links_text:
                links_text = '<p>\n'
            # The complete url is kept in its own variable.
            # link_url is still needed for the chapters, below.
            extra_link_url = link_str
            # check that the link is not too long so that it does not
            # mess up display on mobile
            if len(link_str) > MAX_LINK_LENGTH:
                link_str = link_str[:MAX_LINK_LENGTH-1]
            links_text += \
                '<a href="' + extra_link_url + '">' + link_str + '</a><br>\n'
        if links_text:
            links_text += '</p>\n'
            podcast_str += links_text

    # categories, shown as hashtags
    categories = podcast_properties.get('categories')
    if categories:
        tags_str = ''
        for tag in categories:
            if not isinstance(tag, str):
                continue
            tag = tag.replace('#', '')
            tag_link = '/users/' + nickname + '/tags/' + tag
            tags_str += \
                '#<a href="' + tag_link + '">' + \
                '<span itemprop="keywords">' + tag + '</span>' + \
                '</a> '
        podcast_str += '<p>' + tags_str.strip() + '</p>\n'

    podcast_str += _html_podcast_performers(podcast_properties)
    podcast_str += \
        _html_podcast_social_interactions(podcast_properties, translate,
                                          nickname)
    podcast_str += \
        _html_podcast_chapters(link_url,
                               session, session_onion, session_i2p,
                               http_prefix, domain,
                               podcast_properties, debug, mitm_servers)

    podcast_str += ' </center>\n'
    podcast_str += '</div>\n'

    podcast_str += html_footer()
    return podcast_str