mirror of https://gitlab.com/bashrc2/epicyon
Improve detection of podcast mime type
parent
d5553fe8a0
commit
02f886d4de
26
newswire.py
26
newswire.py
|
@ -482,27 +482,33 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
|
|||
return podcast_properties
|
||||
|
||||
|
||||
def get_link_from_rss_item(rss_item: str) -> (str, str):
    """Extracts the link and its mime type from an rss item string.

    The url of an audio or video <enclosure> is preferred. If there is
    no usable enclosure then the contents of the <link> element are used.

    Returns a (link, mime_type) tuple. mime_type is taken from the
    type attribute of the first <enclosure>, when there is one, and is
    otherwise None. (None, None) is returned if the item has no <link>
    element to fall back on, or if its contents do not contain '://'.
    """
    mime_type = None

    if '<enclosure ' in rss_item:
        # get link from audio or video enclosure
        enclosure = rss_item.split('<enclosure ')[1]
        if '>' in enclosure:
            # only look at the attributes of the enclosure tag itself
            enclosure = enclosure.split('>')[0]
            if ' type="' in enclosure:
                mime_type = enclosure.split(' type="')[1]
                if '"' in mime_type:
                    mime_type = mime_type.split('"')[0]
            if 'url="' in enclosure and \
               ('"audio/' in enclosure or '"video/' in enclosure):
                link_str = enclosure.split('url="')[1]
                if '"' in link_str:
                    link = link_str.split('"')[0]
                    if '://' in link:
                        return link, mime_type

    # Without this check the split below raises IndexError for items
    # which have no <link> element
    if '<link>' not in rss_item:
        return None, None

    link = rss_item.split('<link>')[1]
    link = link.split('</link>')[0]
    if '://' not in link:
        return None, None
    return link, mime_type
|
||||
|
||||
|
||||
def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||
|
@ -560,7 +566,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
description = description.split('</media:description>')[0]
|
||||
description = remove_html(description)
|
||||
|
||||
link = get_link_from_rss_item(rss_item)
|
||||
link, link_mime_type = get_link_from_rss_item(rss_item)
|
||||
if not link:
|
||||
continue
|
||||
|
||||
|
@ -579,6 +585,8 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
post_filename = ''
|
||||
votes_status = []
|
||||
podcast_properties = xml_podcast_to_dict(rss_item)
|
||||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = link_mime_type
|
||||
_add_newswire_dict_entry(base_dir, domain,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
|
@ -651,7 +659,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
description = description.split('</media:description>')[0]
|
||||
description = remove_html(description)
|
||||
|
||||
link = get_link_from_rss_item(rss_item)
|
||||
link, link_mime_type = get_link_from_rss_item(rss_item)
|
||||
if not link:
|
||||
continue
|
||||
|
||||
|
@ -670,6 +678,8 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
post_filename = ''
|
||||
votes_status = []
|
||||
podcast_properties = xml_podcast_to_dict(rss_item)
|
||||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = link_mime_type
|
||||
_add_newswire_dict_entry(base_dir, domain,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
|
@ -730,7 +740,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
description = description.split('</media:description>')[0]
|
||||
description = remove_html(description)
|
||||
|
||||
link = get_link_from_rss_item(atom_item)
|
||||
link, link_mime_type = get_link_from_rss_item(atom_item)
|
||||
if not link:
|
||||
continue
|
||||
|
||||
|
@ -749,6 +759,8 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
post_filename = ''
|
||||
votes_status = []
|
||||
podcast_properties = xml_podcast_to_dict(atom_item)
|
||||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = link_mime_type
|
||||
_add_newswire_dict_entry(base_dir, domain,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
|
|
7
tests.py
7
tests.py
|
@ -6460,9 +6460,11 @@ def _test_get_link_from_rss_item() -> None:
|
|||
'play/46054222/https%3A%2F%2Fd3ctxlq1ktw2nl.cloudfront.net' + \
|
||||
'%2Fstaging%2F2022-0-12%2F7352f28c-a928-ea7a-65ae-' + \
|
||||
'ccb5edffbac1.mp3" length="67247880" type="audio/mpeg"/>'
|
||||
link = get_link_from_rss_item(rss_item)
|
||||
link, mime_type = get_link_from_rss_item(rss_item)
|
||||
assert link
|
||||
assert link.endswith('.mp3')
|
||||
assert mime_type
|
||||
assert mime_type == 'audio/mpeg'
|
||||
|
||||
rss_item = \
|
||||
'<link>' + \
|
||||
|
@ -6470,9 +6472,10 @@ def _test_get_link_from_rss_item() -> None:
|
|||
'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
|
||||
'</link>' + \
|
||||
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>'
|
||||
link = get_link_from_rss_item(rss_item)
|
||||
link, mime_type = get_link_from_rss_item(rss_item)
|
||||
assert link
|
||||
assert link.startswith('https://anchor.fm')
|
||||
assert not mime_type
|
||||
|
||||
|
||||
def run_all_tests():
|
||||
|
|
|
@ -270,8 +270,7 @@ def _html_newswire(base_dir: str, newswire: {}, nickname: str, moderator: bool,
|
|||
# change the link url to a podcast episode screen
|
||||
podcast_properties = item[8]
|
||||
if podcast_properties:
|
||||
if podcast_properties.get('image') and \
|
||||
'explicit' in podcast_properties:
|
||||
if podcast_properties.get('image'):
|
||||
episode_id = date_str.replace(' ', '__')
|
||||
episode_id = episode_id.replace(':', 'aa')
|
||||
link_url = \
|
||||
|
|
|
@ -156,13 +156,20 @@ def html_podcast_episode(css_cache: {}, translate: {},
|
|||
'" alt="" ' + get_broken_link_substitute() + '/></a>\n'
|
||||
podcast_str += ' </div>\n'
|
||||
|
||||
audio_extension = None
|
||||
if path_is_audio(link_url):
|
||||
if '.mp3' in link_url:
|
||||
extension = 'mp3'
|
||||
audio_extension = 'mpeg'
|
||||
else:
|
||||
extension = 'ogg'
|
||||
|
||||
podcast_str += _html_podcast_soundbites(link_url, extension,
|
||||
audio_extension = 'ogg'
|
||||
else:
|
||||
if podcast_properties.get('linkMimeType'):
|
||||
if 'audio' in podcast_properties['linkMimeType']:
|
||||
audio_extension = \
|
||||
podcast_properties['linkMimeType'].split('/')[1]
|
||||
# show widgets for soundbites
|
||||
if audio_extension:
|
||||
podcast_str += _html_podcast_soundbites(link_url, audio_extension,
|
||||
podcast_properties,
|
||||
translate)
|
||||
|
||||
|
@ -170,7 +177,7 @@ def html_podcast_episode(css_cache: {}, translate: {},
|
|||
podcast_str += \
|
||||
'<audio controls>\n' + \
|
||||
'<source src="' + link_url + '" type="audio/' + \
|
||||
extension.replace('.', '') + '">' + \
|
||||
audio_extension.replace('.', '') + '">' + \
|
||||
translate['Your browser does not support the audio element.'] + \
|
||||
'</audio>\n'
|
||||
|
||||
|
|
Loading…
Reference in New Issue