Improve detection of podcast mime type

merge-requests/26/head
Bob Mottram 2022-01-12 18:35:15 +00:00
parent d5553fe8a0
commit 02f886d4de
4 changed files with 37 additions and 16 deletions

View File

@ -482,27 +482,33 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
return podcast_properties
def get_link_from_rss_item(rss_item: str) -> (str, str):
    """Extracts the link and its mime type from an rss item string

    The url of an audio or video <enclosure> is preferred. If there is
    no usable enclosure then the contents of the <link> element are
    used instead.

    Returns a (link, mime_type) tuple. mime_type is the type attribute
    of the first <enclosure> within the item, or None if there is no
    such attribute. (None, None) is returned when no link containing
    '://' can be found.
    """
    mime_type = None

    if '<enclosure ' in rss_item:
        # get link from audio or video enclosure
        enclosure = rss_item.split('<enclosure ')[1]
        if '>' in enclosure:
            # only look at the attributes of the enclosure tag itself
            enclosure = enclosure.split('>')[0]
            if ' type="' in enclosure:
                mime_type = enclosure.split(' type="')[1]
                if '"' in mime_type:
                    mime_type = mime_type.split('"')[0]
            if 'url="' in enclosure and \
               ('"audio/' in enclosure or '"video/' in enclosure):
                link_str = enclosure.split('url="')[1]
                if '"' in link_str:
                    link = link_str.split('"')[0]
                    if '://' in link:
                        return link, mime_type

    # fall back to the <link> element. Without this check an item
    # having no <link> would cause an IndexError on the split below
    if '<link>' not in rss_item:
        return None, None
    link = rss_item.split('<link>')[1]
    link = link.split('</link>')[0]
    if '://' not in link:
        return None, None
    return link, mime_type
def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
@ -560,7 +566,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
description = description.split('</media:description>')[0]
description = remove_html(description)
link = get_link_from_rss_item(rss_item)
link, link_mime_type = get_link_from_rss_item(rss_item)
if not link:
continue
@ -579,6 +585,8 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
post_filename = ''
votes_status = []
podcast_properties = xml_podcast_to_dict(rss_item)
if podcast_properties:
podcast_properties['linkMimeType'] = link_mime_type
_add_newswire_dict_entry(base_dir, domain,
result, pub_date_str,
title, link,
@ -651,7 +659,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
description = description.split('</media:description>')[0]
description = remove_html(description)
link = get_link_from_rss_item(rss_item)
link, link_mime_type = get_link_from_rss_item(rss_item)
if not link:
continue
@ -670,6 +678,8 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
post_filename = ''
votes_status = []
podcast_properties = xml_podcast_to_dict(rss_item)
if podcast_properties:
podcast_properties['linkMimeType'] = link_mime_type
_add_newswire_dict_entry(base_dir, domain,
result, pub_date_str,
title, link,
@ -730,7 +740,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
description = description.split('</media:description>')[0]
description = remove_html(description)
link = get_link_from_rss_item(atom_item)
link, link_mime_type = get_link_from_rss_item(atom_item)
if not link:
continue
@ -749,6 +759,8 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
post_filename = ''
votes_status = []
podcast_properties = xml_podcast_to_dict(atom_item)
if podcast_properties:
podcast_properties['linkMimeType'] = link_mime_type
_add_newswire_dict_entry(base_dir, domain,
result, pub_date_str,
title, link,

View File

@ -6460,9 +6460,11 @@ def _test_get_link_from_rss_item() -> None:
'play/46054222/https%3A%2F%2Fd3ctxlq1ktw2nl.cloudfront.net' + \
'%2Fstaging%2F2022-0-12%2F7352f28c-a928-ea7a-65ae-' + \
'ccb5edffbac1.mp3" length="67247880" type="audio/mpeg"/>'
link = get_link_from_rss_item(rss_item)
link, mime_type = get_link_from_rss_item(rss_item)
assert link
assert link.endswith('.mp3')
assert mime_type
assert mime_type == 'audio/mpeg'
rss_item = \
'<link>' + \
@ -6470,9 +6472,10 @@ def _test_get_link_from_rss_item() -> None:
'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
'</link>' + \
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>'
link = get_link_from_rss_item(rss_item)
link, mime_type = get_link_from_rss_item(rss_item)
assert link
assert link.startswith('https://anchor.fm')
assert not mime_type
def run_all_tests():

View File

@ -270,8 +270,7 @@ def _html_newswire(base_dir: str, newswire: {}, nickname: str, moderator: bool,
# change the link url to a podcast episode screen
podcast_properties = item[8]
if podcast_properties:
if podcast_properties.get('image') and \
'explicit' in podcast_properties:
if podcast_properties.get('image'):
episode_id = date_str.replace(' ', '__')
episode_id = episode_id.replace(':', 'aa')
link_url = \

View File

@ -156,13 +156,20 @@ def html_podcast_episode(css_cache: {}, translate: {},
'" alt="" ' + get_broken_link_substitute() + '/></a>\n'
podcast_str += ' </div>\n'
audio_extension = None
if path_is_audio(link_url):
if '.mp3' in link_url:
extension = 'mp3'
audio_extension = 'mpeg'
else:
extension = 'ogg'
podcast_str += _html_podcast_soundbites(link_url, extension,
audio_extension = 'ogg'
else:
if podcast_properties.get('linkMimeType'):
if 'audio' in podcast_properties['linkMimeType']:
audio_extension = \
podcast_properties['linkMimeType'].split('/')[1]
# show widgets for soundbites
if audio_extension:
podcast_str += _html_podcast_soundbites(link_url, audio_extension,
podcast_properties,
translate)
@ -170,7 +177,7 @@ def html_podcast_episode(css_cache: {}, translate: {},
podcast_str += \
'<audio controls>\n' + \
'<source src="' + link_url + '" type="audio/' + \
extension.replace('.', '') + '">' + \
audio_extension.replace('.', '') + '">' + \
translate['Your browser does not support the audio element.'] + \
'</audio>\n'