mirror of https://gitlab.com/bashrc2/epicyon
Improve detection of podcast mime type
parent
d5553fe8a0
commit
02f886d4de
26
newswire.py
26
newswire.py
|
@ -482,27 +482,33 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
|
||||||
return podcast_properties
|
return podcast_properties
|
||||||
|
|
||||||
|
|
||||||
def get_link_from_rss_item(rss_item: str) -> (str, str):
    """Extracts rss link from rss item string

    The url of an audio or video <enclosure> is preferred. If there is
    no usable enclosure url then the text of the first <link> element
    is used instead.

    Returns a (link, mime_type) tuple. mime_type is the value of the
    type attribute of the first <enclosure> tag, or None if the item
    has no such attribute. Note that the enclosure type is still
    reported when the function falls back to the <link> element.
    If no link containing '://' can be found then (None, None)
    is returned.
    """
    mime_type = None

    if '<enclosure ' in rss_item:
        # get link from audio or video enclosure
        enclosure = rss_item.split('<enclosure ')[1]
        if '>' in enclosure:
            # only look at the attributes of the enclosure tag itself
            enclosure = enclosure.split('>')[0]
            if ' type="' in enclosure:
                mime_type = enclosure.split(' type="')[1]
                if '"' in mime_type:
                    mime_type = mime_type.split('"')[0]
            if 'url="' in enclosure and \
               ('"audio/' in enclosure or '"video/' in enclosure):
                link_str = enclosure.split('url="')[1]
                if '"' in link_str:
                    link = link_str.split('"')[0]
                    if '://' in link:
                        return link, mime_type

    # an item without a <link> element has nothing to fall back to,
    # so report "no link" rather than raising IndexError
    if '<link>' not in rss_item:
        return None, None
    link = rss_item.split('<link>')[1]
    link = link.split('</link>')[0]
    if '://' not in link:
        return None, None
    return link, mime_type
|
||||||
|
|
||||||
|
|
||||||
def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
|
@ -560,7 +566,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
|
|
||||||
link = get_link_from_rss_item(rss_item)
|
link, link_mime_type = get_link_from_rss_item(rss_item)
|
||||||
if not link:
|
if not link:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -579,6 +585,8 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
post_filename = ''
|
post_filename = ''
|
||||||
votes_status = []
|
votes_status = []
|
||||||
podcast_properties = xml_podcast_to_dict(rss_item)
|
podcast_properties = xml_podcast_to_dict(rss_item)
|
||||||
|
if podcast_properties:
|
||||||
|
podcast_properties['linkMimeType'] = link_mime_type
|
||||||
_add_newswire_dict_entry(base_dir, domain,
|
_add_newswire_dict_entry(base_dir, domain,
|
||||||
result, pub_date_str,
|
result, pub_date_str,
|
||||||
title, link,
|
title, link,
|
||||||
|
@ -651,7 +659,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
|
|
||||||
link = get_link_from_rss_item(rss_item)
|
link, link_mime_type = get_link_from_rss_item(rss_item)
|
||||||
if not link:
|
if not link:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -670,6 +678,8 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
post_filename = ''
|
post_filename = ''
|
||||||
votes_status = []
|
votes_status = []
|
||||||
podcast_properties = xml_podcast_to_dict(rss_item)
|
podcast_properties = xml_podcast_to_dict(rss_item)
|
||||||
|
if podcast_properties:
|
||||||
|
podcast_properties['linkMimeType'] = link_mime_type
|
||||||
_add_newswire_dict_entry(base_dir, domain,
|
_add_newswire_dict_entry(base_dir, domain,
|
||||||
result, pub_date_str,
|
result, pub_date_str,
|
||||||
title, link,
|
title, link,
|
||||||
|
@ -730,7 +740,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
|
|
||||||
link = get_link_from_rss_item(atom_item)
|
link, link_mime_type = get_link_from_rss_item(atom_item)
|
||||||
if not link:
|
if not link:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -749,6 +759,8 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
post_filename = ''
|
post_filename = ''
|
||||||
votes_status = []
|
votes_status = []
|
||||||
podcast_properties = xml_podcast_to_dict(atom_item)
|
podcast_properties = xml_podcast_to_dict(atom_item)
|
||||||
|
if podcast_properties:
|
||||||
|
podcast_properties['linkMimeType'] = link_mime_type
|
||||||
_add_newswire_dict_entry(base_dir, domain,
|
_add_newswire_dict_entry(base_dir, domain,
|
||||||
result, pub_date_str,
|
result, pub_date_str,
|
||||||
title, link,
|
title, link,
|
||||||
|
|
7
tests.py
7
tests.py
|
@ -6460,9 +6460,11 @@ def _test_get_link_from_rss_item() -> None:
|
||||||
'play/46054222/https%3A%2F%2Fd3ctxlq1ktw2nl.cloudfront.net' + \
|
'play/46054222/https%3A%2F%2Fd3ctxlq1ktw2nl.cloudfront.net' + \
|
||||||
'%2Fstaging%2F2022-0-12%2F7352f28c-a928-ea7a-65ae-' + \
|
'%2Fstaging%2F2022-0-12%2F7352f28c-a928-ea7a-65ae-' + \
|
||||||
'ccb5edffbac1.mp3" length="67247880" type="audio/mpeg"/>'
|
'ccb5edffbac1.mp3" length="67247880" type="audio/mpeg"/>'
|
||||||
link = get_link_from_rss_item(rss_item)
|
link, mime_type = get_link_from_rss_item(rss_item)
|
||||||
assert link
|
assert link
|
||||||
assert link.endswith('.mp3')
|
assert link.endswith('.mp3')
|
||||||
|
assert mime_type
|
||||||
|
assert mime_type == 'audio/mpeg'
|
||||||
|
|
||||||
rss_item = \
|
rss_item = \
|
||||||
'<link>' + \
|
'<link>' + \
|
||||||
|
@ -6470,9 +6472,10 @@ def _test_get_link_from_rss_item() -> None:
|
||||||
'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
|
'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
|
||||||
'</link>' + \
|
'</link>' + \
|
||||||
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>'
|
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>'
|
||||||
link = get_link_from_rss_item(rss_item)
|
link, mime_type = get_link_from_rss_item(rss_item)
|
||||||
assert link
|
assert link
|
||||||
assert link.startswith('https://anchor.fm')
|
assert link.startswith('https://anchor.fm')
|
||||||
|
assert not mime_type
|
||||||
|
|
||||||
|
|
||||||
def run_all_tests():
|
def run_all_tests():
|
||||||
|
|
|
@ -270,8 +270,7 @@ def _html_newswire(base_dir: str, newswire: {}, nickname: str, moderator: bool,
|
||||||
# change the link url to a podcast episode screen
|
# change the link url to a podcast episode screen
|
||||||
podcast_properties = item[8]
|
podcast_properties = item[8]
|
||||||
if podcast_properties:
|
if podcast_properties:
|
||||||
if podcast_properties.get('image') and \
|
if podcast_properties.get('image'):
|
||||||
'explicit' in podcast_properties:
|
|
||||||
episode_id = date_str.replace(' ', '__')
|
episode_id = date_str.replace(' ', '__')
|
||||||
episode_id = episode_id.replace(':', 'aa')
|
episode_id = episode_id.replace(':', 'aa')
|
||||||
link_url = \
|
link_url = \
|
||||||
|
|
|
@ -156,13 +156,20 @@ def html_podcast_episode(css_cache: {}, translate: {},
|
||||||
'" alt="" ' + get_broken_link_substitute() + '/></a>\n'
|
'" alt="" ' + get_broken_link_substitute() + '/></a>\n'
|
||||||
podcast_str += ' </div>\n'
|
podcast_str += ' </div>\n'
|
||||||
|
|
||||||
|
audio_extension = None
|
||||||
if path_is_audio(link_url):
|
if path_is_audio(link_url):
|
||||||
if '.mp3' in link_url:
|
if '.mp3' in link_url:
|
||||||
extension = 'mp3'
|
audio_extension = 'mpeg'
|
||||||
else:
|
else:
|
||||||
extension = 'ogg'
|
audio_extension = 'ogg'
|
||||||
|
else:
|
||||||
podcast_str += _html_podcast_soundbites(link_url, extension,
|
if podcast_properties.get('linkMimeType'):
|
||||||
|
if 'audio' in podcast_properties['linkMimeType']:
|
||||||
|
audio_extension = \
|
||||||
|
podcast_properties['linkMimeType'].split('/')[1]
|
||||||
|
# show widgets for soundbites
|
||||||
|
if audio_extension:
|
||||||
|
podcast_str += _html_podcast_soundbites(link_url, audio_extension,
|
||||||
podcast_properties,
|
podcast_properties,
|
||||||
translate)
|
translate)
|
||||||
|
|
||||||
|
@ -170,7 +177,7 @@ def html_podcast_episode(css_cache: {}, translate: {},
|
||||||
podcast_str += \
|
podcast_str += \
|
||||||
'<audio controls>\n' + \
|
'<audio controls>\n' + \
|
||||||
'<source src="' + link_url + '" type="audio/' + \
|
'<source src="' + link_url + '" type="audio/' + \
|
||||||
extension.replace('.', '') + '">' + \
|
audio_extension.replace('.', '') + '">' + \
|
||||||
translate['Your browser does not support the audio element.'] + \
|
translate['Your browser does not support the audio element.'] + \
|
||||||
'</audio>\n'
|
'</audio>\n'
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue