Parse alternate enclosures for podcast RSS

merge-requests/30/head
Bob Mottram 2022-04-22 13:53:34 +01:00
parent 84697cef2b
commit 637687ca23
2 changed files with 87 additions and 6 deletions

View File

@ -623,11 +623,61 @@ def xml_podcast_to_dict(base_dir: str, xml_item: str, xml_str: str) -> {}:
return podcast_properties
def get_link_from_rss_item(rss_item: str) -> (str, str):
def get_link_from_rss_item(rss_item: str,
preferred_mime_types: [] = None,
proxy_type: str = None) -> (str, str):
"""Extracts rss link from rss item string
"""
mime_type = None
if preferred_mime_types and '<podcast:alternateEnclosure ' in rss_item:
enclosures = rss_item.split('<podcast:alternateEnclosure ')
ctr = 0
for enclosure in enclosures:
if ctr == 0:
ctr += 1
continue
ctr += 1
if '</podcast:alternateEnclosure' not in enclosure:
continue
enclosure = enclosure.split('</podcast:alternateEnclosure')[0]
if 'type="' not in enclosure:
continue
mime_type = enclosure.split('type="')[1]
if '"' in mime_type:
mime_type = mime_type.split('"')[0]
if mime_type not in preferred_mime_types:
continue
if 'uri="' not in enclosure:
continue
uris = enclosure.split('uri="')
ctr2 = 0
for uri in uris:
if ctr2 == 0:
ctr2 += 1
continue
ctr2 += 1
if '"' not in uri:
continue
link = uri.split('"')[0]
if '://' not in link:
continue
if proxy_type:
if proxy_type == 'tor' and \
'.onion/' not in link:
continue
if proxy_type == 'onion' and \
'.onion/' not in link:
continue
if proxy_type == 'i2p' and \
'.i2p/' not in link:
continue
return link, mime_type
else:
if '.onion/' not in link and \
'.i2p/' not in link:
return link, mime_type
if '<enclosure ' in rss_item:
# get link from audio or video enclosure
enclosure = rss_item.split('<enclosure ')[1]

View File

@ -6795,18 +6795,49 @@ def _test_get_link_from_rss_item() -> None:
'<link>' + \
'https://anchor.fm/creativecommons/episodes/' + \
'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
'</link>' + \
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>' + \
'</link>\n' + \
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>\n' + \
'<enclosure url="https://anchor.fm/s/4d70d828/podcast/' + \
'play/46054222/https%3A%2F%2Fd3ctxlq1ktw2nl.cloudfront.net' + \
'%2Fstaging%2F2022-0-12%2F7352f28c-a928-ea7a-65ae-' + \
'ccb5edffbac1.mp3" length="67247880" type="audio/mpeg"/>'
link, mime_type = get_link_from_rss_item(rss_item)
'ccb5edffbac1.mp3" length="67247880" type="audio/mpeg"/>\n' + \
'<podcast:alternateEnclosure type="audio/mpeg" ' + \
'length="27800000" bitrate="128000" default="true" ' + \
'title="Standard">\n' + \
'<podcast:source uri="https://whoframed.rodger/rabbit.mp3" />\n' + \
'<podcast:source uri="http://randomaddress.onion/rabbit.mp3" />\n' + \
'<podcast:source uri="http://randomaddress.i2p/rabbit.mp3" />\n' + \
'</podcast:alternateEnclosure>\n' + \
'<podcast:alternateEnclosure type="audio/opus" ' + \
'length="19200000" bitrate="128000" ' + \
'title="High Quality">\n' + \
'<podcast:source uri="https://whoframed.rodger/rabbit.opus" />\n' + \
'<podcast:source uri="http://randomaddress.onion/rabbit.opus" />\n' + \
'<podcast:source uri="http://randomaddress.i2p/rabbit.opus" />\n' + \
'</podcast:alternateEnclosure>\n'
link, mime_type = get_link_from_rss_item(rss_item, None, None)
assert link
assert link.endswith('.mp3')
assert link.endswith('1.mp3')
assert mime_type
assert mime_type == 'audio/mpeg'
link, mime_type = get_link_from_rss_item(rss_item, ['audio/opus'], None)
assert mime_type
if mime_type != 'audio/opus':
print('mime_type: ' + mime_type)
assert mime_type == 'audio/opus'
assert link
assert link == 'https://whoframed.rodger/rabbit.opus'
link, mime_type = get_link_from_rss_item(rss_item, ['audio/opus'], 'tor')
assert mime_type
if mime_type != 'audio/opus':
print('mime_type: ' + mime_type)
assert mime_type == 'audio/opus'
assert link
assert link == 'http://randomaddress.onion/rabbit.opus'
rss_item = \
'<link>' + \
'https://anchor.fm/creativecommons/episodes/' + \