mirror of https://gitlab.com/bashrc2/epicyon
Parse alternate enclosures for podcast RSS
parent
84697cef2b
commit
637687ca23
52
newswire.py
52
newswire.py
|
@ -623,11 +623,61 @@ def xml_podcast_to_dict(base_dir: str, xml_item: str, xml_str: str) -> {}:
|
|||
return podcast_properties
|
||||
|
||||
|
||||
def get_link_from_rss_item(rss_item: str) -> (str, str):
|
||||
def get_link_from_rss_item(rss_item: str,
|
||||
preferred_mime_types: [] = None,
|
||||
proxy_type: str = None) -> (str, str):
|
||||
"""Extracts rss link from rss item string
|
||||
"""
|
||||
mime_type = None
|
||||
|
||||
if preferred_mime_types and '<podcast:alternateEnclosure ' in rss_item:
|
||||
enclosures = rss_item.split('<podcast:alternateEnclosure ')
|
||||
ctr = 0
|
||||
for enclosure in enclosures:
|
||||
if ctr == 0:
|
||||
ctr += 1
|
||||
continue
|
||||
ctr += 1
|
||||
if '</podcast:alternateEnclosure' not in enclosure:
|
||||
continue
|
||||
enclosure = enclosure.split('</podcast:alternateEnclosure')[0]
|
||||
if 'type="' not in enclosure:
|
||||
continue
|
||||
mime_type = enclosure.split('type="')[1]
|
||||
if '"' in mime_type:
|
||||
mime_type = mime_type.split('"')[0]
|
||||
if mime_type not in preferred_mime_types:
|
||||
continue
|
||||
if 'uri="' not in enclosure:
|
||||
continue
|
||||
uris = enclosure.split('uri="')
|
||||
ctr2 = 0
|
||||
for uri in uris:
|
||||
if ctr2 == 0:
|
||||
ctr2 += 1
|
||||
continue
|
||||
ctr2 += 1
|
||||
if '"' not in uri:
|
||||
continue
|
||||
link = uri.split('"')[0]
|
||||
if '://' not in link:
|
||||
continue
|
||||
if proxy_type:
|
||||
if proxy_type == 'tor' and \
|
||||
'.onion/' not in link:
|
||||
continue
|
||||
if proxy_type == 'onion' and \
|
||||
'.onion/' not in link:
|
||||
continue
|
||||
if proxy_type == 'i2p' and \
|
||||
'.i2p/' not in link:
|
||||
continue
|
||||
return link, mime_type
|
||||
else:
|
||||
if '.onion/' not in link and \
|
||||
'.i2p/' not in link:
|
||||
return link, mime_type
|
||||
|
||||
if '<enclosure ' in rss_item:
|
||||
# get link from audio or video enclosure
|
||||
enclosure = rss_item.split('<enclosure ')[1]
|
||||
|
|
41
tests.py
41
tests.py
|
@ -6795,18 +6795,49 @@ def _test_get_link_from_rss_item() -> None:
|
|||
'<link>' + \
|
||||
'https://anchor.fm/creativecommons/episodes/' + \
|
||||
'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
|
||||
'</link>' + \
|
||||
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>' + \
|
||||
'</link>\n' + \
|
||||
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>\n' + \
|
||||
'<enclosure url="https://anchor.fm/s/4d70d828/podcast/' + \
|
||||
'play/46054222/https%3A%2F%2Fd3ctxlq1ktw2nl.cloudfront.net' + \
|
||||
'%2Fstaging%2F2022-0-12%2F7352f28c-a928-ea7a-65ae-' + \
|
||||
'ccb5edffbac1.mp3" length="67247880" type="audio/mpeg"/>'
|
||||
link, mime_type = get_link_from_rss_item(rss_item)
|
||||
'ccb5edffbac1.mp3" length="67247880" type="audio/mpeg"/>\n' + \
|
||||
'<podcast:alternateEnclosure type="audio/mpeg" ' + \
|
||||
'length="27800000" bitrate="128000" default="true" ' + \
|
||||
'title="Standard">\n' + \
|
||||
'<podcast:source uri="https://whoframed.rodger/rabbit.mp3" />\n' + \
|
||||
'<podcast:source uri="http://randomaddress.onion/rabbit.mp3" />\n' + \
|
||||
'<podcast:source uri="http://randomaddress.i2p/rabbit.mp3" />\n' + \
|
||||
'</podcast:alternateEnclosure>\n' + \
|
||||
'<podcast:alternateEnclosure type="audio/opus" ' + \
|
||||
'length="19200000" bitrate="128000" ' + \
|
||||
'title="High Quality">\n' + \
|
||||
'<podcast:source uri="https://whoframed.rodger/rabbit.opus" />\n' + \
|
||||
'<podcast:source uri="http://randomaddress.onion/rabbit.opus" />\n' + \
|
||||
'<podcast:source uri="http://randomaddress.i2p/rabbit.opus" />\n' + \
|
||||
'</podcast:alternateEnclosure>\n'
|
||||
|
||||
link, mime_type = get_link_from_rss_item(rss_item, None, None)
|
||||
assert link
|
||||
assert link.endswith('.mp3')
|
||||
assert link.endswith('1.mp3')
|
||||
assert mime_type
|
||||
assert mime_type == 'audio/mpeg'
|
||||
|
||||
link, mime_type = get_link_from_rss_item(rss_item, ['audio/opus'], None)
|
||||
assert mime_type
|
||||
if mime_type != 'audio/opus':
|
||||
print('mime_type: ' + mime_type)
|
||||
assert mime_type == 'audio/opus'
|
||||
assert link
|
||||
assert link == 'https://whoframed.rodger/rabbit.opus'
|
||||
|
||||
link, mime_type = get_link_from_rss_item(rss_item, ['audio/opus'], 'tor')
|
||||
assert mime_type
|
||||
if mime_type != 'audio/opus':
|
||||
print('mime_type: ' + mime_type)
|
||||
assert mime_type == 'audio/opus'
|
||||
assert link
|
||||
assert link == 'http://randomaddress.onion/rabbit.opus'
|
||||
|
||||
rss_item = \
|
||||
'<link>' + \
|
||||
'https://anchor.fm/creativecommons/episodes/' + \
|
||||
|
|
Loading…
Reference in New Issue