mirror of https://gitlab.com/bashrc2/epicyon
Support for podcast fields within rss feeds
parent
1085f97070
commit
df6b71009e
|
@ -627,6 +627,10 @@ def _convert_rs_sto_activity_pub(base_dir: str, http_prefix: str,
|
|||
'<br><a href="' + post_url + '">' + \
|
||||
translate['Read more...'] + '</a>'
|
||||
|
||||
# podcast_properties = None
|
||||
# if len(item) > 8:
|
||||
# podcast_properties = item[8]
|
||||
|
||||
followers_only = False
|
||||
# NOTE: the id when the post is created will not be
|
||||
# consistent (it's based on the current time, not the
|
||||
|
|
92
newswire.py
92
newswire.py
|
@ -203,7 +203,8 @@ def _add_newswire_dict_entry(base_dir: str, domain: str,
|
|||
description: str, moderated: bool,
|
||||
mirrored: bool,
|
||||
tags: [],
|
||||
max_tags: int, session, debug: bool) -> None:
|
||||
max_tags: int, session, debug: bool,
|
||||
podcast_properties: {}) -> None:
|
||||
"""Update the newswire dictionary
|
||||
"""
|
||||
# remove any markup
|
||||
|
@ -246,7 +247,8 @@ def _add_newswire_dict_entry(base_dir: str, domain: str,
|
|||
description,
|
||||
moderated,
|
||||
post_tags,
|
||||
mirrored
|
||||
mirrored,
|
||||
podcast_properties
|
||||
]
|
||||
|
||||
|
||||
|
@ -377,6 +379,71 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
|
|||
False, force)
|
||||
|
||||
|
||||
def xml_podcast_to_dict(xml_str: str) -> {}:
    """Extracts podcasting extensions from an RSS feed

    Scans xml_str for tags within the podcastindex.org "podcast"
    namespace and returns them as a dictionary.

    Tags without attributes are stored as strings keyed by tag name,
    eg. <podcast:episode>5</podcast:episode> becomes {"episode": "5"}.
    Tags with attributes are stored as a dictionary of the recognised
    attributes, plus a "text" entry if the tag encloses any text.
    location, person, soundbite, transcript and valueRecipient tags
    can occur many times, so their entries are appended to the
    "locations", "persons", "soundbites", "transcripts" and
    "valueRecipients" lists, which are always present in the result.

    Returns an empty dictionary if xml_str does not mention the
    podcast namespace or contains no podcast tags.
    """
    if 'podcastindex.org/namespace/1.0' not in xml_str:
        return {}
    if '<podcast:' not in xml_str:
        return {}

    podcast_properties = {
        "locations": [],
        "persons": [],
        "soundbites": [],
        "transcripts": [],
        "valueRecipients": []
    }

    # the attributes which are extracted from podcast tags
    pod_fields = (
        'url', 'geo', 'osm', 'type', 'method', 'group',
        'owner', 'srcset', 'img', 'role', 'address', 'suggested',
        'startTime', 'duration', 'href', 'name'
    )

    # Attributes may be separated by tabs or newlines as well as spaces.
    # Mapping each one to a single space means only spaces need to be
    # looked for, and the length of attribute values is unchanged
    whitespace_to_space = str.maketrans('\t\r\n', '   ')

    # the first segment is whatever precedes the first podcast tag
    for pod_line in xml_str.split('<podcast:')[1:]:
        if '>' not in pod_line:
            continue

        # Only look for attributes inside the opening tag. The rest of
        # the segment can contain unrelated elements, such as
        # <enclosure url="...">, whose attributes must not be picked up
        tag_head, tag_rest = pod_line.split('>', 1)
        tag_head = tag_head.translate(whitespace_to_space).strip()

        self_closing = tag_head.endswith('/')
        if self_closing:
            tag_head = tag_head[:-1].rstrip()

        pod_key, _, attr_str = tag_head.partition(' ')
        if not pod_key:
            continue

        # text enclosed by the tag. Self-closing tags enclose nothing,
        # so whatever follows them belongs to something else
        pod_text = ''
        if not self_closing:
            pod_text = tag_rest.split('<', 1)[0].strip()

        # tags which can occur many times are collected within a list
        pod_list = podcast_properties.get(pod_key + 's')
        if not isinstance(pod_list, list):
            pod_list = None

        if not attr_str:
            # tag without attributes, such as <podcast:episode>
            if pod_list is None:
                podcast_properties[pod_key] = pod_text
            elif pod_text:
                pod_list.append({'text': pod_text})
            continue

        # A leading space lets every attribute name be matched on a
        # whitespace boundary, so that type=" does not match mimetype="
        attr_str = ' ' + attr_str
        pod_entry = {}
        for pod_field in pod_fields:
            marker = ' ' + pod_field + '="'
            if marker not in attr_str:
                continue
            pod_str = attr_str.split(marker, 1)[1]
            if '"' not in pod_str:
                continue
            pod_entry[pod_field] = pod_str.split('"', 1)[0]

        if pod_text:
            pod_entry['text'] = pod_text

        if pod_list is None:
            podcast_properties[pod_key] = pod_entry
        else:
            pod_list.append(pod_entry)

    return podcast_properties
|
||||
|
||||
|
||||
def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||
moderated: bool, mirrored: bool,
|
||||
max_posts_per_source: int,
|
||||
|
@ -446,12 +513,14 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
if _valid_feed_date(pub_date_str):
|
||||
post_filename = ''
|
||||
votes_status = []
|
||||
podcast_properties = xml_podcast_to_dict(xml_str)
|
||||
_add_newswire_dict_entry(base_dir, domain,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated,
|
||||
mirrored, [], 32, session, debug)
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -534,12 +603,14 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
if _valid_feed_date(pub_date_str):
|
||||
post_filename = ''
|
||||
votes_status = []
|
||||
podcast_properties = xml_podcast_to_dict(xml_str)
|
||||
_add_newswire_dict_entry(base_dir, domain,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated,
|
||||
mirrored, [], 32, session, debug)
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -610,12 +681,14 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
if _valid_feed_date(pub_date_str):
|
||||
post_filename = ''
|
||||
votes_status = []
|
||||
podcast_properties = xml_podcast_to_dict(xml_str)
|
||||
_add_newswire_dict_entry(base_dir, domain,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated,
|
||||
mirrored, [], 32, session, debug)
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -727,7 +800,8 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated,
|
||||
mirrored, [], 32, session, debug)
|
||||
mirrored, [], 32, session, debug,
|
||||
None)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -800,7 +874,8 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated, mirrored,
|
||||
[], 32, session, debug)
|
||||
[], 32, session, debug,
|
||||
None)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1077,7 +1152,8 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
|
|||
votes, full_post_filename,
|
||||
description, moderated, False,
|
||||
tags_from_post,
|
||||
max_tags, session, debug)
|
||||
max_tags, session, debug,
|
||||
None)
|
||||
|
||||
ctr += 1
|
||||
if ctr >= max_blogs_per_account:
|
||||
|
|
83
tests.py
83
tests.py
|
@ -150,6 +150,7 @@ from linked_data_sig import generate_json_signature
|
|||
from linked_data_sig import verify_json_signature
|
||||
from newsdaemon import hashtag_rule_tree
|
||||
from newsdaemon import hashtag_rule_resolve
|
||||
from newswire import xml_podcast_to_dict
|
||||
from newswire import get_newswire_tags
|
||||
from newswire import parse_feed_date
|
||||
from newswire import limit_word_lengths
|
||||
|
@ -6354,7 +6355,7 @@ def _test_httpsig_base_new(with_digest: bool, base_dir: str,
|
|||
|
||||
|
||||
def _test_get_actor_from_in_reply_to() -> None:
|
||||
print('testGetActorFromInReplyTo')
|
||||
print('test_get_actor_from_in_reply_to')
|
||||
in_reply_to = \
|
||||
'https://fosstodon.org/users/bashrc/statuses/107400700612621140'
|
||||
reply_actor = get_actor_from_in_reply_to(in_reply_to)
|
||||
|
@ -6365,6 +6366,85 @@ def _test_get_actor_from_in_reply_to() -> None:
|
|||
assert reply_actor is None
|
||||
|
||||
|
||||
def _test_xml_podcast_dict() -> None:
    """Checks that podcast namespace tags within an RSS feed are
    converted into a dictionary by xml_podcast_to_dict
    """
    print('test_xml_podcast_dict')

    # one tuple entry per line of the example feed
    feed_lines = (
        '<?xml version="1.0" encoding="UTF-8" ?>\n',
        '<rss version="2.0" xmlns:podcast="'
        'https://podcastindex.org/namespace/1.0">\n',
        '<podcast:episode>5</podcast:episode>\n',
        '<podcast:chapters '
        'url="https://whoframed.rodger/ep1_chapters.json" '
        'type="application/json"/>\n',
        '<podcast:funding '
        'url="https://whoframed.rodger/donate">'
        'Support the show</podcast:funding>\n',
        '<podcast:images '
        'srcset="https://whoframed.rodger/images/ep1/'
        'pci_avatar-massive.jpg 1500w, '
        'https://whoframed.rodger/images/ep1/pci_avatar-middle.jpg 600w, '
        'https://whoframed.rodger/images/ep1/pci_avatar-small.jpg 300w, '
        'https://whoframed.rodger/images/ep1/'
        'pci_avatar-microfiche.jpg 50w" />\n',
        '<podcast:location geo="geo:57.4272,34.63763" osm="R472152">'
        'Nowheresville</podcast:location>\n',
        '<podcast:locked owner="podcastowner@whoframed.rodger">yes'
        '</podcast:locked>\n',
        '<podcast:person group="visuals" role="cover art designer" '
        'href="https://whoframed.rodger/artist/rodgetrabbit">'
        'Rodger Rabbit</podcast:person>\n',
        '<podcast:person href="https://whoframed.rodger" '
        'img="http://whoframed.rodger/images/rr.jpg">Rodger Rabbit'
        '</podcast:person>\n',
        '<podcast:person href="https://whoframed.rodger" '
        'img="http://whoframed.rodger/images/jr.jpg">'
        'Jessica Rabbit</podcast:person>\n',
        '<podcast:person role="guest" '
        'href="https://whoframed.rodger/blog/bettyboop/" '
        'img="http://whoframed.rodger/images/bb.jpg">'
        'Betty Boop</podcast:person>\n',
        '<podcast:person role="guest" '
        'href="https://goodto.talk/bobhoskins/" '
        'img="https://goodto.talk/images/bhosk.jpg">'
        'Bob Hoskins</podcast:person>\n',
        '<podcast:season name="Podcasting 2.0">1</podcast:season>\n',
        '<podcast:soundbite startTime="15.27" duration="8.0" />\n',
        '<podcast:soundbite startTime="21.34" duration="32.0" />\n',
        '<podcast:transcript '
        'url="https://whoframed.rodger/ep1/transcript.txt" '
        'type="text/plain" />\n',
        '<podcast:transcript '
        'url="https://whoframed.rodger/ep2/transcript.txt" '
        'type="text/plain" />\n',
        '<podcast:transcript '
        'url="https://whoframed.rodger/ep3/transcript.txt" '
        'type="text/plain" />\n',
        '<podcast:value type="donate" method="keysend" '
        'suggested="2.95">\n',
        ' <podcast:valueRecipient name="hosting company" '
        'type="node" address="someaddress1" split="1" />\n',
        ' <podcast:valueRecipient name="podcaster" type="node" '
        'address="someaddress2" split="99" />\n',
        '</podcast:value>\n',
        '</rss>'
    )
    xml_str = ''.join(feed_lines)

    podcast_properties = xml_podcast_to_dict(xml_str)
    assert podcast_properties
    # pprint(podcast_properties)

    # every one of these should exist and be non-empty
    expected_keys = (
        'valueRecipients', 'persons', 'soundbites', 'locations',
        'transcripts', 'episode', 'funding'
    )
    for key in expected_keys:
        assert podcast_properties.get(key)

    assert int(podcast_properties['episode']) == 5
    assert podcast_properties['funding']['text'] == "Support the show"

    # the number of entries expected for tags which can repeat
    expected_counts = (
        ('transcripts', 3),
        ('valueRecipients', 2),
        ('persons', 5),
        ('locations', 1)
    )
    for key, expected_count in expected_counts:
        assert len(podcast_properties[key]) == expected_count
|
||||
|
||||
|
||||
def run_all_tests():
|
||||
base_dir = os.getcwd()
|
||||
print('Running tests...')
|
||||
|
@ -6381,6 +6461,7 @@ def run_all_tests():
|
|||
'message_json', 'liked_post_json'])
|
||||
_test_checkbox_names()
|
||||
_test_functions()
|
||||
_test_xml_podcast_dict()
|
||||
_test_get_actor_from_in_reply_to()
|
||||
_test_valid_emoji_content()
|
||||
_test_add_cw_lists(base_dir)
|
||||
|
|
Loading…
Reference in New Issue