diff --git a/newsdaemon.py b/newsdaemon.py
index cf1b21aeb..4d01008df 100644
--- a/newsdaemon.py
+++ b/newsdaemon.py
@@ -627,6 +627,10 @@ def _convert_rs_sto_activity_pub(base_dir: str, http_prefix: str,
'
' + \
translate['Read more...'] + ''
+# podcast_properties = None
+# if len(item) > 8:
+# podcast_properties = item[8]
+
followers_only = False
# NOTE: the id when the post is created will not be
# consistent (it's based on the current time, not the
diff --git a/newswire.py b/newswire.py
index a5231931e..438610dfa 100644
--- a/newswire.py
+++ b/newswire.py
@@ -203,7 +203,8 @@ def _add_newswire_dict_entry(base_dir: str, domain: str,
description: str, moderated: bool,
mirrored: bool,
tags: [],
- max_tags: int, session, debug: bool) -> None:
+ max_tags: int, session, debug: bool,
+ podcast_properties: {}) -> None:
"""Update the newswire dictionary
"""
# remove any markup
@@ -246,7 +247,8 @@ def _add_newswire_dict_entry(base_dir: str, domain: str,
description,
moderated,
post_tags,
- mirrored
+ mirrored,
+ podcast_properties
]
@@ -377,6 +379,71 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
False, force)
def xml_podcast_to_dict(xml_str: str) -> {}:
    """podcasting extensions for RSS feeds

    Scans the feed text for ``<podcast:...>`` tags, without doing a
    full XML parse, and returns what was found as a dictionary.

    A tag without attributes is stored as ``{tag name: text}``.
    A tag with attributes becomes a dict of the recognised attributes,
    plus a 'text' entry if the tag has text content. That dict is
    appended to the list held under the plural of the tag name when
    there is one (eg. 'person' goes into 'persons'), otherwise it is
    stored under the tag name, replacing any earlier tag of that name.

    An empty dict is returned if the feed does not mention the
    podcastindex.org 1.0 namespace.

    NOTE(review): splitting on '<podcast:' and the five plural list
    keys are inferred from how the remainder of this function and its
    unit test use them - confirm against the repository.
    """
    if not xml_str or 'podcastindex.org/namespace/1.0' not in xml_str:
        return {}

    # tags which can occur more than once are collected into lists,
    # keyed by the plural of the tag name
    podcast_properties = {
        "locations": [],
        "persons": [],
        "soundbites": [],
        "transcripts": [],
        "valueRecipients": []
    }

    # attributes which are copied from a tag into its entry
    pod_fields = (
        'url', 'geo', 'osm', 'type', 'method', 'group',
        'owner', 'srcset', 'img', 'role', 'address', 'suggested',
        'startTime', 'duration', 'href', 'name'
    )

    # The first section precedes any podcast tag, so it is skipped.
    # Closing tags begin with '</' and so do not create sections
    for pod_line in xml_str.split('<podcast:')[1:]:
        if '>' not in pod_line:
            # the tag was never terminated
            continue
        pod_tag, pod_text = pod_line.split('>', 1)
        # only the text before the next tag belongs to this tag.
        # Always strip it, so that the newline which follows a
        # self-closing tag is not mistaken for text content
        pod_text = pod_text.split('<')[0].strip()

        if ' ' not in pod_tag:
            # no attributes, so the text is the value
            podcast_properties[pod_tag.strip()] = pod_text
            continue

        pod_key = pod_tag.split(' ')[0]
        pod_entry = {}
        for pod_field in pod_fields:
            # look within the tag only, so that attributes of other
            # markup which follows the tag are not picked up
            if pod_field + '="' not in pod_tag:
                continue
            pod_str = pod_tag.split(pod_field + '="')[1]
            if '"' not in pod_str:
                # no closing quote
                continue
            pod_entry[pod_field] = pod_str.split('"')[0]
        if pod_text:
            pod_entry['text'] = pod_text

        pod_list = podcast_properties.get(pod_key + 's')
        if isinstance(pod_list, list):
            pod_list.append(pod_entry)
        else:
            podcast_properties[pod_key] = pod_entry

    return podcast_properties
+
+
def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
moderated: bool, mirrored: bool,
max_posts_per_source: int,
@@ -446,12 +513,14 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
if _valid_feed_date(pub_date_str):
post_filename = ''
votes_status = []
+ podcast_properties = xml_podcast_to_dict(xml_str)
_add_newswire_dict_entry(base_dir, domain,
result, pub_date_str,
title, link,
votes_status, post_filename,
description, moderated,
- mirrored, [], 32, session, debug)
+ mirrored, [], 32, session, debug,
+ podcast_properties)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -534,12 +603,14 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
if _valid_feed_date(pub_date_str):
post_filename = ''
votes_status = []
+ podcast_properties = xml_podcast_to_dict(xml_str)
_add_newswire_dict_entry(base_dir, domain,
result, pub_date_str,
title, link,
votes_status, post_filename,
description, moderated,
- mirrored, [], 32, session, debug)
+ mirrored, [], 32, session, debug,
+ podcast_properties)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -610,12 +681,14 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
if _valid_feed_date(pub_date_str):
post_filename = ''
votes_status = []
+ podcast_properties = xml_podcast_to_dict(xml_str)
_add_newswire_dict_entry(base_dir, domain,
result, pub_date_str,
title, link,
votes_status, post_filename,
description, moderated,
- mirrored, [], 32, session, debug)
+ mirrored, [], 32, session, debug,
+ podcast_properties)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -727,7 +800,8 @@ def _json_feed_v1to_dict(base_dir: str, domain: str, xml_str: str,
title, link,
votes_status, post_filename,
description, moderated,
- mirrored, [], 32, session, debug)
+ mirrored, [], 32, session, debug,
+ None)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -800,7 +874,8 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
title, link,
votes_status, post_filename,
description, moderated, mirrored,
- [], 32, session, debug)
+ [], 32, session, debug,
+ None)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -1077,7 +1152,8 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
votes, full_post_filename,
description, moderated, False,
tags_from_post,
- max_tags, session, debug)
+ max_tags, session, debug,
+ None)
ctr += 1
if ctr >= max_blogs_per_account:
diff --git a/tests.py b/tests.py
index c5626da89..cbe943bf1 100644
--- a/tests.py
+++ b/tests.py
@@ -150,6 +150,7 @@ from linked_data_sig import generate_json_signature
from linked_data_sig import verify_json_signature
from newsdaemon import hashtag_rule_tree
from newsdaemon import hashtag_rule_resolve
+from newswire import xml_podcast_to_dict
from newswire import get_newswire_tags
from newswire import parse_feed_date
from newswire import limit_word_lengths
@@ -6354,7 +6355,7 @@ def _test_httpsig_base_new(with_digest: bool, base_dir: str,
def _test_get_actor_from_in_reply_to() -> None:
- print('testGetActorFromInReplyTo')
+ print('test_get_actor_from_in_reply_to')
in_reply_to = \
'https://fosstodon.org/users/bashrc/statuses/107400700612621140'
reply_actor = get_actor_from_in_reply_to(in_reply_to)
@@ -6365,6 +6366,85 @@ def _test_get_actor_from_in_reply_to() -> None:
assert reply_actor is None
+def _test_xml_podcast_dict() -> None:  # unit test for xml_podcast_to_dict
+ print('test_xml_podcast_dict')
+ xml_str = \
+ '\n' + \
+ '\n' + \
+ '5\n' + \
+ '\n' + \
+ '' + \
+ 'Support the show\n' + \
+ '\n' + \
+ '' + \
+ 'Nowheresville\n' + \
+ 'yes' + \
+ '\n' + \
+ '' + \
+ 'Rodger Rabbit\n' + \
+ 'Rodger Rabbit' + \
+ '\n' + \
+ '' + \
+ 'Jessica Rabbit\n' + \
+ '' + \
+ 'Betty Boop\n' + \
+ '' + \
+ 'Bob Hoskins\n' + \
+ '1\n' + \
+ '\n' + \
+ '\n' + \
+ '\n' + \
+ '\n' + \
+ '\n' + \
+ '\n' + \
+ ' \n' + \
+ ' \n' + \
+ '\n' + \
+ ''  # NOTE(review): no XML tags are visible in this fixture - confirm
+ podcast_properties = xml_podcast_to_dict(xml_str)  # parse the fixture
+ assert podcast_properties  # the result must not be empty
+ # pprint(podcast_properties)
+ assert podcast_properties.get('valueRecipients')  # keys must be non-empty
+ assert podcast_properties.get('persons')
+ assert podcast_properties.get('soundbites')
+ assert podcast_properties.get('locations')
+ assert podcast_properties.get('transcripts')
+ assert podcast_properties.get('episode')
+ assert podcast_properties.get('funding')
+ assert int(podcast_properties['episode']) == 5  # converts to the number 5
+ assert podcast_properties['funding']['text'] == "Support the show"
+ assert len(podcast_properties['transcripts']) == 3  # expected entry counts
+ assert len(podcast_properties['valueRecipients']) == 2
+ assert len(podcast_properties['persons']) == 5
+ assert len(podcast_properties['locations']) == 1
+
+
def run_all_tests():
base_dir = os.getcwd()
print('Running tests...')
@@ -6381,6 +6461,7 @@ def run_all_tests():
'message_json', 'liked_post_json'])
_test_checkbox_names()
_test_functions()
+ _test_xml_podcast_dict()
_test_get_actor_from_in_reply_to()
_test_valid_emoji_content()
_test_add_cw_lists(base_dir)