From 41ff8954d7496dafb12bf511a3cbf2609889d9a8 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 16:04:14 +0000 Subject: [PATCH] Handle multiple categories per podcast item --- newswire.py | 50 +++++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/newswire.py b/newswire.py index a65dafe47..0bfd74688 100644 --- a/newswire.py +++ b/newswire.py @@ -397,28 +397,36 @@ def _get_podcast_categories(xml_item: str, xml_str: str) -> str: continue item_str = xml_str - episode_category = item_str.split(category_tag)[1] - if 'text="' in episode_category: - episode_category = episode_category.split('text="')[1] - if '"' in episode_category: - episode_category = episode_category.split('"')[0] - episode_category = episode_category.lower().replace(' ', '') - episode_category = episode_category.replace('#', '') - if episode_category not in podcast_categories: - if valid_hash_tag(episode_category): - podcast_categories.append('#' + episode_category) - continue + category_list = item_str.split(category_tag) + first_category = True + for category_item in category_list: + if first_category: + first_category = False + continue - if '>' in episode_category: - episode_category = episode_category.split('>')[1] - if '<' in episode_category: - episode_category = episode_category.split('<')[0] - episode_category = \ - episode_category.lower().replace(' ', '') - episode_category = episode_category.replace('#', '') - if episode_category not in podcast_categories: - if valid_hash_tag(episode_category): - podcast_categories.append('#' + episode_category) + episode_category = category_item + if 'text="' in episode_category: + episode_category = episode_category.split('text="')[1] + if '"' in episode_category: + episode_category = episode_category.split('"')[0] + episode_category = \ + episode_category.lower().replace(' ', '') + episode_category = episode_category.replace('#', '') + if episode_category not in podcast_categories: + if valid_hash_tag(episode_category): + podcast_categories.append('#' + episode_category) + continue + + if '>' in episode_category: + episode_category = episode_category.split('>')[1] + if '<' in episode_category: + episode_category = episode_category.split('<')[0] + episode_category = \ + episode_category.lower().replace(' ', '') + episode_category = episode_category.replace('#', '') + if episode_category not in podcast_categories: + if valid_hash_tag(episode_category): + podcast_categories.append('#' + episode_category) return podcast_categories