From cc11b4141caabb3014114b910695d64220241465 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 11:11:18 +0000
Subject: [PATCH 01/15] Podcasts can potentially contain video
---
webapp_podcast.py | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/webapp_podcast.py b/webapp_podcast.py
index def903fbb..eeed66f72 100644
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@@ -184,6 +184,20 @@ def html_podcast_episode(css_cache: {}, translate: {},
audio_extension.replace('.', '') + '">' + \
translate['Your browser does not support the audio element.'] + \
'\n \n'
+ elif podcast_properties.get('linkMimeType'):
+ if 'video' in podcast_properties['linkMimeType']:
+ video_extension = \
+ podcast_properties['linkMimeType'].split('/')[1]
+ video_msg = 'Your browser does not support the video element.'
+ podcast_str += \
+ '\n' + \
+ ' \n\n\n'
podcast_title = \
remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0])))
From fd4dca4b972c96b9635cba67d45b3012c405a318 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 11:13:24 +0000
Subject: [PATCH 02/15] Don't need center
---
webapp_podcast.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/webapp_podcast.py b/webapp_podcast.py
index eeed66f72..04f13e881 100644
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@@ -190,14 +190,14 @@ def html_podcast_episode(css_cache: {}, translate: {},
podcast_properties['linkMimeType'].split('/')[1]
video_msg = 'Your browser does not support the video element.'
podcast_str += \
- '\n' + \
' \n\n\n'
+ '\n\n'
podcast_title = \
remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0])))
From 26d0dad6f5d1a07e5b6dfd870b7aa75585500a2e Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 11:19:52 +0000
Subject: [PATCH 03/15] Tidying
---
webapp_podcast.py | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/webapp_podcast.py b/webapp_podcast.py
index 04f13e881..55e8c5835 100644
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@@ -186,18 +186,14 @@ def html_podcast_episode(css_cache: {}, translate: {},
'\n \n'
elif podcast_properties.get('linkMimeType'):
if 'video' in podcast_properties['linkMimeType']:
- video_extension = \
- podcast_properties['linkMimeType'].split('/')[1]
+ video_mime_type = podcast_properties['linkMimeType']
video_msg = 'Your browser does not support the video element.'
podcast_str += \
- '\n' + \
- ' \n\n'
+ '\n' + \
+ ' \n\n'
podcast_title = \
remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0])))
From 45f94eb7c5b1f468e775bcf63a3c59a77dac101d Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 11:24:15 +0000
Subject: [PATCH 04/15] Indentation
---
webapp_podcast.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/webapp_podcast.py b/webapp_podcast.py
index 55e8c5835..cf43a1ed1 100644
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@@ -189,11 +189,12 @@ def html_podcast_episode(css_cache: {}, translate: {},
video_mime_type = podcast_properties['linkMimeType']
video_msg = 'Your browser does not support the video element.'
podcast_str += \
- '\n' + \
+ ' \n' + \
' \n\n'
+ 'type="' + video_mime_type + '">' + \
+ translate[video_msg] + '\n \n'
podcast_title = \
remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0])))
From 4ed62cf0d350aca330edf7d4830006a6ad75a081 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 12:16:42 +0000
Subject: [PATCH 05/15] Ignore the first item in the feed list
---
newswire.py | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/newswire.py b/newswire.py
index a61f6eb94..dec0a727c 100644
--- a/newswire.py
+++ b/newswire.py
@@ -532,7 +532,10 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
rss_items = xml_str.split('- ')
post_ctr = 0
max_bytes = max_feed_item_size_kb * 1024
+ first_item = True
for rss_item in rss_items:
+ if first_item:
+ continue
if not rss_item:
continue
if len(rss_item) > max_bytes:
@@ -625,7 +628,10 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
rss_items = xml_str.split(item_str)
post_ctr = 0
max_bytes = max_feed_item_size_kb * 1024
+ first_item = True
for rss_item in rss_items:
+ if first_item:
+ continue
if not rss_item:
continue
if len(rss_item) > max_bytes:
@@ -708,7 +714,10 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
atom_items = xml_str.split('')
post_ctr = 0
max_bytes = max_feed_item_size_kb * 1024
+ first_item = True
for atom_item in atom_items:
+ if first_item:
+ continue
if not atom_item:
continue
if len(atom_item) > max_bytes:
From ee14bc2ef27e8740ac9cf3150108ef41cdfa9537 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 12:19:35 +0000
Subject: [PATCH 06/15] Clear first item
---
newswire.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/newswire.py b/newswire.py
index dec0a727c..c611d07db 100644
--- a/newswire.py
+++ b/newswire.py
@@ -535,6 +535,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
first_item = True
for rss_item in rss_items:
if first_item:
+ first_item = False
continue
if not rss_item:
continue
@@ -631,6 +632,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
first_item = True
for rss_item in rss_items:
if first_item:
+ first_item = False
continue
if not rss_item:
continue
@@ -717,6 +719,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
first_item = True
for atom_item in atom_items:
if first_item:
+ first_item = False
continue
if not atom_item:
continue
From dd5684ae3cc8e811c76c7870209fd8adf0171464 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 12:37:08 +0000
Subject: [PATCH 07/15] Podcast image can be global to the feed, not per item
---
newswire.py | 32 ++++++++++++++++++--------------
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/newswire.py b/newswire.py
index c611d07db..04c3d0ccf 100644
--- a/newswire.py
+++ b/newswire.py
@@ -379,13 +379,13 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
False, force)
-def xml_podcast_to_dict(xml_str: str) -> {}:
+def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
"""podcasting extensions for RSS feeds
See https://github.com/Podcastindex-org/podcast-namespace/
blob/main/docs/1.0.md
"""
- if ' {}:
"trailers": []
}
- pod_lines = xml_str.split('' not in pod_line:
@@ -448,9 +448,13 @@ def xml_podcast_to_dict(xml_str: str) -> {}:
podcast_episode_image = None
episode_image_tags = [' {}:
if podcast_episode_image:
podcast_properties['image'] = podcast_episode_image
- if 'Y' in xml_str or \
- 'T' in xml_str or \
- '1' in xml_str:
+ if 'Y' in xml_item or \
+ 'T' in xml_item or \
+ '1' in xml_item:
podcast_properties['explicit'] = True
else:
podcast_properties['explicit'] = False
else:
- if '
Date: Thu, 13 Jan 2022 13:29:45 +0000
Subject: [PATCH 08/15] Width of audio player on podcast screen
---
epicyon-podcast.css | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/epicyon-podcast.css b/epicyon-podcast.css
index 5510098c3..b15f9718e 100644
--- a/epicyon-podcast.css
+++ b/epicyon-podcast.css
@@ -71,6 +71,10 @@ body, html {
image-rendering: var(--rendering);
}
+audio {
+ width: 90%;
+}
+
a, u {
color: var(--options-fg-color);
}
From f9e33f2d35d13a59f3b10b07649c7dfab530ccb4 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 15:10:41 +0000
Subject: [PATCH 09/15] Get categories from podcast feeds
---
content.py | 26 +-------------------------
inbox.py | 2 +-
newsdaemon.py | 2 +-
newswire.py | 34 ++++++++++++++++++++++++++++++++++
tests.py | 4 ++--
utils.py | 28 ++++++++++++++++++++++++++--
6 files changed, 65 insertions(+), 31 deletions(-)
diff --git a/content.py b/content.py
index a0412f5a9..2394e21a1 100644
--- a/content.py
+++ b/content.py
@@ -11,9 +11,9 @@ import os
import email.parser
import urllib.parse
from shutil import copyfile
+from utils import valid_hash_tag
from utils import dangerous_svg
from utils import remove_domain_port
-from utils import is_valid_language
from utils import get_image_extensions
from utils import load_json
from utils import save_json
@@ -33,17 +33,6 @@ MUSIC_SITES = ('soundcloud.com', 'bandcamp.com')
MAX_LINK_LENGTH = 40
-VALID_HASHTAG_CHARS = \
- set('0123456789' +
- 'abcdefghijklmnopqrstuvwxyz' +
- 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
- '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
- 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
- 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
- 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
- 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
- 'ŴŵÝýŸÿŶŷŹźŽžŻż')
-
REMOVE_MARKUP = (
'b', 'i', 'ul', 'ol', 'li', 'em', 'strong',
'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5'
@@ -497,19 +486,6 @@ def add_web_links(content: str) -> str:
return content
-def valid_hash_tag(hashtag: str) -> bool:
- """Returns true if the give hashtag contains valid characters
- """
- # long hashtags are not valid
- if len(hashtag) >= 32:
- return False
- if set(hashtag).issubset(VALID_HASHTAG_CHARS):
- return True
- if is_valid_language(hashtag):
- return True
- return False
-
-
def _add_hash_tags(word_str: str, http_prefix: str, domain: str,
replace_hashtags: {}, post_hashtags: {}) -> bool:
"""Detects hashtags and adds them to the replacements dict
diff --git a/inbox.py b/inbox.py
index a4acdf093..51692f5e0 100644
--- a/inbox.py
+++ b/inbox.py
@@ -61,6 +61,7 @@ from utils import undo_reaction_collection_entry
from utils import has_group_type
from utils import local_actor_url
from utils import has_object_stringType
+from utils import valid_hash_tag
from categories import get_hashtag_categories
from categories import set_hashtag_category
from httpsig import get_digest_algorithm_from_headers
@@ -119,7 +120,6 @@ from announce import is_self_announce
from announce import create_announce
from notifyOnPost import notify_when_person_posts
from conversation import update_conversation
-from content import valid_hash_tag
from webapp_hashtagswarm import html_hash_tag_swarm
from person import valid_sending_actor
diff --git a/newsdaemon.py b/newsdaemon.py
index cddb460d4..7233eab66 100644
--- a/newsdaemon.py
+++ b/newsdaemon.py
@@ -24,7 +24,7 @@ from newswire import get_dict_from_newswire
# from posts import send_signed_json
from posts import create_news_post
from posts import archive_posts_for_person
-from content import valid_hash_tag
+from utils import valid_hash_tag
from utils import get_base_content_from_post
from utils import remove_html
from utils import get_full_domain
diff --git a/newswire.py b/newswire.py
index 04c3d0ccf..a17afb3d7 100644
--- a/newswire.py
+++ b/newswire.py
@@ -18,6 +18,7 @@ from datetime import timezone
from collections import OrderedDict
from utils import valid_post_date
from categories import set_hashtag_category
+from utils import valid_hash_tag
from utils import dangerous_svg
from utils import get_fav_filename_from_url
from utils import get_base_content_from_post
@@ -470,8 +471,41 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
podcast_episode_image = episode_image
break
+ # get categories if they exist. These can be turned into hashtags
+ podcast_categories = []
+ episode_category_tags = ['' in episode_category:
+ episode_category = episode_category.split('>')[1]
+ if '<' in episode_category:
+ episode_category = episode_category.split('<')[0]
+ episode_category = \
+ episode_category.lower().replace(' ', '')
+ if episode_category not in podcast_categories:
+ if valid_hash_tag(episode_category):
+ podcast_categories.append(episode_category)
+ continue
+
if podcast_episode_image:
podcast_properties['image'] = podcast_episode_image
+ podcast_properties['categories'] = podcast_categories
if 'Y' in xml_item or \
'T' in xml_item or \
diff --git a/tests.py b/tests.py
index f549b8184..4d086b1f8 100644
--- a/tests.py
+++ b/tests.py
@@ -82,6 +82,7 @@ from utils import copytree
from utils import load_json
from utils import save_json
from utils import get_status_number
+from utils import valid_hash_tag
from utils import get_followers_of_person
from utils import remove_html
from utils import dangerous_markup
@@ -132,7 +133,6 @@ from content import get_price_from_string
from content import limit_repeated_words
from content import switch_words
from content import extract_text_fields_in_post
-from content import valid_hash_tag
from content import html_replace_email_quote
from content import html_replace_quote_marks
from content import dangerous_css
@@ -6428,7 +6428,7 @@ def _test_xml_podcast_dict() -> None:
'address="someaddress2" split="99" />\n' + \
'\n' + \
''
- podcast_properties = xml_podcast_to_dict(xml_str)
+ podcast_properties = xml_podcast_to_dict(xml_str, xml_str)
assert podcast_properties
# pprint(podcast_properties)
assert podcast_properties.get('valueRecipients')
diff --git a/utils.py b/utils.py
index 3e1930464..74e6d2b39 100644
--- a/utils.py
+++ b/utils.py
@@ -20,6 +20,17 @@ from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from followingCalendar import add_person_to_calendar
+VALID_HASHTAG_CHARS = \
+ set('0123456789' +
+ 'abcdefghijklmnopqrstuvwxyz' +
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
+ '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
+ 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
+ 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
+ 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
+ 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
+ 'ŴŵÝýŸÿŶŷŹźŽžŻż')
+
# posts containing these strings will always get screened out,
# both incoming and outgoing.
# Could include dubious clacks or admin dogwhistles
@@ -1798,7 +1809,7 @@ def delete_post(base_dir: str, http_prefix: str,
str(post_filename))
-def is_valid_language(text: str) -> bool:
+def _is_valid_language(text: str) -> bool:
"""Returns true if the given text contains a valid
natural language string
"""
@@ -1900,7 +1911,7 @@ def valid_nickname(domain: str, nickname: str) -> bool:
return False
if len(nickname) > 30:
return False
- if not is_valid_language(nickname):
+ if not _is_valid_language(nickname):
return False
forbidden_chars = ('.', ' ', '/', '?', ':', ';', '@', '#', '!')
for char in forbidden_chars:
@@ -3288,3 +3299,16 @@ def get_fav_filename_from_url(base_dir: str, favicon_url: str) -> str:
if '/favicon.' in favicon_url:
favicon_url = favicon_url.replace('/favicon.', '.')
return base_dir + '/favicons/' + favicon_url.replace('/', '-')
+
+
+def valid_hash_tag(hashtag: str) -> bool:
+ """Returns true if the given hashtag contains valid characters
+ """
+ # long hashtags are not valid
+ if len(hashtag) >= 32:
+ return False
+ if set(hashtag).issubset(VALID_HASHTAG_CHARS):
+ return True
+ if _is_valid_language(hashtag):
+ return True
+ return False
From 373116e72c081217617082d9bebeeb54f1287be9 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 15:15:47 +0000
Subject: [PATCH 10/15] Turn podcast categories into hashtags
---
newswire.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/newswire.py b/newswire.py
index a17afb3d7..026a95875 100644
--- a/newswire.py
+++ b/newswire.py
@@ -226,6 +226,10 @@ def _add_newswire_dict_entry(base_dir: str, domain: str,
# extract hashtags from the text of the feed post
post_tags = get_newswire_tags(all_text, max_tags)
+ # Include tags from podcast categories
+ if podcast_properties:
+ post_tags += podcast_properties['categories']
+
# combine the tags into a single list
for tag in tags:
if tag in post_tags:
@@ -489,7 +493,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
episode_category = episode_category.lower().replace(' ', '')
if episode_category not in podcast_categories:
if valid_hash_tag(episode_category):
- podcast_categories.append(episode_category)
+ podcast_categories.append('#' + episode_category)
continue
else:
if '>' in episode_category:
@@ -500,7 +504,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
episode_category.lower().replace(' ', '')
if episode_category not in podcast_categories:
if valid_hash_tag(episode_category):
- podcast_categories.append(episode_category)
+ podcast_categories.append('#' + episode_category)
continue
if podcast_episode_image:
From 36a4cf3aad6da564e8766222f3b802f96cc4fd7b Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 15:18:39 +0000
Subject: [PATCH 11/15] Avoid any double hashes
---
newswire.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/newswire.py b/newswire.py
index 026a95875..5888206c0 100644
--- a/newswire.py
+++ b/newswire.py
@@ -491,6 +491,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
if '"' in episode_category:
episode_category = episode_category.split('"')[0]
episode_category = episode_category.lower().replace(' ', '')
+ episode_category = episode_category.replace('#', '')
if episode_category not in podcast_categories:
if valid_hash_tag(episode_category):
podcast_categories.append('#' + episode_category)
@@ -502,6 +503,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
episode_category = episode_category.split('<')[0]
episode_category = \
episode_category.lower().replace(' ', '')
+ episode_category = episode_category.replace('#', '')
if episode_category not in podcast_categories:
if valid_hash_tag(episode_category):
podcast_categories.append('#' + episode_category)
From fe063377e6e3aef5dc83030fd39baf93a353949f Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 15:30:55 +0000
Subject: [PATCH 12/15] Tidying
---
newswire.py | 72 +++++++++++++++++++++++++++++------------------------
1 file changed, 40 insertions(+), 32 deletions(-)
diff --git a/newswire.py b/newswire.py
index 5888206c0..a65dafe47 100644
--- a/newswire.py
+++ b/newswire.py
@@ -384,6 +384,45 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str,
False, force)
+def _get_podcast_categories(xml_item: str, xml_str: str) -> str:
+ """Get podcast categories if they exist. These can be turned into hashtags
+ """
+ podcast_categories = []
+ episode_category_tags = ['' in episode_category:
+ episode_category = episode_category.split('>')[1]
+ if '<' in episode_category:
+ episode_category = episode_category.split('<')[0]
+ episode_category = \
+ episode_category.lower().replace(' ', '')
+ episode_category = episode_category.replace('#', '')
+ if episode_category not in podcast_categories:
+ if valid_hash_tag(episode_category):
+ podcast_categories.append('#' + episode_category)
+
+ return podcast_categories
+
+
def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
"""podcasting extensions for RSS feeds
See https://github.com/Podcastindex-org/podcast-namespace/
@@ -476,38 +515,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}:
break
# get categories if they exist. These can be turned into hashtags
- podcast_categories = []
- episode_category_tags = ['' in episode_category:
- episode_category = episode_category.split('>')[1]
- if '<' in episode_category:
- episode_category = episode_category.split('<')[0]
- episode_category = \
- episode_category.lower().replace(' ', '')
- episode_category = episode_category.replace('#', '')
- if episode_category not in podcast_categories:
- if valid_hash_tag(episode_category):
- podcast_categories.append('#' + episode_category)
- continue
+ podcast_categories = _get_podcast_categories(xml_item, xml_str)
if podcast_episode_image:
podcast_properties['image'] = podcast_episode_image
From 1587ec040478a8a9995da9253c8e9b2a9547c82d Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 15:40:54 +0000
Subject: [PATCH 13/15] Show hashtag categories on podcast screen
---
webapp_podcast.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/webapp_podcast.py b/webapp_podcast.py
index cf43a1ed1..1f30c36d2 100644
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@@ -221,6 +221,14 @@ def html_podcast_episode(css_cache: {}, translate: {},
'">
\n'
+ if podcast_properties['categories']:
+ podcast_str += ''
+ tags_str = ''
+ for tag in podcast_properties['categories']:
+ tag_link = '/users/' + nickname + '/tags/' + tag.replace('#', '')
+ tags_str += '' + tag + ' '
+ podcast_str += tags_str.strip() + '
\n'
+
podcast_str += _html_podcast_performers(podcast_properties)
podcast_str += ' \n'
From 41ff8954d7496dafb12bf511a3cbf2609889d9a8 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 16:04:14 +0000
Subject: [PATCH 14/15] Handle multiple categories per podcast item
---
newswire.py | 50 +++++++++++++++++++++++++++++---------------------
1 file changed, 29 insertions(+), 21 deletions(-)
diff --git a/newswire.py b/newswire.py
index a65dafe47..0bfd74688 100644
--- a/newswire.py
+++ b/newswire.py
@@ -397,28 +397,36 @@ def _get_podcast_categories(xml_item: str, xml_str: str) -> str:
continue
item_str = xml_str
- episode_category = item_str.split(category_tag)[1]
- if 'text="' in episode_category:
- episode_category = episode_category.split('text="')[1]
- if '"' in episode_category:
- episode_category = episode_category.split('"')[0]
- episode_category = episode_category.lower().replace(' ', '')
- episode_category = episode_category.replace('#', '')
- if episode_category not in podcast_categories:
- if valid_hash_tag(episode_category):
- podcast_categories.append('#' + episode_category)
- continue
+ category_list = item_str.split(category_tag)
+ first_category = True
+ for category_item in category_list:
+ if first_category:
+ first_category = False
+ continue
- if '>' in episode_category:
- episode_category = episode_category.split('>')[1]
- if '<' in episode_category:
- episode_category = episode_category.split('<')[0]
- episode_category = \
- episode_category.lower().replace(' ', '')
- episode_category = episode_category.replace('#', '')
- if episode_category not in podcast_categories:
- if valid_hash_tag(episode_category):
- podcast_categories.append('#' + episode_category)
+ episode_category = category_item
+ if 'text="' in episode_category:
+ episode_category = episode_category.split('text="')[1]
+ if '"' in episode_category:
+ episode_category = episode_category.split('"')[0]
+ episode_category = \
+ episode_category.lower().replace(' ', '')
+ episode_category = episode_category.replace('#', '')
+ if episode_category not in podcast_categories:
+ if valid_hash_tag(episode_category):
+ podcast_categories.append('#' + episode_category)
+ continue
+
+ if '>' in episode_category:
+ episode_category = episode_category.split('>')[1]
+ if '<' in episode_category:
+ episode_category = episode_category.split('<')[0]
+ episode_category = \
+ episode_category.lower().replace(' ', '')
+ episode_category = episode_category.replace('#', '')
+ if episode_category not in podcast_categories:
+ if valid_hash_tag(episode_category):
+ podcast_categories.append('#' + episode_category)
return podcast_categories
From 3aab2753609ef9c60318c84466a1bca678596397 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 13 Jan 2022 16:12:55 +0000
Subject: [PATCH 15/15] Tidying
---
newswire.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/newswire.py b/newswire.py
index 0bfd74688..d30025dfb 100644
--- a/newswire.py
+++ b/newswire.py
@@ -399,12 +399,11 @@ def _get_podcast_categories(xml_item: str, xml_str: str) -> str:
category_list = item_str.split(category_tag)
first_category = True
- for category_item in category_list:
+ for episode_category in category_list:
if first_category:
first_category = False
continue
- episode_category = category_item
if 'text="' in episode_category:
episode_category = episode_category.split('text="')[1]
if '"' in episode_category: