From cc11b4141caabb3014114b910695d64220241465 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 11:11:18 +0000 Subject: [PATCH 01/15] Podcasts can potentially contain video --- webapp_podcast.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/webapp_podcast.py b/webapp_podcast.py index def903fbb..eeed66f72 100644 --- a/webapp_podcast.py +++ b/webapp_podcast.py @@ -184,6 +184,20 @@ def html_podcast_episode(css_cache: {}, translate: {}, audio_extension.replace('.', '') + '">' + \ translate['Your browser does not support the audio element.'] + \ '\n \n' + elif podcast_properties.get('linkMimeType'): + if 'video' in podcast_properties['linkMimeType']: + video_extension = \ + podcast_properties['linkMimeType'].split('/')[1] + video_msg = 'Your browser does not support the video element.' + podcast_str += \ + '
\n' + \ + ' \n
\n
\n' podcast_title = \ remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0]))) From fd4dca4b972c96b9635cba67d45b3012c405a318 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 11:13:24 +0000 Subject: [PATCH 02/15] Don't need center --- webapp_podcast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webapp_podcast.py b/webapp_podcast.py index eeed66f72..04f13e881 100644 --- a/webapp_podcast.py +++ b/webapp_podcast.py @@ -190,14 +190,14 @@ def html_podcast_episode(css_cache: {}, translate: {}, podcast_properties['linkMimeType'].split('/')[1] video_msg = 'Your browser does not support the video element.' podcast_str += \ - '
\n' + \ ' \n
\n
\n' + '\n\n' podcast_title = \ remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0]))) From 26d0dad6f5d1a07e5b6dfd870b7aa75585500a2e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 11:19:52 +0000 Subject: [PATCH 03/15] Tidying --- webapp_podcast.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/webapp_podcast.py b/webapp_podcast.py index 04f13e881..55e8c5835 100644 --- a/webapp_podcast.py +++ b/webapp_podcast.py @@ -186,18 +186,14 @@ def html_podcast_episode(css_cache: {}, translate: {}, '\n \n' elif podcast_properties.get('linkMimeType'): if 'video' in podcast_properties['linkMimeType']: - video_extension = \ - podcast_properties['linkMimeType'].split('/')[1] + video_mime_type = podcast_properties['linkMimeType'] video_msg = 'Your browser does not support the video element.' podcast_str += \ - '
\n' + \ - ' \n
\n' + '
\n' + \ + ' \n
\n' podcast_title = \ remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0]))) From 45f94eb7c5b1f468e775bcf63a3c59a77dac101d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 11:24:15 +0000 Subject: [PATCH 04/15] Indentation --- webapp_podcast.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/webapp_podcast.py b/webapp_podcast.py index 55e8c5835..cf43a1ed1 100644 --- a/webapp_podcast.py +++ b/webapp_podcast.py @@ -189,11 +189,12 @@ def html_podcast_episode(css_cache: {}, translate: {}, video_mime_type = podcast_properties['linkMimeType'] video_msg = 'Your browser does not support the video element.' podcast_str += \ - '
\n' + \ + '
\n' + \ ' \n
\n' + 'type="' + video_mime_type + '">' + \ + translate[video_msg] + '\n
\n' podcast_title = \ remove_html(html.unescape(urllib.parse.unquote_plus(newswire_item[0]))) From 4ed62cf0d350aca330edf7d4830006a6ad75a081 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 12:16:42 +0000 Subject: [PATCH 05/15] Ignore the first item in the feed list --- newswire.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/newswire.py b/newswire.py index a61f6eb94..dec0a727c 100644 --- a/newswire.py +++ b/newswire.py @@ -532,7 +532,10 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str, rss_items = xml_str.split('') post_ctr = 0 max_bytes = max_feed_item_size_kb * 1024 + first_item = True for rss_item in rss_items: + if first_item: + continue if not rss_item: continue if len(rss_item) > max_bytes: @@ -625,7 +628,10 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str, rss_items = xml_str.split(item_str) post_ctr = 0 max_bytes = max_feed_item_size_kb * 1024 + first_item = True for rss_item in rss_items: + if first_item: + continue if not rss_item: continue if len(rss_item) > max_bytes: @@ -708,7 +714,10 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str, atom_items = xml_str.split('') post_ctr = 0 max_bytes = max_feed_item_size_kb * 1024 + first_item = True for atom_item in atom_items: + if first_item: + continue if not atom_item: continue if len(atom_item) > max_bytes: From ee14bc2ef27e8740ac9cf3150108ef41cdfa9537 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 12:19:35 +0000 Subject: [PATCH 06/15] Clear first item --- newswire.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/newswire.py b/newswire.py index dec0a727c..c611d07db 100644 --- a/newswire.py +++ b/newswire.py @@ -535,6 +535,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str, first_item = True for rss_item in rss_items: if first_item: + first_item = False continue if not rss_item: continue @@ -631,6 +632,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str, first_item = True for rss_item in rss_items: if first_item: + first_item = False continue if not rss_item: continue @@ -717,6 +719,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str, first_item = True for atom_item in atom_items: if first_item: + first_item = False continue if not atom_item: continue From dd5684ae3cc8e811c76c7870209fd8adf0171464 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 12:37:08 +0000 Subject: [PATCH 07/15] Podcast image can be global to the feed, not per item --- newswire.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/newswire.py b/newswire.py index c611d07db..04c3d0ccf 100644 --- a/newswire.py +++ b/newswire.py @@ -379,13 +379,13 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str, False, force) -def xml_podcast_to_dict(xml_str: str) -> {}: +def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: """podcasting extensions for RSS feeds See https://github.com/Podcastindex-org/podcast-namespace/ blob/main/docs/1.0.md """ - if ' {}: "trailers": [] } - pod_lines = xml_str.split('' not in pod_line: @@ -448,9 +448,13 @@ def xml_podcast_to_dict(xml_str: str) -> {}: podcast_episode_image = None episode_image_tags = [' {}: if podcast_episode_image: podcast_properties['image'] = podcast_episode_image - if 'Y' in xml_str or \ - 'T' in xml_str or \ - '1' in xml_str: + if 'Y' in xml_item or \ + 'T' in xml_item or \ + '1' in xml_item: podcast_properties['explicit'] = True else: podcast_properties['explicit'] = False else: - if ' Date: Thu, 13 Jan 2022 13:29:45 +0000 Subject: [PATCH 08/15] Width of audio player on podcast screen --- epicyon-podcast.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/epicyon-podcast.css b/epicyon-podcast.css index 5510098c3..b15f9718e 100644 --- a/epicyon-podcast.css +++ b/epicyon-podcast.css @@ -71,6 +71,10 @@ body, html { image-rendering: var(--rendering); } +audio { + width: 90%; +} + a, u { color: var(--options-fg-color); } From f9e33f2d35d13a59f3b10b07649c7dfab530ccb4 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 15:10:41 +0000 Subject: [PATCH 09/15] Get categories from podcast feeds --- content.py | 26 +------------------------- inbox.py | 2 +- newsdaemon.py | 2 +- newswire.py | 34 ++++++++++++++++++++++++++++++++++ tests.py | 4 ++-- utils.py | 28 ++++++++++++++++++++++++++-- 6 files changed, 65 insertions(+), 31 deletions(-) diff --git a/content.py b/content.py index a0412f5a9..2394e21a1 100644 --- a/content.py +++ b/content.py @@ -11,9 +11,9 @@ import os import email.parser import urllib.parse from shutil import copyfile +from utils import valid_hash_tag from utils import dangerous_svg from utils import remove_domain_port -from utils import is_valid_language from utils import get_image_extensions from utils import load_json from utils import save_json @@ -33,17 +33,6 @@ MUSIC_SITES = ('soundcloud.com', 'bandcamp.com') MAX_LINK_LENGTH = 40 -VALID_HASHTAG_CHARS = \ - set('0123456789' + - 'abcdefghijklmnopqrstuvwxyz' + - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + - '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' + - 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' + - 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' + - 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' + - 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' + - 'ŴŵÝýŸÿŶŷŹźŽžŻż') - REMOVE_MARKUP = ( 'b', 'i', 'ul', 'ol', 'li', 'em', 'strong', 'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5' @@ -497,19 +486,6 @@ def add_web_links(content: str) -> str: return content -def valid_hash_tag(hashtag: str) -> bool: - """Returns true if the give hashtag contains valid characters - """ - # long hashtags are not valid - if len(hashtag) >= 32: - return False - if set(hashtag).issubset(VALID_HASHTAG_CHARS): - return True - if is_valid_language(hashtag): - return True - return False - - def _add_hash_tags(word_str: str, http_prefix: str, domain: str, replace_hashtags: {}, post_hashtags: {}) -> bool: """Detects hashtags and adds them to the replacements dict diff --git a/inbox.py b/inbox.py index a4acdf093..51692f5e0 100644 --- a/inbox.py +++ b/inbox.py @@ -61,6 +61,7 @@ from utils import undo_reaction_collection_entry from utils import has_group_type from utils import local_actor_url from utils import has_object_stringType +from utils import valid_hash_tag from categories import get_hashtag_categories from categories import set_hashtag_category from httpsig import get_digest_algorithm_from_headers @@ -119,7 +120,6 @@ from announce import is_self_announce from announce import create_announce from notifyOnPost import notify_when_person_posts from conversation import update_conversation -from content import valid_hash_tag from webapp_hashtagswarm import html_hash_tag_swarm from person import valid_sending_actor diff --git a/newsdaemon.py b/newsdaemon.py index cddb460d4..7233eab66 100644 --- a/newsdaemon.py +++ b/newsdaemon.py @@ -24,7 +24,7 @@ from newswire import get_dict_from_newswire # from posts import send_signed_json from posts import create_news_post from posts import archive_posts_for_person -from content import valid_hash_tag +from utils import valid_hash_tag from utils import get_base_content_from_post from utils import remove_html from utils import get_full_domain diff --git a/newswire.py b/newswire.py index 04c3d0ccf..a17afb3d7 100644 --- a/newswire.py +++ b/newswire.py @@ -18,6 +18,7 @@ from datetime import timezone from collections import OrderedDict from utils import valid_post_date from categories import set_hashtag_category +from utils import valid_hash_tag from utils import dangerous_svg from utils import get_fav_filename_from_url from utils import get_base_content_from_post @@ -470,8 +471,41 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: podcast_episode_image = episode_image break + # get categories if they exist. These can be turned into hashtags + podcast_categories = [] + episode_category_tags = ['' in episode_category: + episode_category = episode_category.split('>')[1] + if '<' in episode_category: + episode_category = episode_category.split('<')[0] + episode_category = \ + episode_category.lower().replace(' ', '') + if episode_category not in podcast_categories: + if valid_hash_tag(episode_category): + podcast_categories.append(episode_category) + continue + if podcast_episode_image: podcast_properties['image'] = podcast_episode_image + podcast_properties['categories'] = podcast_categories if 'Y' in xml_item or \ 'T' in xml_item or \ diff --git a/tests.py b/tests.py index f549b8184..4d086b1f8 100644 --- a/tests.py +++ b/tests.py @@ -82,6 +82,7 @@ from utils import copytree from utils import load_json from utils import save_json from utils import get_status_number +from utils import valid_hash_tag from utils import get_followers_of_person from utils import remove_html from utils import dangerous_markup @@ -132,7 +133,6 @@ from content import get_price_from_string from content import limit_repeated_words from content import switch_words from content import extract_text_fields_in_post -from content import valid_hash_tag from content import html_replace_email_quote from content import html_replace_quote_marks from content import dangerous_css @@ -6428,7 +6428,7 @@ def _test_xml_podcast_dict() -> None: 'address="someaddress2" split="99" />\n' + \ '\n' + \ '' - podcast_properties = xml_podcast_to_dict(xml_str) + podcast_properties = xml_podcast_to_dict(xml_str, xml_str) assert podcast_properties # pprint(podcast_properties) assert podcast_properties.get('valueRecipients') diff --git a/utils.py b/utils.py index 3e1930464..74e6d2b39 100644 --- a/utils.py +++ b/utils.py @@ -20,6 +20,17 @@ from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import hashes from followingCalendar import add_person_to_calendar +VALID_HASHTAG_CHARS = \ + set('0123456789' + + 'abcdefghijklmnopqrstuvwxyz' + + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' + + 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' + + 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' + + 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' + + 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' + + 'ŴŵÝýŸÿŶŷŹźŽžŻż') + # posts containing these strings will always get screened out, # both incoming and outgoing. # Could include dubious clacks or admin dogwhistles @@ -1798,7 +1809,7 @@ def delete_post(base_dir: str, http_prefix: str, str(post_filename)) -def is_valid_language(text: str) -> bool: +def _is_valid_language(text: str) -> bool: """Returns true if the given text contains a valid natural language string """ @@ -1900,7 +1911,7 @@ def valid_nickname(domain: str, nickname: str) -> bool: return False if len(nickname) > 30: return False - if not is_valid_language(nickname): + if not _is_valid_language(nickname): return False forbidden_chars = ('.', ' ', '/', '?', ':', ';', '@', '#', '!') for char in forbidden_chars: @@ -3288,3 +3299,16 @@ def get_fav_filename_from_url(base_dir: str, favicon_url: str) -> str: if '/favicon.' in favicon_url: favicon_url = favicon_url.replace('/favicon.', '.') return base_dir + '/favicons/' + favicon_url.replace('/', '-') + + +def valid_hash_tag(hashtag: str) -> bool: + """Returns true if the give hashtag contains valid characters + """ + # long hashtags are not valid + if len(hashtag) >= 32: + return False + if set(hashtag).issubset(VALID_HASHTAG_CHARS): + return True + if _is_valid_language(hashtag): + return True + return False From 373116e72c081217617082d9bebeeb54f1287be9 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 15:15:47 +0000 Subject: [PATCH 10/15] Turn podcast categories into hashtags --- newswire.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/newswire.py b/newswire.py index a17afb3d7..026a95875 100644 --- a/newswire.py +++ b/newswire.py @@ -226,6 +226,10 @@ def _add_newswire_dict_entry(base_dir: str, domain: str, # extract hashtags from the text of the feed post post_tags = get_newswire_tags(all_text, max_tags) + # Include tags from podcast categories + if podcast_properties: + post_tags += podcast_properties['categories'] + # combine the tags into a single list for tag in tags: if tag in post_tags: @@ -489,7 +493,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: episode_category = episode_category.lower().replace(' ', '') if episode_category not in podcast_categories: if valid_hash_tag(episode_category): - podcast_categories.append(episode_category) + podcast_categories.append('#' + episode_category) continue else: if '>' in episode_category: @@ -500,7 +504,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: episode_category.lower().replace(' ', '') if episode_category not in podcast_categories: if valid_hash_tag(episode_category): - podcast_categories.append(episode_category) + podcast_categories.append('#' + episode_category) continue if podcast_episode_image: From 36a4cf3aad6da564e8766222f3b802f96cc4fd7b Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 15:18:39 +0000 Subject: [PATCH 11/15] Avoid any double hashes --- newswire.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/newswire.py b/newswire.py index 026a95875..5888206c0 100644 --- a/newswire.py +++ b/newswire.py @@ -491,6 +491,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: if '"' in episode_category: episode_category = episode_category.split('"')[0] episode_category = episode_category.lower().replace(' ', '') + episode_category = episode_category.replace('#', '') if episode_category not in podcast_categories: if valid_hash_tag(episode_category): podcast_categories.append('#' + episode_category) @@ -502,6 +503,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: episode_category = episode_category.split('<')[0] episode_category = \ episode_category.lower().replace(' ', '') + episode_category = episode_category.replace('#', '') if episode_category not in podcast_categories: if valid_hash_tag(episode_category): podcast_categories.append('#' + episode_category) From fe063377e6e3aef5dc83030fd39baf93a353949f Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 15:30:55 +0000 Subject: [PATCH 12/15] Tidying --- newswire.py | 72 +++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/newswire.py b/newswire.py index 5888206c0..a65dafe47 100644 --- a/newswire.py +++ b/newswire.py @@ -384,6 +384,45 @@ def _xml2str_to_hashtag_categories(base_dir: str, xml_str: str, False, force) +def _get_podcast_categories(xml_item: str, xml_str: str) -> str: + """ get podcast categories if they exist. These can be turned into hashtags + """ + podcast_categories = [] + episode_category_tags = ['' in episode_category: + episode_category = episode_category.split('>')[1] + if '<' in episode_category: + episode_category = episode_category.split('<')[0] + episode_category = \ + episode_category.lower().replace(' ', '') + episode_category = episode_category.replace('#', '') + if episode_category not in podcast_categories: + if valid_hash_tag(episode_category): + podcast_categories.append('#' + episode_category) + + return podcast_categories + + def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: """podcasting extensions for RSS feeds See https://github.com/Podcastindex-org/podcast-namespace/ @@ -476,38 +515,7 @@ def xml_podcast_to_dict(xml_item: str, xml_str: str) -> {}: break # get categories if they exist. These can be turned into hashtags - podcast_categories = [] - episode_category_tags = ['' in episode_category: - episode_category = episode_category.split('>')[1] - if '<' in episode_category: - episode_category = episode_category.split('<')[0] - episode_category = \ - episode_category.lower().replace(' ', '') - episode_category = episode_category.replace('#', '') - if episode_category not in podcast_categories: - if valid_hash_tag(episode_category): - podcast_categories.append('#' + episode_category) - continue + podcast_categories = _get_podcast_categories(xml_item, xml_str) if podcast_episode_image: podcast_properties['image'] = podcast_episode_image From 1587ec040478a8a9995da9253c8e9b2a9547c82d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 15:40:54 +0000 Subject: [PATCH 13/15] Show hashtag categories on podcast screen --- webapp_podcast.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/webapp_podcast.py b/webapp_podcast.py index cf43a1ed1..1f30c36d2 100644 --- a/webapp_podcast.py +++ b/webapp_podcast.py @@ -221,6 +221,14 @@ def html_podcast_episode(css_cache: {}, translate: {}, '">

\n' + if podcast_properties['categories']: + podcast_str += '

' + tags_str = '' + for tag in podcast_properties['categories']: + tag_link = '/users/' + nickname + '/tags/' + tag.replace('#', '') + tags_str += '' + tag + ' ' + podcast_str += tags_str.strip() + '

\n' + podcast_str += _html_podcast_performers(podcast_properties) podcast_str += ' \n' From 41ff8954d7496dafb12bf511a3cbf2609889d9a8 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 16:04:14 +0000 Subject: [PATCH 14/15] Handle multiple categories per podcast item --- newswire.py | 50 +++++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/newswire.py b/newswire.py index a65dafe47..0bfd74688 100644 --- a/newswire.py +++ b/newswire.py @@ -397,28 +397,36 @@ def _get_podcast_categories(xml_item: str, xml_str: str) -> str: continue item_str = xml_str - episode_category = item_str.split(category_tag)[1] - if 'text="' in episode_category: - episode_category = episode_category.split('text="')[1] - if '"' in episode_category: - episode_category = episode_category.split('"')[0] - episode_category = episode_category.lower().replace(' ', '') - episode_category = episode_category.replace('#', '') - if episode_category not in podcast_categories: - if valid_hash_tag(episode_category): - podcast_categories.append('#' + episode_category) - continue + category_list = item_str.split(category_tag) + first_category = True + for category_item in category_list: + if first_category: + first_category = False + continue - if '>' in episode_category: - episode_category = episode_category.split('>')[1] - if '<' in episode_category: - episode_category = episode_category.split('<')[0] - episode_category = \ - episode_category.lower().replace(' ', '') - episode_category = episode_category.replace('#', '') - if episode_category not in podcast_categories: - if valid_hash_tag(episode_category): - podcast_categories.append('#' + episode_category) + episode_category = category_item + if 'text="' in episode_category: + episode_category = episode_category.split('text="')[1] + if '"' in episode_category: + episode_category = episode_category.split('"')[0] + episode_category = \ + episode_category.lower().replace(' ', '') + episode_category = episode_category.replace('#', '') + if episode_category not in podcast_categories: + if valid_hash_tag(episode_category): + podcast_categories.append('#' + episode_category) + continue + + if '>' in episode_category: + episode_category = episode_category.split('>')[1] + if '<' in episode_category: + episode_category = episode_category.split('<')[0] + episode_category = \ + episode_category.lower().replace(' ', '') + episode_category = episode_category.replace('#', '') + if episode_category not in podcast_categories: + if valid_hash_tag(episode_category): + podcast_categories.append('#' + episode_category) return podcast_categories From 3aab2753609ef9c60318c84466a1bca678596397 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Thu, 13 Jan 2022 16:12:55 +0000 Subject: [PATCH 15/15] Tidying --- newswire.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/newswire.py b/newswire.py index 0bfd74688..d30025dfb 100644 --- a/newswire.py +++ b/newswire.py @@ -399,12 +399,11 @@ def _get_podcast_categories(xml_item: str, xml_str: str) -> str: category_list = item_str.split(category_tag) first_category = True - for category_item in category_list: + for episode_category in category_list: if first_category: first_category = False continue - episode_category = category_item if 'text="' in episode_category: episode_category = episode_category.split('text="')[1] if '"' in episode_category: