Command option for preferred podcast format

main
Bob Mottram 2022-04-22 14:46:42 +01:00
parent 637687ca23
commit aa18c03acb
5 changed files with 93 additions and 29 deletions

View File

@ -20628,7 +20628,8 @@ def load_tokens(base_dir: str, tokens_dict: {}, tokens_lookup: {}) -> None:
break break
def run_daemon(check_actor_timeout: int, def run_daemon(preferred_podcast_formats: [],
check_actor_timeout: int,
crawlers_allowed: [], crawlers_allowed: [],
dyslexic_font: bool, dyslexic_font: bool,
content_license_url: str, content_license_url: str,
@ -20720,6 +20721,10 @@ def run_daemon(check_actor_timeout: int,
# scan the theme directory for any svg files containing scripts # scan the theme directory for any svg files containing scripts
assert not scan_themes_for_scripts(base_dir) assert not scan_themes_for_scripts(base_dir)
# list of preferred podcast formats
# eg ['audio/opus', 'audio/mp3']
httpd.preferred_podcast_formats = preferred_podcast_formats
# for each account, whether bold reading is enabled # for each account, whether bold reading is enabled
httpd.bold_reading = load_bold_reading(base_dir) httpd.bold_reading = load_bold_reading(base_dir)

View File

@ -267,6 +267,10 @@ parser.add_argument('--proxy', dest='proxy_port', type=int, default=None,
parser.add_argument('--path', dest='base_dir', parser.add_argument('--path', dest='base_dir',
type=str, default=os.getcwd(), type=str, default=os.getcwd(),
help='Directory in which to store posts') help='Directory in which to store posts')
# Optional comma separated list of preferred podcast formats,
# stored as a raw string on args.podcast_formats
parser.add_argument(
    '--podcast-formats',
    dest='podcast_formats',
    type=str,
    default=None,
    help=('Preferred podcast formats separated by commas. '
          'eg. "opus, mp3"'))
parser.add_argument('--ytdomain', dest='yt_replace_domain', parser.add_argument('--ytdomain', dest='yt_replace_domain',
type=str, default=None, type=str, default=None,
help='Domain used to replace youtube.com') help='Domain used to replace youtube.com')
@ -1138,10 +1142,22 @@ if args.domain:
domain = args.domain domain = args.domain
set_config_param(base_dir, 'domain', domain) set_config_param(base_dir, 'domain', domain)
# Build the list of preferred podcast formats from the
# --podcast-formats option, eg. "opus, mp3" becomes
# ['audio/opus', 'audio/mp3'].
# Remains None when the option is not given.
preferred_podcast_formats = None
if args.podcast_formats:
    # This must be a list before the loop begins, because the
    # membership test and append() both raise TypeError on None
    preferred_podcast_formats = []
    for pod_format in args.podcast_formats.split(','):
        pod_format = pod_format.lower().strip()
        if not pod_format:
            # ignore empty entries caused by stray commas,
            # eg. "opus,,mp3" or a trailing comma
            continue
        if '/' not in pod_format:
            # a bare name is treated as an audio subtype
            pod_format = 'audio/' + pod_format
        if pod_format in preferred_podcast_formats:
            # keep the first occurrence, so that the order of
            # preference given on the command line is retained
            continue
        preferred_podcast_formats.append(pod_format)
    if not preferred_podcast_formats:
        # nothing usable was supplied, so behave as if the
        # option had not been given
        preferred_podcast_formats = None
if args.rss: if args.rss:
session = create_session(None) session = create_session(None)
testRSS = get_rss(base_dir, domain, session, args.rss, testRSS = get_rss(base_dir, domain, session, args.rss,
False, False, 1000, 1000, 1000, 1000, debug) False, False, 1000, 1000, 1000, 1000, debug,
preferred_podcast_formats)
pprint(testRSS) pprint(testRSS)
sys.exit() sys.exit()
@ -3429,7 +3445,8 @@ if args.defaultCurrency:
if __name__ == "__main__": if __name__ == "__main__":
print('allowdeletion: ' + str(args.allowdeletion)) print('allowdeletion: ' + str(args.allowdeletion))
run_daemon(args.check_actor_timeout, run_daemon(preferred_podcast_formats,
args.check_actor_timeout,
crawlers_allowed, crawlers_allowed,
args.dyslexic_font, args.dyslexic_font,
content_license_url, content_license_url,

View File

@ -817,7 +817,8 @@ def run_newswire_daemon(base_dir: str, httpd,
httpd.max_newswire_posts, httpd.max_newswire_posts,
httpd.maxCategoriesFeedItemSizeKb, httpd.maxCategoriesFeedItemSizeKb,
httpd.system_language, httpd.system_language,
httpd.debug) httpd.debug,
httpd.preferred_podcast_formats)
if not httpd.newswire: if not httpd.newswire:
print('Newswire feeds not updated') print('Newswire feeds not updated')

View File

@ -624,8 +624,8 @@ def xml_podcast_to_dict(base_dir: str, xml_item: str, xml_str: str) -> {}:
def get_link_from_rss_item(rss_item: str, def get_link_from_rss_item(rss_item: str,
preferred_mime_types: [] = None, preferred_mime_types: [],
proxy_type: str = None) -> (str, str): proxy_type: str) -> (str, str):
"""Extracts rss link from rss item string """Extracts rss link from rss item string
""" """
mime_type = None mime_type = None
@ -717,7 +717,8 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
max_posts_per_source: int, max_posts_per_source: int,
max_feed_item_size_kb: int, max_feed_item_size_kb: int,
max_categories_feedItem_size_kb: int, max_categories_feedItem_size_kb: int,
session, debug: bool) -> {}: session, debug: bool,
preferred_podcast_formats: []) -> {}:
"""Converts an xml RSS 2.0 string to a dictionary """Converts an xml RSS 2.0 string to a dictionary
""" """
if '<item>' not in xml_str: if '<item>' not in xml_str:
@ -769,7 +770,15 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
description = description.split('</media:description>')[0] description = description.split('</media:description>')[0]
description = remove_html(description) description = remove_html(description)
link, link_mime_type = get_link_from_rss_item(rss_item) proxy_type = None
if domain.endswith('.onion'):
proxy_type = 'tor'
elif domain.endswith('.i2p'):
proxy_type = 'i2p'
link, link_mime_type = \
get_link_from_rss_item(rss_item, preferred_podcast_formats,
proxy_type)
if not link: if not link:
continue continue
@ -812,7 +821,8 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
max_posts_per_source: int, max_posts_per_source: int,
max_feed_item_size_kb: int, max_feed_item_size_kb: int,
max_categories_feedItem_size_kb: int, max_categories_feedItem_size_kb: int,
session, debug: bool) -> {}: session, debug: bool,
preferred_podcast_formats: []) -> {}:
"""Converts an xml RSS 1.0 string to a dictionary """Converts an xml RSS 1.0 string to a dictionary
https://validator.w3.org/feed/docs/rss1.html https://validator.w3.org/feed/docs/rss1.html
""" """
@ -866,7 +876,15 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
description = description.split('</media:description>')[0] description = description.split('</media:description>')[0]
description = remove_html(description) description = remove_html(description)
link, link_mime_type = get_link_from_rss_item(rss_item) proxy_type = None
if domain.endswith('.onion'):
proxy_type = 'tor'
elif domain.endswith('.i2p'):
proxy_type = 'i2p'
link, link_mime_type = \
get_link_from_rss_item(rss_item, preferred_podcast_formats,
proxy_type)
if not link: if not link:
continue continue
@ -908,7 +926,8 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
moderated: bool, mirrored: bool, moderated: bool, mirrored: bool,
max_posts_per_source: int, max_posts_per_source: int,
max_feed_item_size_kb: int, max_feed_item_size_kb: int,
session, debug: bool) -> {}: session, debug: bool,
preferred_podcast_formats: []) -> {}:
"""Converts an atom feed string to a dictionary """Converts an atom feed string to a dictionary
""" """
if '<entry>' not in xml_str: if '<entry>' not in xml_str:
@ -951,7 +970,15 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
description = description.split('</media:description>')[0] description = description.split('</media:description>')[0]
description = remove_html(description) description = remove_html(description)
link, link_mime_type = get_link_from_rss_item(atom_item) proxy_type = None
if domain.endswith('.onion'):
proxy_type = 'tor'
elif domain.endswith('.i2p'):
proxy_type = 'i2p'
link, link_mime_type = \
get_link_from_rss_item(atom_item, preferred_podcast_formats,
proxy_type)
if not link: if not link:
continue continue
@ -1155,7 +1182,7 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
description = description.split('</summary>')[0] description = description.split('</summary>')[0]
description = remove_html(description) description = remove_html(description)
link, _ = get_link_from_rss_item(atom_item) link, _ = get_link_from_rss_item(atom_item, None, None)
if not link: if not link:
link = atom_item.split('<yt:videoId>')[1] link = atom_item.split('<yt:videoId>')[1]
link = link.split('</yt:videoId>')[0] link = link.split('</yt:videoId>')[0]
@ -1196,7 +1223,8 @@ def _xml_str_to_dict(base_dir: str, domain: str, xml_str: str,
max_posts_per_source: int, max_posts_per_source: int,
max_feed_item_size_kb: int, max_feed_item_size_kb: int,
max_categories_feedItem_size_kb: int, max_categories_feedItem_size_kb: int,
session, debug: bool) -> {}: session, debug: bool,
preferred_podcast_formats: []) -> {}:
"""Converts an xml string to a dictionary """Converts an xml string to a dictionary
""" """
if '<yt:videoId>' in xml_str and '<yt:channelId>' in xml_str: if '<yt:videoId>' in xml_str and '<yt:channelId>' in xml_str:
@ -1211,18 +1239,19 @@ def _xml_str_to_dict(base_dir: str, domain: str, xml_str: str,
xml_str, moderated, mirrored, xml_str, moderated, mirrored,
max_posts_per_source, max_feed_item_size_kb, max_posts_per_source, max_feed_item_size_kb,
max_categories_feedItem_size_kb, max_categories_feedItem_size_kb,
session, debug) session, debug,
preferred_podcast_formats)
if '<?xml version="1.0"' in xml_str: if '<?xml version="1.0"' in xml_str:
return _xml1str_to_dict(base_dir, domain, return _xml1str_to_dict(base_dir, domain,
xml_str, moderated, mirrored, xml_str, moderated, mirrored,
max_posts_per_source, max_feed_item_size_kb, max_posts_per_source, max_feed_item_size_kb,
max_categories_feedItem_size_kb, max_categories_feedItem_size_kb,
session, debug) session, debug, preferred_podcast_formats)
if 'xmlns="http://www.w3.org/2005/Atom"' in xml_str: if 'xmlns="http://www.w3.org/2005/Atom"' in xml_str:
return _atom_feed_to_dict(base_dir, domain, return _atom_feed_to_dict(base_dir, domain,
xml_str, moderated, mirrored, xml_str, moderated, mirrored,
max_posts_per_source, max_feed_item_size_kb, max_posts_per_source, max_feed_item_size_kb,
session, debug) session, debug, preferred_podcast_formats)
if 'https://jsonfeed.org/version/1' in xml_str: if 'https://jsonfeed.org/version/1' in xml_str:
return _json_feed_v1to_dict(base_dir, domain, return _json_feed_v1to_dict(base_dir, domain,
xml_str, moderated, mirrored, xml_str, moderated, mirrored,
@ -1248,7 +1277,8 @@ def get_rss(base_dir: str, domain: str, session, url: str,
moderated: bool, mirrored: bool, moderated: bool, mirrored: bool,
max_posts_per_source: int, max_feed_size_kb: int, max_posts_per_source: int, max_feed_size_kb: int,
max_feed_item_size_kb: int, max_feed_item_size_kb: int,
max_categories_feedItem_size_kb: int, debug: bool) -> {}: max_categories_feedItem_size_kb: int, debug: bool,
preferred_podcast_formats: []) -> {}:
"""Returns an RSS url as a dict """Returns an RSS url as a dict
""" """
if not isinstance(url, str): if not isinstance(url, str):
@ -1281,7 +1311,8 @@ def get_rss(base_dir: str, domain: str, session, url: str,
max_posts_per_source, max_posts_per_source,
max_feed_item_size_kb, max_feed_item_size_kb,
max_categories_feedItem_size_kb, max_categories_feedItem_size_kb,
session, debug) session, debug,
preferred_podcast_formats)
else: else:
print('WARN: feed is too large, ' + print('WARN: feed is too large, ' +
'or contains invalid characters: ' + url) 'or contains invalid characters: ' + url)
@ -1526,7 +1557,8 @@ def get_dict_from_newswire(session, base_dir: str, domain: str,
max_tags: int, max_feed_item_size_kb: int, max_tags: int, max_feed_item_size_kb: int,
max_newswire_posts: int, max_newswire_posts: int,
max_categories_feedItem_size_kb: int, max_categories_feedItem_size_kb: int,
system_language: str, debug: bool) -> {}: system_language: str, debug: bool,
preferred_podcast_formats: []) -> {}:
"""Gets rss feeds as a dictionary from newswire file """Gets rss feeds as a dictionary from newswire file
""" """
subscriptions_filename = base_dir + '/accounts/newswire.txt' subscriptions_filename = base_dir + '/accounts/newswire.txt'
@ -1567,7 +1599,8 @@ def get_dict_from_newswire(session, base_dir: str, domain: str,
moderated, mirrored, moderated, mirrored,
max_posts_per_source, max_feed_size_kb, max_posts_per_source, max_feed_size_kb,
max_feed_item_size_kb, max_feed_item_size_kb,
max_categories_feedItem_size_kb, debug) max_categories_feedItem_size_kb, debug,
preferred_podcast_formats)
if items_list: if items_list:
for date_str, item in items_list.items(): for date_str, item in items_list.items():
result[date_str] = item result[date_str] = item

View File

@ -827,8 +827,10 @@ def create_server_alice(path: str, domain: str, port: int,
dyslexic_font = False dyslexic_font = False
crawlers_allowed = [] crawlers_allowed = []
check_actor_timeout = 2 check_actor_timeout = 2
preferred_podcast_formats = None
print('Server running: Alice') print('Server running: Alice')
run_daemon(check_actor_timeout, run_daemon(preferred_podcast_formats,
check_actor_timeout,
crawlers_allowed, crawlers_allowed,
dyslexic_font, dyslexic_font,
content_license_url, content_license_url,
@ -984,8 +986,10 @@ def create_server_bob(path: str, domain: str, port: int,
dyslexic_font = False dyslexic_font = False
crawlers_allowed = [] crawlers_allowed = []
check_actor_timeout = 2 check_actor_timeout = 2
preferred_podcast_formats = None
print('Server running: Bob') print('Server running: Bob')
run_daemon(check_actor_timeout, run_daemon(preferred_podcast_formats,
check_actor_timeout,
crawlers_allowed, crawlers_allowed,
dyslexic_font, dyslexic_font,
content_license_url, content_license_url,
@ -1064,8 +1068,10 @@ def create_server_eve(path: str, domain: str, port: int, federation_list: [],
dyslexic_font = False dyslexic_font = False
crawlers_allowed = [] crawlers_allowed = []
check_actor_timeout = 2 check_actor_timeout = 2
preferred_podcast_formats = None
print('Server running: Eve') print('Server running: Eve')
run_daemon(check_actor_timeout, run_daemon(preferred_podcast_formats,
check_actor_timeout,
crawlers_allowed, crawlers_allowed,
dyslexic_font, dyslexic_font,
content_license_url, content_license_url,
@ -1146,8 +1152,10 @@ def create_server_group(path: str, domain: str, port: int,
dyslexic_font = False dyslexic_font = False
crawlers_allowed = [] crawlers_allowed = []
check_actor_timeout = 2 check_actor_timeout = 2
preferred_podcast_formats = None
print('Server running: Group') print('Server running: Group')
run_daemon(check_actor_timeout, run_daemon(preferred_podcast_formats,
check_actor_timeout,
crawlers_allowed, crawlers_allowed,
dyslexic_font, dyslexic_font,
content_license_url, content_license_url,
@ -6789,7 +6797,7 @@ def _test_xml_podcast_dict(base_dir: str) -> None:
assert len(podcast_properties['locations']) == 1 assert len(podcast_properties['locations']) == 1
def _test_get_link_from_rss_item() -> None: def _test_link_from_rss_item() -> None:
print('test_get_link_from_rssitem') print('test_get_link_from_rssitem')
rss_item = \ rss_item = \
'<link>' + \ '<link>' + \
@ -6844,7 +6852,7 @@ def _test_get_link_from_rss_item() -> None:
'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \ 'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
'</link>' + \ '</link>' + \
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>' '<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>'
link, mime_type = get_link_from_rss_item(rss_item) link, mime_type = get_link_from_rss_item(rss_item, None, None)
assert link assert link
assert link.startswith('https://anchor.fm') assert link.startswith('https://anchor.fm')
assert not mime_type assert not mime_type
@ -6855,7 +6863,7 @@ def _test_get_link_from_rss_item() -> None:
'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \ 'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
'"/>' + \ '"/>' + \
'<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>' '<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>'
link, mime_type = get_link_from_rss_item(rss_item) link, mime_type = get_link_from_rss_item(rss_item, None, None)
assert link assert link
assert link.startswith('https://test.link/creativecommons') assert link.startswith('https://test.link/creativecommons')
@ -7071,7 +7079,7 @@ def run_all_tests():
_test_bold_reading() _test_bold_reading()
_test_published_to_local_timezone() _test_published_to_local_timezone()
_test_safe_webtext() _test_safe_webtext()
_test_get_link_from_rss_item() _test_link_from_rss_item()
_test_xml_podcast_dict(base_dir) _test_xml_podcast_dict(base_dir)
_test_get_actor_from_in_reply_to() _test_get_actor_from_in_reply_to()
_test_valid_emoji_content() _test_valid_emoji_content()