Command option for preferred podcast format

2022-04-22 14:46:42 +01:00 · 2022-04-22 14:46:42 +01:00 · aa18c03acb
parent 637687ca23
commit aa18c03acb
5 changed files with 93 additions and 29 deletions
--- a/daemon.py
+++ b/daemon.py
@ -20628,7 +20628,8 @@ def load_tokens(base_dir: str, tokens_dict: {}, tokens_lookup: {}) -> None:
        break


-def run_daemon(check_actor_timeout: int,
+def run_daemon(preferred_podcast_formats: [],
+               check_actor_timeout: int,
               crawlers_allowed: [],
               dyslexic_font: bool,
               content_license_url: str,
@ -20720,6 +20721,10 @@ def run_daemon(check_actor_timeout: int,
    # scan the theme directory for any svg files containing scripts
    assert not scan_themes_for_scripts(base_dir)

+    # list of preferred podcast formats
+    # eg ['audio/opus', 'audio/mp3']
+    httpd.preferred_podcast_formats = preferred_podcast_formats
+
    # for each account, whether bold reading is enabled
    httpd.bold_reading = load_bold_reading(base_dir)

--- a/epicyon.py
+++ b/epicyon.py
@ -267,6 +267,10 @@ parser.add_argument('--proxy', dest='proxy_port', type=int, default=None,
 parser.add_argument('--path', dest='base_dir',
                    type=str, default=os.getcwd(),
                    help='Directory in which to store posts')
+parser.add_argument('--podcast-formats', dest='podcast_formats',
+                    type=str, default=None,
+                    help='Preferred podcast formats separated by commas. ' +
+                    'eg. "opus, mp3"')
 parser.add_argument('--ytdomain', dest='yt_replace_domain',
                    type=str, default=None,
                    help='Domain used to replace youtube.com')
@ -1138,10 +1142,22 @@ if args.domain:
    domain = args.domain
    set_config_param(base_dir, 'domain', domain)

+preferred_podcast_formats = None  # stays None if --podcast-formats unset
+if args.podcast_formats:
+    podcast_formats = args.podcast_formats.split(',')
+    preferred_podcast_formats = []  # must be a list before 'in'/append
+    for pod_format in podcast_formats:
+        pod_format = pod_format.lower().strip()
+        if '/' not in pod_format:  # bare extension, eg. "opus"
+            pod_format = 'audio/' + pod_format
+        if pod_format not in preferred_podcast_formats:  # skip duplicates
+            preferred_podcast_formats.append(pod_format)
+
 if args.rss:
    session = create_session(None)
    testRSS = get_rss(base_dir, domain, session, args.rss,
-                      False, False, 1000, 1000, 1000, 1000, debug)
+                      False, False, 1000, 1000, 1000, 1000, debug,
+                      preferred_podcast_formats)
    pprint(testRSS)
    sys.exit()

@ -3429,7 +3445,8 @@ if args.defaultCurrency:

 if __name__ == "__main__":
    print('allowdeletion: ' + str(args.allowdeletion))
-    run_daemon(args.check_actor_timeout,
+    run_daemon(preferred_podcast_formats,
+               args.check_actor_timeout,
               crawlers_allowed,
               args.dyslexic_font,
               content_license_url,
--- a/newsdaemon.py
+++ b/newsdaemon.py
@ -817,7 +817,8 @@ def run_newswire_daemon(base_dir: str, httpd,
                                   httpd.max_newswire_posts,
                                   httpd.maxCategoriesFeedItemSizeKb,
                                   httpd.system_language,
-                                   httpd.debug)
+                                   httpd.debug,
+                                   httpd.preferred_podcast_formats)

        if not httpd.newswire:
            print('Newswire feeds not updated')
--- a/newswire.py
+++ b/newswire.py
@ -624,8 +624,8 @@ def xml_podcast_to_dict(base_dir: str, xml_item: str, xml_str: str) -> {}:


 def get_link_from_rss_item(rss_item: str,
-                           preferred_mime_types: [] = None,
-                           proxy_type: str = None) -> (str, str):
+                           preferred_mime_types: [],
+                           proxy_type: str) -> (str, str):
    """Extracts rss link from rss item string
    """
    mime_type = None
@ -717,7 +717,8 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
                     max_posts_per_source: int,
                     max_feed_item_size_kb: int,
                     max_categories_feedItem_size_kb: int,
-                     session, debug: bool) -> {}:
+                     session, debug: bool,
+                     preferred_podcast_formats: []) -> {}:
    """Converts an xml RSS 2.0 string to a dictionary
    """
    if '<item>' not in xml_str:
@ -769,7 +770,15 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
                description = description.split('</media:description>')[0]
                description = remove_html(description)

-        link, link_mime_type = get_link_from_rss_item(rss_item)
+        proxy_type = None
+        if domain.endswith('.onion'):
+            proxy_type = 'tor'
+        elif domain.endswith('.i2p'):
+            proxy_type = 'i2p'
+
+        link, link_mime_type = \
+            get_link_from_rss_item(rss_item, preferred_podcast_formats,
+                                   proxy_type)
        if not link:
            continue

@ -812,7 +821,8 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
                     max_posts_per_source: int,
                     max_feed_item_size_kb: int,
                     max_categories_feedItem_size_kb: int,
-                     session, debug: bool) -> {}:
+                     session, debug: bool,
+                     preferred_podcast_formats: []) -> {}:
    """Converts an xml RSS 1.0 string to a dictionary
    https://validator.w3.org/feed/docs/rss1.html
    """
@ -866,7 +876,15 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
                description = description.split('</media:description>')[0]
                description = remove_html(description)

-        link, link_mime_type = get_link_from_rss_item(rss_item)
+        proxy_type = None
+        if domain.endswith('.onion'):
+            proxy_type = 'tor'
+        elif domain.endswith('.i2p'):
+            proxy_type = 'i2p'
+
+        link, link_mime_type = \
+            get_link_from_rss_item(rss_item, preferred_podcast_formats,
+                                   proxy_type)
        if not link:
            continue

@ -908,7 +926,8 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
                       moderated: bool, mirrored: bool,
                       max_posts_per_source: int,
                       max_feed_item_size_kb: int,
-                       session, debug: bool) -> {}:
+                       session, debug: bool,
+                       preferred_podcast_formats: []) -> {}:
    """Converts an atom feed string to a dictionary
    """
    if '<entry>' not in xml_str:
@ -951,7 +970,15 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
                description = description.split('</media:description>')[0]
                description = remove_html(description)

-        link, link_mime_type = get_link_from_rss_item(atom_item)
+        proxy_type = None
+        if domain.endswith('.onion'):
+            proxy_type = 'tor'
+        elif domain.endswith('.i2p'):
+            proxy_type = 'i2p'
+
+        link, link_mime_type = \
+            get_link_from_rss_item(atom_item, preferred_podcast_formats,
+                                   proxy_type)
        if not link:
            continue

@ -1155,7 +1182,7 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
            description = description.split('</summary>')[0]
            description = remove_html(description)

-        link, _ = get_link_from_rss_item(atom_item)
+        link, _ = get_link_from_rss_item(atom_item, None, None)
        if not link:
            link = atom_item.split('<yt:videoId>')[1]
            link = link.split('</yt:videoId>')[0]
@ -1196,7 +1223,8 @@ def _xml_str_to_dict(base_dir: str, domain: str, xml_str: str,
                     max_posts_per_source: int,
                     max_feed_item_size_kb: int,
                     max_categories_feedItem_size_kb: int,
-                     session, debug: bool) -> {}:
+                     session, debug: bool,
+                     preferred_podcast_formats: []) -> {}:
    """Converts an xml string to a dictionary
    """
    if '<yt:videoId>' in xml_str and '<yt:channelId>' in xml_str:
@ -1211,18 +1239,19 @@ def _xml_str_to_dict(base_dir: str, domain: str, xml_str: str,
                                xml_str, moderated, mirrored,
                                max_posts_per_source, max_feed_item_size_kb,
                                max_categories_feedItem_size_kb,
-                                session, debug)
+                                session, debug,
+                                preferred_podcast_formats)
    if '<?xml version="1.0"' in xml_str:
        return _xml1str_to_dict(base_dir, domain,
                                xml_str, moderated, mirrored,
                                max_posts_per_source, max_feed_item_size_kb,
                                max_categories_feedItem_size_kb,
-                                session, debug)
+                                session, debug, preferred_podcast_formats)
    if 'xmlns="http://www.w3.org/2005/Atom"' in xml_str:
        return _atom_feed_to_dict(base_dir, domain,
                                  xml_str, moderated, mirrored,
                                  max_posts_per_source, max_feed_item_size_kb,
-                                  session, debug)
+                                  session, debug, preferred_podcast_formats)
    if 'https://jsonfeed.org/version/1' in xml_str:
        return _json_feed_v1to_dict(base_dir, domain,
                                    xml_str, moderated, mirrored,
@ -1248,7 +1277,8 @@ def get_rss(base_dir: str, domain: str, session, url: str,
            moderated: bool, mirrored: bool,
            max_posts_per_source: int, max_feed_size_kb: int,
            max_feed_item_size_kb: int,
-            max_categories_feedItem_size_kb: int, debug: bool) -> {}:
+            max_categories_feedItem_size_kb: int, debug: bool,
+            preferred_podcast_formats: []) -> {}:
    """Returns an RSS url as a dict
    """
    if not isinstance(url, str):
@ -1281,7 +1311,8 @@ def get_rss(base_dir: str, domain: str, session, url: str,
                                        max_posts_per_source,
                                        max_feed_item_size_kb,
                                        max_categories_feedItem_size_kb,
-                                        session, debug)
+                                        session, debug,
+                                        preferred_podcast_formats)
            else:
                print('WARN: feed is too large, ' +
                      'or contains invalid characters: ' + url)
@ -1526,7 +1557,8 @@ def get_dict_from_newswire(session, base_dir: str, domain: str,
                           max_tags: int, max_feed_item_size_kb: int,
                           max_newswire_posts: int,
                           max_categories_feedItem_size_kb: int,
-                           system_language: str, debug: bool) -> {}:
+                           system_language: str, debug: bool,
+                           preferred_podcast_formats: []) -> {}:
    """Gets rss feeds as a dictionary from newswire file
    """
    subscriptions_filename = base_dir + '/accounts/newswire.txt'
@ -1567,7 +1599,8 @@ def get_dict_from_newswire(session, base_dir: str, domain: str,
                             moderated, mirrored,
                             max_posts_per_source, max_feed_size_kb,
                             max_feed_item_size_kb,
-                             max_categories_feedItem_size_kb, debug)
+                             max_categories_feedItem_size_kb, debug,
+                             preferred_podcast_formats)
        if items_list:
            for date_str, item in items_list.items():
                result[date_str] = item
--- a/tests.py
+++ b/tests.py
@ -827,8 +827,10 @@ def create_server_alice(path: str, domain: str, port: int,
    dyslexic_font = False
    crawlers_allowed = []
    check_actor_timeout = 2
+    preferred_podcast_formats = None
    print('Server running: Alice')
-    run_daemon(check_actor_timeout,
+    run_daemon(preferred_podcast_formats,
+               check_actor_timeout,
               crawlers_allowed,
               dyslexic_font,
               content_license_url,
@ -984,8 +986,10 @@ def create_server_bob(path: str, domain: str, port: int,
    dyslexic_font = False
    crawlers_allowed = []
    check_actor_timeout = 2
+    preferred_podcast_formats = None
    print('Server running: Bob')
-    run_daemon(check_actor_timeout,
+    run_daemon(preferred_podcast_formats,
+               check_actor_timeout,
               crawlers_allowed,
               dyslexic_font,
               content_license_url,
@ -1064,8 +1068,10 @@ def create_server_eve(path: str, domain: str, port: int, federation_list: [],
    dyslexic_font = False
    crawlers_allowed = []
    check_actor_timeout = 2
+    preferred_podcast_formats = None
    print('Server running: Eve')
-    run_daemon(check_actor_timeout,
+    run_daemon(preferred_podcast_formats,
+               check_actor_timeout,
               crawlers_allowed,
               dyslexic_font,
               content_license_url,
@ -1146,8 +1152,10 @@ def create_server_group(path: str, domain: str, port: int,
    dyslexic_font = False
    crawlers_allowed = []
    check_actor_timeout = 2
+    preferred_podcast_formats = None
    print('Server running: Group')
-    run_daemon(check_actor_timeout,
+    run_daemon(preferred_podcast_formats,
+               check_actor_timeout,
               crawlers_allowed,
               dyslexic_font,
               content_license_url,
@ -6789,7 +6797,7 @@ def _test_xml_podcast_dict(base_dir: str) -> None:
    assert len(podcast_properties['locations']) == 1


-def _test_get_link_from_rss_item() -> None:
+def _test_link_from_rss_item() -> None:
    print('test_get_link_from_rssitem')
    rss_item = \
        '<link>' + \
@ -6844,7 +6852,7 @@ def _test_get_link_from_rss_item() -> None:
        'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
        '</link>' + \
        '<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>'
-    link, mime_type = get_link_from_rss_item(rss_item)
+    link, mime_type = get_link_from_rss_item(rss_item, None, None)
    assert link
    assert link.startswith('https://anchor.fm')
    assert not mime_type
@ -6855,7 +6863,7 @@ def _test_get_link_from_rss_item() -> None:
        'Hessel-van-Oorschot-of-Tribe-of-Noise--Free-Music-Archive-e1crvce' + \
        '"/>' + \
        '<pubDate>Wed, 12 Jan 2022 14:28:46 GMT</pubDate>'
-    link, mime_type = get_link_from_rss_item(rss_item)
+    link, mime_type = get_link_from_rss_item(rss_item, None, None)
    assert link
    assert link.startswith('https://test.link/creativecommons')

@ -7071,7 +7079,7 @@ def run_all_tests():
    _test_bold_reading()
    _test_published_to_local_timezone()
    _test_safe_webtext()
-    _test_get_link_from_rss_item()
+    _test_link_from_rss_item()
    _test_xml_podcast_dict(base_dir)
    _test_get_actor_from_in_reply_to()
    _test_valid_emoji_content()