Extract any extra links from atom feed items for display on podcast screen

2024-06-05 20:34:38 +01:00 · 2024-06-05 20:34:38 +01:00 · 7524d7656d
parent 8433d9069e
commit 7524d7656d
2 changed files with 64 additions and 11 deletions
--- a/newswire.py
+++ b/newswire.py
@@ -215,7 +215,8 @@ def _add_newswire_dict_entry(base_dir: str,
                             max_tags: int, session, debug: bool,
                             podcast_properties: {},
                             system_language: str,
-                             fediverse_handle: str) -> None:
+                             fediverse_handle: str,
+                             extra_links: []) -> None:
    """Update the newswire dictionary
    """
    # remove any markup
@@ -268,7 +269,8 @@ def _add_newswire_dict_entry(base_dir: str,
        post_tags,
        mirrored,
        podcast_properties,
-        fediverse_handle
+        fediverse_handle,
+        extra_links
    ]
@@ -872,6 +874,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
                if podcast_properties:
                    podcast_properties['linkMimeType'] = link_mime_type
                fediverse_handle = ''
+                extra_links = []
                _add_newswire_dict_entry(base_dir,
                                         result, pub_date_str,
                                         title, link,
@@ -879,7 +882,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
                                         description, moderated,
                                         mirrored, [], 32, session, debug,
                                         podcast_properties, system_language,
-                                         fediverse_handle)
+                                         fediverse_handle, extra_links)
                post_ctr += 1
                if post_ctr >= max_posts_per_source:
                    break
@@ -988,6 +991,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
                if podcast_properties:
                    podcast_properties['linkMimeType'] = link_mime_type
                fediverse_handle = ''
+                extra_links = []
                _add_newswire_dict_entry(base_dir,
                                         result, pub_date_str,
                                         title, link,
@@ -995,7 +999,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
                                         description, moderated,
                                         mirrored, [], 32, session, debug,
                                         podcast_properties, system_language,
-                                         fediverse_handle)
+                                         fediverse_handle, extra_links)
                post_ctr += 1
                if post_ctr >= max_posts_per_source:
                    break
@@ -1084,6 +1088,33 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
                       not is_local_network_address(actor_uri):
                        fediverse_handle = actor_uri
+        # are there any extra links?
+        extra_links = []
+        if '<activity:object>' in atom_item and \
+           '</activity:object>' in atom_item:
+            obj_str = atom_item.split('<activity:object>')[1]
+            obj_str = \
+                unescaped_text(obj_str.split('</activity:object>')[0])
+            obj_str = remove_script(obj_str, None, None, None)
+            sections = obj_str.split('<link ')
+            ctr = 0
+            for section_str in sections:
+                if ctr == 0:
+                    ctr = 1
+                    continue
+                if '>' in section_str:
+                    link_str = section_str.split('>')[0]
+                    if 'href="' in link_str and \
+                       'rel="preview"' not in link_str:
+                        link_str = link_str.split('href="')[1]
+                        if '"' in link_str:
+                            link_str = link_str.split('"')[0]
+                            link_str = remove_html(link_str)
+                            if resembles_url(link_str) and \
+                               not is_local_network_address(link_str):
+                                if link_str not in extra_links:
+                                    extra_links.append(link_str)
        proxy_type = None
        if domain.endswith('.onion'):
            proxy_type = 'tor'
@@ -1122,7 +1153,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
                                         description, moderated,
                                         mirrored, [], 32, session, debug,
                                         podcast_properties, system_language,
-                                         fediverse_handle)
+                                         fediverse_handle, extra_links)
                post_ctr += 1
                if post_ctr >= max_posts_per_source:
                    break
@@ -1232,6 +1263,7 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
                post_filename = ''
                votes_status = []
                fediverse_handle = ''
+                extra_links = []
                _add_newswire_dict_entry(base_dir,
                                         result, pub_date_str,
                                         title, link,
@@ -1239,7 +1271,7 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
                                         description, moderated,
                                         mirrored, [], 32, session, debug,
                                         None, system_language,
-                                         fediverse_handle)
+                                         fediverse_handle, extra_links)
                post_ctr += 1
                if post_ctr >= max_posts_per_source:
                    break
@@ -1337,6 +1369,7 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
                if podcast_properties:
                    podcast_properties['linkMimeType'] = 'video/youtube'
                fediverse_handle = ''
+                extra_links = []
                _add_newswire_dict_entry(base_dir,
                                         result, pub_date_str,
                                         title, link,
@@ -1344,7 +1377,7 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
                                         description, moderated, mirrored,
                                         [], 32, session, debug,
                                         podcast_properties, system_language,
-                                         fediverse_handle)
+                                         fediverse_handle, extra_links)
                post_ctr += 1
                if post_ctr >= max_posts_per_source:
                    break
@@ -1633,6 +1666,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
                        get_url_from_post(post_json_object['object']['url'])
                    url2 = remove_html(url_str)
                    fediverse_handle = ''
+                    extra_links = []
                    _add_newswire_dict_entry(base_dir,
                                             newswire, published,
                                             summary, url2,
@@ -1641,7 +1675,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
                                             tags_from_post,
                                             max_tags, session, debug,
                                             None, system_language,
-                                             fediverse_handle)
+                                             fediverse_handle, extra_links)
            ctr += 1
            if ctr >= max_blogs_per_account:
--- a/webapp_podcast.py
+++ b/webapp_podcast.py
@@ -12,6 +12,7 @@ import html
 import datetime
 import urllib.parse
 from shutil import copyfile
+from utils import resembles_url
 from utils import get_nickname_from_actor
 from utils import get_domain_from_actor
 from utils import data_dir
@@ -460,9 +461,27 @@ def html_podcast_episode(translate: {},
        fediverse_handle = newswire_item[9]
        podcast_nickname = get_nickname_from_actor(fediverse_handle)
        podcast_domain, _ = get_domain_from_actor(fediverse_handle)
-        podcast_str += \
-            '<p><a href="' + fediverse_handle + '">' + \
-            podcast_nickname + '@' + podcast_domain + '</a></p>\n'
+        if podcast_nickname and podcast_domain:
+            podcast_str += \
+                '<p><a href="' + fediverse_handle + '">' + \
+                podcast_nickname + '@' + podcast_domain + '</a></p>\n'
+    extra_links = []
+    if len(newswire_item) > 10:
+        extra_links = newswire_item[10]
+        if extra_links:
+            links_text = ''
+            for link_str in extra_links:
+                link_str = remove_html(link_str)
+                if not resembles_url(link_str):
+                    continue
+                if not links_text:
+                    links_text = '<p>\n'
+                links_text += \
+                    '<a href="' + link_str + '">' + link_str + '</a><br>\n'
+            if links_text:
+                links_text += '</p>\n'
+                podcast_str += links_text
    if podcast_properties['categories']:
        tags_str = ''