Extract any extra links from Atom feed items for display on the podcast screen

main
Bob Mottram 2024-06-05 20:34:38 +01:00
parent 8433d9069e
commit 7524d7656d
2 changed files with 64 additions and 11 deletions

View File

@ -215,7 +215,8 @@ def _add_newswire_dict_entry(base_dir: str,
max_tags: int, session, debug: bool,
podcast_properties: {},
system_language: str,
fediverse_handle: str) -> None:
fediverse_handle: str,
extra_links: []) -> None:
"""Update the newswire dictionary
"""
# remove any markup
@ -268,7 +269,8 @@ def _add_newswire_dict_entry(base_dir: str,
post_tags,
mirrored,
podcast_properties,
fediverse_handle
fediverse_handle,
extra_links
]
@ -872,6 +874,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
if podcast_properties:
podcast_properties['linkMimeType'] = link_mime_type
fediverse_handle = ''
extra_links = []
_add_newswire_dict_entry(base_dir,
result, pub_date_str,
title, link,
@ -879,7 +882,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
description, moderated,
mirrored, [], 32, session, debug,
podcast_properties, system_language,
fediverse_handle)
fediverse_handle, extra_links)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@ -988,6 +991,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
if podcast_properties:
podcast_properties['linkMimeType'] = link_mime_type
fediverse_handle = ''
extra_links = []
_add_newswire_dict_entry(base_dir,
result, pub_date_str,
title, link,
@ -995,7 +999,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
description, moderated,
mirrored, [], 32, session, debug,
podcast_properties, system_language,
fediverse_handle)
fediverse_handle, extra_links)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@ -1084,6 +1088,33 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
not is_local_network_address(actor_uri):
fediverse_handle = actor_uri
# are there any extra links?
extra_links = []
if '<activity:object>' in atom_item and \
'</activity:object>' in atom_item:
obj_str = atom_item.split('<activity:object>')[1]
obj_str = \
unescaped_text(obj_str.split('</activity:object>')[0])
obj_str = remove_script(obj_str, None, None, None)
sections = obj_str.split('<link ')
ctr = 0
for section_str in sections:
if ctr == 0:
ctr = 1
continue
if '>' in section_str:
link_str = section_str.split('>')[0]
if 'href="' in link_str and \
'rel="preview"' not in link_str:
link_str = link_str.split('href="')[1]
if '"' in link_str:
link_str = link_str.split('"')[0]
link_str = remove_html(link_str)
if resembles_url(link_str) and \
not is_local_network_address(link_str):
if link_str not in extra_links:
extra_links.append(link_str)
proxy_type = None
if domain.endswith('.onion'):
proxy_type = 'tor'
@ -1122,7 +1153,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
description, moderated,
mirrored, [], 32, session, debug,
podcast_properties, system_language,
fediverse_handle)
fediverse_handle, extra_links)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@ -1232,6 +1263,7 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
post_filename = ''
votes_status = []
fediverse_handle = ''
extra_links = []
_add_newswire_dict_entry(base_dir,
result, pub_date_str,
title, link,
@ -1239,7 +1271,7 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
description, moderated,
mirrored, [], 32, session, debug,
None, system_language,
fediverse_handle)
fediverse_handle, extra_links)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@ -1337,6 +1369,7 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
if podcast_properties:
podcast_properties['linkMimeType'] = 'video/youtube'
fediverse_handle = ''
extra_links = []
_add_newswire_dict_entry(base_dir,
result, pub_date_str,
title, link,
@ -1344,7 +1377,7 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
description, moderated, mirrored,
[], 32, session, debug,
podcast_properties, system_language,
fediverse_handle)
fediverse_handle, extra_links)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@ -1633,6 +1666,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
get_url_from_post(post_json_object['object']['url'])
url2 = remove_html(url_str)
fediverse_handle = ''
extra_links = []
_add_newswire_dict_entry(base_dir,
newswire, published,
summary, url2,
@ -1641,7 +1675,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
tags_from_post,
max_tags, session, debug,
None, system_language,
fediverse_handle)
fediverse_handle, extra_links)
ctr += 1
if ctr >= max_blogs_per_account:

View File

@ -12,6 +12,7 @@ import html
import datetime
import urllib.parse
from shutil import copyfile
from utils import resembles_url
from utils import get_nickname_from_actor
from utils import get_domain_from_actor
from utils import data_dir
@ -460,9 +461,27 @@ def html_podcast_episode(translate: {},
fediverse_handle = newswire_item[9]
podcast_nickname = get_nickname_from_actor(fediverse_handle)
podcast_domain, _ = get_domain_from_actor(fediverse_handle)
podcast_str += \
'<p><a href="' + fediverse_handle + '">' + \
podcast_nickname + '@' + podcast_domain + '</a></p>\n'
if podcast_nickname and podcast_domain:
podcast_str += \
'<p><a href="' + fediverse_handle + '">' + \
podcast_nickname + '@' + podcast_domain + '</a></p>\n'
extra_links = []
if len(newswire_item) > 10:
extra_links = newswire_item[10]
if extra_links:
links_text = ''
for link_str in extra_links:
link_str = remove_html(link_str)
if not resembles_url(link_str):
continue
if not links_text:
links_text = '<p>\n'
links_text += \
'<a href="' + link_str + '">' + link_str + '</a><br>\n'
if links_text:
links_text += '</p>\n'
podcast_str += links_text
if podcast_properties['categories']:
tags_str = ''