mirror of https://gitlab.com/bashrc2/epicyon
Extract any extra links from atom feed items for display on podcast screen
parent
8433d9069e
commit
7524d7656d
50
newswire.py
50
newswire.py
|
@ -215,7 +215,8 @@ def _add_newswire_dict_entry(base_dir: str,
|
|||
max_tags: int, session, debug: bool,
|
||||
podcast_properties: {},
|
||||
system_language: str,
|
||||
fediverse_handle: str) -> None:
|
||||
fediverse_handle: str,
|
||||
extra_links: []) -> None:
|
||||
"""Update the newswire dictionary
|
||||
"""
|
||||
# remove any markup
|
||||
|
@ -268,7 +269,8 @@ def _add_newswire_dict_entry(base_dir: str,
|
|||
post_tags,
|
||||
mirrored,
|
||||
podcast_properties,
|
||||
fediverse_handle
|
||||
fediverse_handle,
|
||||
extra_links
|
||||
]
|
||||
|
||||
|
||||
|
@ -872,6 +874,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = link_mime_type
|
||||
fediverse_handle = ''
|
||||
extra_links = []
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
|
@ -879,7 +882,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
description, moderated,
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties, system_language,
|
||||
fediverse_handle)
|
||||
fediverse_handle, extra_links)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -988,6 +991,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = link_mime_type
|
||||
fediverse_handle = ''
|
||||
extra_links = []
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
|
@ -995,7 +999,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
description, moderated,
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties, system_language,
|
||||
fediverse_handle)
|
||||
fediverse_handle, extra_links)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1084,6 +1088,33 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
not is_local_network_address(actor_uri):
|
||||
fediverse_handle = actor_uri
|
||||
|
||||
# are there any extra links?
|
||||
extra_links = []
|
||||
if '<activity:object>' in atom_item and \
|
||||
'</activity:object>' in atom_item:
|
||||
obj_str = atom_item.split('<activity:object>')[1]
|
||||
obj_str = \
|
||||
unescaped_text(obj_str.split('</activity:object>')[0])
|
||||
obj_str = remove_script(obj_str, None, None, None)
|
||||
sections = obj_str.split('<link ')
|
||||
ctr = 0
|
||||
for section_str in sections:
|
||||
if ctr == 0:
|
||||
ctr = 1
|
||||
continue
|
||||
if '>' in section_str:
|
||||
link_str = section_str.split('>')[0]
|
||||
if 'href="' in link_str and \
|
||||
'rel="preview"' not in link_str:
|
||||
link_str = link_str.split('href="')[1]
|
||||
if '"' in link_str:
|
||||
link_str = link_str.split('"')[0]
|
||||
link_str = remove_html(link_str)
|
||||
if resembles_url(link_str) and \
|
||||
not is_local_network_address(link_str):
|
||||
if link_str not in extra_links:
|
||||
extra_links.append(link_str)
|
||||
|
||||
proxy_type = None
|
||||
if domain.endswith('.onion'):
|
||||
proxy_type = 'tor'
|
||||
|
@ -1122,7 +1153,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
description, moderated,
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties, system_language,
|
||||
fediverse_handle)
|
||||
fediverse_handle, extra_links)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1232,6 +1263,7 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
|
|||
post_filename = ''
|
||||
votes_status = []
|
||||
fediverse_handle = ''
|
||||
extra_links = []
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
|
@ -1239,7 +1271,7 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
|
|||
description, moderated,
|
||||
mirrored, [], 32, session, debug,
|
||||
None, system_language,
|
||||
fediverse_handle)
|
||||
fediverse_handle, extra_links)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1337,6 +1369,7 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
|
|||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = 'video/youtube'
|
||||
fediverse_handle = ''
|
||||
extra_links = []
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
|
@ -1344,7 +1377,7 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
|
|||
description, moderated, mirrored,
|
||||
[], 32, session, debug,
|
||||
podcast_properties, system_language,
|
||||
fediverse_handle)
|
||||
fediverse_handle, extra_links)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1633,6 +1666,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
|
|||
get_url_from_post(post_json_object['object']['url'])
|
||||
url2 = remove_html(url_str)
|
||||
fediverse_handle = ''
|
||||
extra_links = []
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
newswire, published,
|
||||
summary, url2,
|
||||
|
@ -1641,7 +1675,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
|
|||
tags_from_post,
|
||||
max_tags, session, debug,
|
||||
None, system_language,
|
||||
fediverse_handle)
|
||||
fediverse_handle, extra_links)
|
||||
|
||||
ctr += 1
|
||||
if ctr >= max_blogs_per_account:
|
||||
|
|
|
@ -12,6 +12,7 @@ import html
|
|||
import datetime
|
||||
import urllib.parse
|
||||
from shutil import copyfile
|
||||
from utils import resembles_url
|
||||
from utils import get_nickname_from_actor
|
||||
from utils import get_domain_from_actor
|
||||
from utils import data_dir
|
||||
|
@ -460,9 +461,27 @@ def html_podcast_episode(translate: {},
|
|||
fediverse_handle = newswire_item[9]
|
||||
podcast_nickname = get_nickname_from_actor(fediverse_handle)
|
||||
podcast_domain, _ = get_domain_from_actor(fediverse_handle)
|
||||
podcast_str += \
|
||||
'<p><a href="' + fediverse_handle + '">' + \
|
||||
podcast_nickname + '@' + podcast_domain + '</a></p>\n'
|
||||
if podcast_nickname and podcast_domain:
|
||||
podcast_str += \
|
||||
'<p><a href="' + fediverse_handle + '">' + \
|
||||
podcast_nickname + '@' + podcast_domain + '</a></p>\n'
|
||||
|
||||
extra_links = []
|
||||
if len(newswire_item) > 10:
|
||||
extra_links = newswire_item[10]
|
||||
if extra_links:
|
||||
links_text = ''
|
||||
for link_str in extra_links:
|
||||
link_str = remove_html(link_str)
|
||||
if not resembles_url(link_str):
|
||||
continue
|
||||
if not links_text:
|
||||
links_text = '<p>\n'
|
||||
links_text += \
|
||||
'<a href="' + link_str + '">' + link_str + '</a><br>\n'
|
||||
if links_text:
|
||||
links_text += '</p>\n'
|
||||
podcast_str += links_text
|
||||
|
||||
if podcast_properties['categories']:
|
||||
tags_str = ''
|
||||
|
|
Loading…
Reference in New Issue