Mirror of https://gitlab.com/bashrc2/epicyon
Extract any extra links from Atom feed items for display on the podcast screen
parent
8433d9069e
commit
7524d7656d
50
newswire.py
50
newswire.py
|
@ -215,7 +215,8 @@ def _add_newswire_dict_entry(base_dir: str,
|
||||||
max_tags: int, session, debug: bool,
|
max_tags: int, session, debug: bool,
|
||||||
podcast_properties: {},
|
podcast_properties: {},
|
||||||
system_language: str,
|
system_language: str,
|
||||||
fediverse_handle: str) -> None:
|
fediverse_handle: str,
|
||||||
|
extra_links: []) -> None:
|
||||||
"""Update the newswire dictionary
|
"""Update the newswire dictionary
|
||||||
"""
|
"""
|
||||||
# remove any markup
|
# remove any markup
|
||||||
|
@ -268,7 +269,8 @@ def _add_newswire_dict_entry(base_dir: str,
|
||||||
post_tags,
|
post_tags,
|
||||||
mirrored,
|
mirrored,
|
||||||
podcast_properties,
|
podcast_properties,
|
||||||
fediverse_handle
|
fediverse_handle,
|
||||||
|
extra_links
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -872,6 +874,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
if podcast_properties:
|
if podcast_properties:
|
||||||
podcast_properties['linkMimeType'] = link_mime_type
|
podcast_properties['linkMimeType'] = link_mime_type
|
||||||
fediverse_handle = ''
|
fediverse_handle = ''
|
||||||
|
extra_links = []
|
||||||
_add_newswire_dict_entry(base_dir,
|
_add_newswire_dict_entry(base_dir,
|
||||||
result, pub_date_str,
|
result, pub_date_str,
|
||||||
title, link,
|
title, link,
|
||||||
|
@ -879,7 +882,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description, moderated,
|
description, moderated,
|
||||||
mirrored, [], 32, session, debug,
|
mirrored, [], 32, session, debug,
|
||||||
podcast_properties, system_language,
|
podcast_properties, system_language,
|
||||||
fediverse_handle)
|
fediverse_handle, extra_links)
|
||||||
post_ctr += 1
|
post_ctr += 1
|
||||||
if post_ctr >= max_posts_per_source:
|
if post_ctr >= max_posts_per_source:
|
||||||
break
|
break
|
||||||
|
@ -988,6 +991,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
if podcast_properties:
|
if podcast_properties:
|
||||||
podcast_properties['linkMimeType'] = link_mime_type
|
podcast_properties['linkMimeType'] = link_mime_type
|
||||||
fediverse_handle = ''
|
fediverse_handle = ''
|
||||||
|
extra_links = []
|
||||||
_add_newswire_dict_entry(base_dir,
|
_add_newswire_dict_entry(base_dir,
|
||||||
result, pub_date_str,
|
result, pub_date_str,
|
||||||
title, link,
|
title, link,
|
||||||
|
@ -995,7 +999,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description, moderated,
|
description, moderated,
|
||||||
mirrored, [], 32, session, debug,
|
mirrored, [], 32, session, debug,
|
||||||
podcast_properties, system_language,
|
podcast_properties, system_language,
|
||||||
fediverse_handle)
|
fediverse_handle, extra_links)
|
||||||
post_ctr += 1
|
post_ctr += 1
|
||||||
if post_ctr >= max_posts_per_source:
|
if post_ctr >= max_posts_per_source:
|
||||||
break
|
break
|
||||||
|
@ -1084,6 +1088,33 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
not is_local_network_address(actor_uri):
|
not is_local_network_address(actor_uri):
|
||||||
fediverse_handle = actor_uri
|
fediverse_handle = actor_uri
|
||||||
|
|
||||||
|
# are there any extra links?
|
||||||
|
extra_links = []
|
||||||
|
if '<activity:object>' in atom_item and \
|
||||||
|
'</activity:object>' in atom_item:
|
||||||
|
obj_str = atom_item.split('<activity:object>')[1]
|
||||||
|
obj_str = \
|
||||||
|
unescaped_text(obj_str.split('</activity:object>')[0])
|
||||||
|
obj_str = remove_script(obj_str, None, None, None)
|
||||||
|
sections = obj_str.split('<link ')
|
||||||
|
ctr = 0
|
||||||
|
for section_str in sections:
|
||||||
|
if ctr == 0:
|
||||||
|
ctr = 1
|
||||||
|
continue
|
||||||
|
if '>' in section_str:
|
||||||
|
link_str = section_str.split('>')[0]
|
||||||
|
if 'href="' in link_str and \
|
||||||
|
'rel="preview"' not in link_str:
|
||||||
|
link_str = link_str.split('href="')[1]
|
||||||
|
if '"' in link_str:
|
||||||
|
link_str = link_str.split('"')[0]
|
||||||
|
link_str = remove_html(link_str)
|
||||||
|
if resembles_url(link_str) and \
|
||||||
|
not is_local_network_address(link_str):
|
||||||
|
if link_str not in extra_links:
|
||||||
|
extra_links.append(link_str)
|
||||||
|
|
||||||
proxy_type = None
|
proxy_type = None
|
||||||
if domain.endswith('.onion'):
|
if domain.endswith('.onion'):
|
||||||
proxy_type = 'tor'
|
proxy_type = 'tor'
|
||||||
|
@ -1122,7 +1153,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description, moderated,
|
description, moderated,
|
||||||
mirrored, [], 32, session, debug,
|
mirrored, [], 32, session, debug,
|
||||||
podcast_properties, system_language,
|
podcast_properties, system_language,
|
||||||
fediverse_handle)
|
fediverse_handle, extra_links)
|
||||||
post_ctr += 1
|
post_ctr += 1
|
||||||
if post_ctr >= max_posts_per_source:
|
if post_ctr >= max_posts_per_source:
|
||||||
break
|
break
|
||||||
|
@ -1232,6 +1263,7 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
|
||||||
post_filename = ''
|
post_filename = ''
|
||||||
votes_status = []
|
votes_status = []
|
||||||
fediverse_handle = ''
|
fediverse_handle = ''
|
||||||
|
extra_links = []
|
||||||
_add_newswire_dict_entry(base_dir,
|
_add_newswire_dict_entry(base_dir,
|
||||||
result, pub_date_str,
|
result, pub_date_str,
|
||||||
title, link,
|
title, link,
|
||||||
|
@ -1239,7 +1271,7 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
|
||||||
description, moderated,
|
description, moderated,
|
||||||
mirrored, [], 32, session, debug,
|
mirrored, [], 32, session, debug,
|
||||||
None, system_language,
|
None, system_language,
|
||||||
fediverse_handle)
|
fediverse_handle, extra_links)
|
||||||
post_ctr += 1
|
post_ctr += 1
|
||||||
if post_ctr >= max_posts_per_source:
|
if post_ctr >= max_posts_per_source:
|
||||||
break
|
break
|
||||||
|
@ -1337,6 +1369,7 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
|
||||||
if podcast_properties:
|
if podcast_properties:
|
||||||
podcast_properties['linkMimeType'] = 'video/youtube'
|
podcast_properties['linkMimeType'] = 'video/youtube'
|
||||||
fediverse_handle = ''
|
fediverse_handle = ''
|
||||||
|
extra_links = []
|
||||||
_add_newswire_dict_entry(base_dir,
|
_add_newswire_dict_entry(base_dir,
|
||||||
result, pub_date_str,
|
result, pub_date_str,
|
||||||
title, link,
|
title, link,
|
||||||
|
@ -1344,7 +1377,7 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
|
||||||
description, moderated, mirrored,
|
description, moderated, mirrored,
|
||||||
[], 32, session, debug,
|
[], 32, session, debug,
|
||||||
podcast_properties, system_language,
|
podcast_properties, system_language,
|
||||||
fediverse_handle)
|
fediverse_handle, extra_links)
|
||||||
post_ctr += 1
|
post_ctr += 1
|
||||||
if post_ctr >= max_posts_per_source:
|
if post_ctr >= max_posts_per_source:
|
||||||
break
|
break
|
||||||
|
@ -1633,6 +1666,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
|
||||||
get_url_from_post(post_json_object['object']['url'])
|
get_url_from_post(post_json_object['object']['url'])
|
||||||
url2 = remove_html(url_str)
|
url2 = remove_html(url_str)
|
||||||
fediverse_handle = ''
|
fediverse_handle = ''
|
||||||
|
extra_links = []
|
||||||
_add_newswire_dict_entry(base_dir,
|
_add_newswire_dict_entry(base_dir,
|
||||||
newswire, published,
|
newswire, published,
|
||||||
summary, url2,
|
summary, url2,
|
||||||
|
@ -1641,7 +1675,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
|
||||||
tags_from_post,
|
tags_from_post,
|
||||||
max_tags, session, debug,
|
max_tags, session, debug,
|
||||||
None, system_language,
|
None, system_language,
|
||||||
fediverse_handle)
|
fediverse_handle, extra_links)
|
||||||
|
|
||||||
ctr += 1
|
ctr += 1
|
||||||
if ctr >= max_blogs_per_account:
|
if ctr >= max_blogs_per_account:
|
||||||
|
|
|
@ -12,6 +12,7 @@ import html
|
||||||
import datetime
|
import datetime
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
|
from utils import resembles_url
|
||||||
from utils import get_nickname_from_actor
|
from utils import get_nickname_from_actor
|
||||||
from utils import get_domain_from_actor
|
from utils import get_domain_from_actor
|
||||||
from utils import data_dir
|
from utils import data_dir
|
||||||
|
@ -460,9 +461,27 @@ def html_podcast_episode(translate: {},
|
||||||
fediverse_handle = newswire_item[9]
|
fediverse_handle = newswire_item[9]
|
||||||
podcast_nickname = get_nickname_from_actor(fediverse_handle)
|
podcast_nickname = get_nickname_from_actor(fediverse_handle)
|
||||||
podcast_domain, _ = get_domain_from_actor(fediverse_handle)
|
podcast_domain, _ = get_domain_from_actor(fediverse_handle)
|
||||||
podcast_str += \
|
if podcast_nickname and podcast_domain:
|
||||||
'<p><a href="' + fediverse_handle + '">' + \
|
podcast_str += \
|
||||||
podcast_nickname + '@' + podcast_domain + '</a></p>\n'
|
'<p><a href="' + fediverse_handle + '">' + \
|
||||||
|
podcast_nickname + '@' + podcast_domain + '</a></p>\n'
|
||||||
|
|
||||||
|
extra_links = []
|
||||||
|
if len(newswire_item) > 10:
|
||||||
|
extra_links = newswire_item[10]
|
||||||
|
if extra_links:
|
||||||
|
links_text = ''
|
||||||
|
for link_str in extra_links:
|
||||||
|
link_str = remove_html(link_str)
|
||||||
|
if not resembles_url(link_str):
|
||||||
|
continue
|
||||||
|
if not links_text:
|
||||||
|
links_text = '<p>\n'
|
||||||
|
links_text += \
|
||||||
|
'<a href="' + link_str + '">' + link_str + '</a><br>\n'
|
||||||
|
if links_text:
|
||||||
|
links_text += '</p>\n'
|
||||||
|
podcast_str += links_text
|
||||||
|
|
||||||
if podcast_properties['categories']:
|
if podcast_properties['categories']:
|
||||||
tags_str = ''
|
tags_str = ''
|
||||||
|
|
Loading…
Reference in New Issue