mirror of https://gitlab.com/bashrc2/epicyon
Obtain fediverse handle from atom feeds
parent
4d25af2705
commit
96ba85d04d
48
newswire.py
48
newswire.py
|
@ -19,6 +19,7 @@ from datetime import timezone
|
|||
from collections import OrderedDict
|
||||
from utils import valid_post_date
|
||||
from categories import set_hashtag_category
|
||||
from utils import is_local_network_address
|
||||
from utils import data_dir
|
||||
from utils import string_contains
|
||||
from utils import image_mime_types_dict
|
||||
|
@ -213,7 +214,8 @@ def _add_newswire_dict_entry(base_dir: str,
|
|||
tags: [],
|
||||
max_tags: int, session, debug: bool,
|
||||
podcast_properties: {},
|
||||
system_language: str) -> None:
|
||||
system_language: str,
|
||||
fediverse_handle: str) -> None:
|
||||
"""Update the newswire dictionary
|
||||
"""
|
||||
# remove any markup
|
||||
|
@ -265,7 +267,8 @@ def _add_newswire_dict_entry(base_dir: str,
|
|||
moderated,
|
||||
post_tags,
|
||||
mirrored,
|
||||
podcast_properties
|
||||
podcast_properties,
|
||||
fediverse_handle
|
||||
]
|
||||
|
||||
|
||||
|
@ -868,13 +871,15 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
xml_podcast_to_dict(base_dir, rss_item, xml_str)
|
||||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = link_mime_type
|
||||
fediverse_handle = ''
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated,
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties, system_language)
|
||||
podcast_properties, system_language,
|
||||
fediverse_handle)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -982,13 +987,15 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
xml_podcast_to_dict(base_dir, rss_item, xml_str)
|
||||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = link_mime_type
|
||||
fediverse_handle = ''
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated,
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties, system_language)
|
||||
podcast_properties, system_language,
|
||||
fediverse_handle)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1059,6 +1066,24 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
description = remove_script(description, None, None, None)
|
||||
description = remove_html(description)
|
||||
|
||||
# is there a fediverse handle
|
||||
fediverse_handle = ''
|
||||
if '<author>' in atom_item and '</author>' in atom_item:
|
||||
actor_str = atom_item.split('<author>')[1]
|
||||
actor_str = unescaped_text(actor_str.split('</author>')[0])
|
||||
actor_str = remove_script(actor_str, None, None, None)
|
||||
if '<activity:object-type>' in actor_str and \
|
||||
'</activity:object-type>' in actor_str and \
|
||||
'<uri>' in actor_str and '</uri>' in actor_str:
|
||||
obj_type = actor_str.split('<activity:object-type>')[1]
|
||||
obj_type = obj_type.split('</activity:object-type>')[0]
|
||||
if obj_type == 'Person':
|
||||
actor_uri = actor_str.split('<uri>')[1]
|
||||
actor_uri = actor_uri.split('</uri>')[0]
|
||||
if resembles_url(actor_uri) and \
|
||||
not is_local_network_address(actor_uri):
|
||||
fediverse_handle = actor_uri
|
||||
|
||||
proxy_type = None
|
||||
if domain.endswith('.onion'):
|
||||
proxy_type = 'tor'
|
||||
|
@ -1096,7 +1121,8 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
|||
votes_status, post_filename,
|
||||
description, moderated,
|
||||
mirrored, [], 32, session, debug,
|
||||
podcast_properties, system_language)
|
||||
podcast_properties, system_language,
|
||||
fediverse_handle)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1205,13 +1231,15 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
|
|||
if _valid_feed_date(pub_date_str):
|
||||
post_filename = ''
|
||||
votes_status = []
|
||||
fediverse_handle = ''
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated,
|
||||
mirrored, [], 32, session, debug,
|
||||
None, system_language)
|
||||
None, system_language,
|
||||
fediverse_handle)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1308,13 +1336,15 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
|
|||
xml_podcast_to_dict(base_dir, atom_item, xml_str)
|
||||
if podcast_properties:
|
||||
podcast_properties['linkMimeType'] = 'video/youtube'
|
||||
fediverse_handle = ''
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
result, pub_date_str,
|
||||
title, link,
|
||||
votes_status, post_filename,
|
||||
description, moderated, mirrored,
|
||||
[], 32, session, debug,
|
||||
podcast_properties, system_language)
|
||||
podcast_properties, system_language,
|
||||
fediverse_handle)
|
||||
post_ctr += 1
|
||||
if post_ctr >= max_posts_per_source:
|
||||
break
|
||||
|
@ -1602,6 +1632,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
|
|||
url_str = \
|
||||
get_url_from_post(post_json_object['object']['url'])
|
||||
url2 = remove_html(url_str)
|
||||
fediverse_handle = ''
|
||||
_add_newswire_dict_entry(base_dir,
|
||||
newswire, published,
|
||||
summary, url2,
|
||||
|
@ -1609,7 +1640,8 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
|
|||
description, moderated, False,
|
||||
tags_from_post,
|
||||
max_tags, session, debug,
|
||||
None, system_language)
|
||||
None, system_language,
|
||||
fediverse_handle)
|
||||
|
||||
ctr += 1
|
||||
if ctr >= max_blogs_per_account:
|
||||
|
|
Loading…
Reference in New Issue