diff --git a/newswire.py b/newswire.py
index 3bc37e457..737b506c5 100644
--- a/newswire.py
+++ b/newswire.py
@@ -19,6 +19,7 @@ from datetime import timezone
from collections import OrderedDict
from utils import valid_post_date
from categories import set_hashtag_category
+from utils import is_local_network_address
from utils import data_dir
from utils import string_contains
from utils import image_mime_types_dict
@@ -213,7 +214,8 @@ def _add_newswire_dict_entry(base_dir: str,
tags: [],
max_tags: int, session, debug: bool,
podcast_properties: {},
- system_language: str) -> None:
+ system_language: str,
+ fediverse_handle: str) -> None:
"""Update the newswire dictionary
"""
# remove any markup
@@ -265,7 +267,8 @@ def _add_newswire_dict_entry(base_dir: str,
moderated,
post_tags,
mirrored,
- podcast_properties
+ podcast_properties,
+ fediverse_handle
]
@@ -868,13 +871,15 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
xml_podcast_to_dict(base_dir, rss_item, xml_str)
if podcast_properties:
podcast_properties['linkMimeType'] = link_mime_type
+ fediverse_handle = ''
_add_newswire_dict_entry(base_dir,
result, pub_date_str,
title, link,
votes_status, post_filename,
description, moderated,
mirrored, [], 32, session, debug,
- podcast_properties, system_language)
+ podcast_properties, system_language,
+ fediverse_handle)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -982,13 +987,15 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
xml_podcast_to_dict(base_dir, rss_item, xml_str)
if podcast_properties:
podcast_properties['linkMimeType'] = link_mime_type
+ fediverse_handle = ''
_add_newswire_dict_entry(base_dir,
result, pub_date_str,
title, link,
votes_status, post_filename,
description, moderated,
mirrored, [], 32, session, debug,
- podcast_properties, system_language)
+ podcast_properties, system_language,
+ fediverse_handle)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -1059,6 +1066,24 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
description = remove_script(description, None, None, None)
description = remove_html(description)
+ # is there a fediverse handle
+ fediverse_handle = ''
+ if '<author>' in atom_item and '</author>' in atom_item:
+ actor_str = atom_item.split('<author>')[1]
+ actor_str = unescaped_text(actor_str.split('</author>')[0])
+ actor_str = remove_script(actor_str, None, None, None)
+ if '<activity:object-type>' in actor_str and \
+ '</activity:object-type>' in actor_str and \
+ '<uri>' in actor_str and '</uri>' in actor_str:
+ obj_type = actor_str.split('<activity:object-type>')[1]
+ obj_type = obj_type.split('</activity:object-type>')[0]
+ if obj_type == 'Person':
+ actor_uri = actor_str.split('<uri>')[1]
+ actor_uri = actor_uri.split('</uri>')[0]
+ if resembles_url(actor_uri) and \
+ not is_local_network_address(actor_uri):
+ fediverse_handle = actor_uri
+
proxy_type = None
if domain.endswith('.onion'):
proxy_type = 'tor'
@@ -1096,7 +1121,8 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
votes_status, post_filename,
description, moderated,
mirrored, [], 32, session, debug,
- podcast_properties, system_language)
+ podcast_properties, system_language,
+ fediverse_handle)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -1205,13 +1231,15 @@ def _json_feed_v1to_dict(base_dir: str, xml_str: str,
if _valid_feed_date(pub_date_str):
post_filename = ''
votes_status = []
+ fediverse_handle = ''
_add_newswire_dict_entry(base_dir,
result, pub_date_str,
title, link,
votes_status, post_filename,
description, moderated,
mirrored, [], 32, session, debug,
- None, system_language)
+ None, system_language,
+ fediverse_handle)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -1308,13 +1336,15 @@ def _atom_feed_yt_to_dict(base_dir: str, xml_str: str,
xml_podcast_to_dict(base_dir, atom_item, xml_str)
if podcast_properties:
podcast_properties['linkMimeType'] = 'video/youtube'
+ fediverse_handle = ''
_add_newswire_dict_entry(base_dir,
result, pub_date_str,
title, link,
votes_status, post_filename,
description, moderated, mirrored,
[], 32, session, debug,
- podcast_properties, system_language)
+ podcast_properties, system_language,
+ fediverse_handle)
post_ctr += 1
if post_ctr >= max_posts_per_source:
break
@@ -1602,6 +1632,7 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
url_str = \
get_url_from_post(post_json_object['object']['url'])
url2 = remove_html(url_str)
+ fediverse_handle = ''
_add_newswire_dict_entry(base_dir,
newswire, published,
summary, url2,
@@ -1609,7 +1640,8 @@ def _add_account_blogs_to_newswire(base_dir: str, nickname: str, domain: str,
description, moderated, False,
tags_from_post,
max_tags, session, debug,
- None, system_language)
+ None, system_language,
+ fediverse_handle)
ctr += 1
if ctr >= max_blogs_per_account: