def _get_link_from_rss_item(rss_item: str) -> str:
    """Extracts rss link from rss item string

    The url of an audio or video enclosure takes priority. If there is
    none then the text between the first <link> and </link> is used.

    Returns the link, or None if the item has no <link> element or the
    candidate link does not contain '://'.
    """
    # NOTE(review): the '<enclosure ', '<link>' and '</link>' literals
    # were reconstructed from tag-stripped text - confirm against the
    # repository copy of this function
    link = None
    if '<enclosure ' in rss_item:
        # keep only the attributes of the first enclosure tag
        enclosure = rss_item.split('<enclosure ')[1]
        if '>' in enclosure:
            enclosure = enclosure.split('>')[0]
            # only audio or video enclosures supply the link
            if 'url="' in enclosure and \
               ('"audio/' in enclosure or '"video/' in enclosure):
                link_str = enclosure.split('url="')[1]
                if '"' in link_str:
                    link_str = link_str.split('"')[0]
                    if '://' in link_str:
                        link = link_str

    if not link:
        # without this check the split below raises IndexError
        if '<link>' not in rss_item:
            return None
        link = rss_item.split('<link>')[1]
        link = link.split('</link>')[0]
        if '://' not in link:
            return None
    return link
link_str - + link = _get_link_from_rss_item(rss_item) if not link: - link = rss_item.split('')[1] - link = link.split('')[0] - if '://' not in link: - continue + continue item_domain = link.split('://')[1] if '/' in item_domain: @@ -727,25 +720,9 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str, description = description.split('')[0] description = remove_html(description) - link = None - if '' in enclosure: - enclosure = enclosure.split('>')[0] - if 'url="' in enclosure and \ - ('"audio/' in enclosure or '"video/' in enclosure): - link_str = enclosure.split('url="')[1] - if '"' in link_str: - link_str = link_str.split('"')[0] - if '://' in link_str: - link = link_str - + link = _get_link_from_rss_item(atom_item) if not link: - link = atom_item.split('')[1] - link = link.split('')[0] - if '://' not in link: - continue + continue item_domain = link.split('://')[1] if '/' in item_domain: