mirror of https://gitlab.com/bashrc2/epicyon
Remove any scripts from rss feed title and description
parent
91da39755b
commit
d43027a779
15
newswire.py
15
newswire.py
|
@ -45,6 +45,7 @@ from blocking import is_blocked_domain
|
||||||
from blocking import is_blocked_hashtag
|
from blocking import is_blocked_hashtag
|
||||||
from filters import is_filtered
|
from filters import is_filtered
|
||||||
from session import download_image_any_mime_type
|
from session import download_image_any_mime_type
|
||||||
|
from content import remove_script
|
||||||
|
|
||||||
|
|
||||||
def _remove_cdata(text: str) -> str:
|
def _remove_cdata(text: str) -> str:
|
||||||
|
@ -773,6 +774,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
title = rss_item.split('<title>')[1]
|
title = rss_item.split('<title>')[1]
|
||||||
title = _remove_cdata(title.split('</title>')[0])
|
title = _remove_cdata(title.split('</title>')[0])
|
||||||
title = unescaped_text(title)
|
title = unescaped_text(title)
|
||||||
|
title = remove_script(title, None, None, None)
|
||||||
title = remove_html(title)
|
title = remove_html(title)
|
||||||
|
|
||||||
description = ''
|
description = ''
|
||||||
|
@ -780,6 +782,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description = rss_item.split('<description>')[1]
|
description = rss_item.split('<description>')[1]
|
||||||
description = description.split('</description>')[0]
|
description = description.split('</description>')[0]
|
||||||
description = unescaped_text(description)
|
description = unescaped_text(description)
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
else:
|
else:
|
||||||
if '<media:description>' in rss_item and \
|
if '<media:description>' in rss_item and \
|
||||||
|
@ -787,6 +790,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description = rss_item.split('<media:description>')[1]
|
description = rss_item.split('<media:description>')[1]
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = unescaped_text(description)
|
description = unescaped_text(description)
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
|
|
||||||
proxy_type = None
|
proxy_type = None
|
||||||
|
@ -885,12 +889,14 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
title = rss_item.split('<title>')[1]
|
title = rss_item.split('<title>')[1]
|
||||||
title = _remove_cdata(title.split('</title>')[0])
|
title = _remove_cdata(title.split('</title>')[0])
|
||||||
title = unescaped_text(title)
|
title = unescaped_text(title)
|
||||||
|
title = remove_script(title, None, None, None)
|
||||||
title = remove_html(title)
|
title = remove_html(title)
|
||||||
description = ''
|
description = ''
|
||||||
if '<description>' in rss_item and '</description>' in rss_item:
|
if '<description>' in rss_item and '</description>' in rss_item:
|
||||||
description = rss_item.split('<description>')[1]
|
description = rss_item.split('<description>')[1]
|
||||||
description = description.split('</description>')[0]
|
description = description.split('</description>')[0]
|
||||||
description = unescaped_text(description)
|
description = unescaped_text(description)
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
else:
|
else:
|
||||||
if '<media:description>' in rss_item and \
|
if '<media:description>' in rss_item and \
|
||||||
|
@ -898,6 +904,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description = rss_item.split('<media:description>')[1]
|
description = rss_item.split('<media:description>')[1]
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = unescaped_text(description)
|
description = unescaped_text(description)
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
|
|
||||||
proxy_type = None
|
proxy_type = None
|
||||||
|
@ -984,16 +991,19 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
title = atom_item.split('<title>')[1]
|
title = atom_item.split('<title>')[1]
|
||||||
title = _remove_cdata(title.split('</title>')[0])
|
title = _remove_cdata(title.split('</title>')[0])
|
||||||
title = unescaped_text(title)
|
title = unescaped_text(title)
|
||||||
|
title = remove_script(title, None, None, None)
|
||||||
title = remove_html(title)
|
title = remove_html(title)
|
||||||
description = ''
|
description = ''
|
||||||
if '<summary>' in atom_item and '</summary>' in atom_item:
|
if '<summary>' in atom_item and '</summary>' in atom_item:
|
||||||
description = atom_item.split('<summary>')[1]
|
description = atom_item.split('<summary>')[1]
|
||||||
description = unescaped_text(description.split('</summary>')[0])
|
description = unescaped_text(description.split('</summary>')[0])
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
elif '<content' in atom_item and '</content>' in atom_item:
|
elif '<content' in atom_item and '</content>' in atom_item:
|
||||||
description = atom_item.split('<content', 1)[1]
|
description = atom_item.split('<content', 1)[1]
|
||||||
description = description.split('>', 1)[1]
|
description = description.split('>', 1)[1]
|
||||||
description = unescaped_text(description.split('</content>')[0])
|
description = unescaped_text(description.split('</content>')[0])
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
else:
|
else:
|
||||||
if '<media:description>' in atom_item and \
|
if '<media:description>' in atom_item and \
|
||||||
|
@ -1001,6 +1011,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description = atom_item.split('<media:description>')[1]
|
description = atom_item.split('<media:description>')[1]
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = unescaped_text(description)
|
description = unescaped_text(description)
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
|
|
||||||
proxy_type = None
|
proxy_type = None
|
||||||
|
@ -1206,6 +1217,7 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
continue
|
continue
|
||||||
title = atom_item.split('<title>')[1]
|
title = atom_item.split('<title>')[1]
|
||||||
title = _remove_cdata(title.split('</title>')[0])
|
title = _remove_cdata(title.split('</title>')[0])
|
||||||
|
title = remove_script(title, None, None, None)
|
||||||
title = unescaped_text(title)
|
title = unescaped_text(title)
|
||||||
description = ''
|
description = ''
|
||||||
if '<media:description>' in atom_item and \
|
if '<media:description>' in atom_item and \
|
||||||
|
@ -1213,17 +1225,20 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
|
||||||
description = atom_item.split('<media:description>')[1]
|
description = atom_item.split('<media:description>')[1]
|
||||||
description = description.split('</media:description>')[0]
|
description = description.split('</media:description>')[0]
|
||||||
description = unescaped_text(description)
|
description = unescaped_text(description)
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
elif '<summary>' in atom_item and '</summary>' in atom_item:
|
elif '<summary>' in atom_item and '</summary>' in atom_item:
|
||||||
description = atom_item.split('<summary>')[1]
|
description = atom_item.split('<summary>')[1]
|
||||||
description = description.split('</summary>')[0]
|
description = description.split('</summary>')[0]
|
||||||
description = unescaped_text(description)
|
description = unescaped_text(description)
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
elif '<content' in atom_item and '</content>' in atom_item:
|
elif '<content' in atom_item and '</content>' in atom_item:
|
||||||
description = atom_item.split('<content', 1)[1]
|
description = atom_item.split('<content', 1)[1]
|
||||||
description = description.split('>', 1)[1]
|
description = description.split('>', 1)[1]
|
||||||
description = description.split('</content>')[0]
|
description = description.split('</content>')[0]
|
||||||
description = unescaped_text(description)
|
description = unescaped_text(description)
|
||||||
|
description = remove_script(description, None, None, None)
|
||||||
description = remove_html(description)
|
description = remove_html(description)
|
||||||
|
|
||||||
link, _ = get_link_from_rss_item(atom_item, None, None)
|
link, _ = get_link_from_rss_item(atom_item, None, None)
|
||||||
|
|
Loading…
Reference in New Issue