diff --git a/newswire.py b/newswire.py
index 6eafec39e..d416e6237 100644
--- a/newswire.py
+++ b/newswire.py
@@ -45,6 +45,7 @@ from blocking import is_blocked_domain
from blocking import is_blocked_hashtag
from filters import is_filtered
from session import download_image_any_mime_type
+from content import remove_script
def _remove_cdata(text: str) -> str:
@@ -773,6 +774,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
title = rss_item.split('
')[1]
title = _remove_cdata(title.split('')[0])
title = unescaped_text(title)
+ title = remove_script(title, None, None, None)
title = remove_html(title)
description = ''
@@ -780,6 +782,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
description = rss_item.split('')[1]
description = description.split('')[0]
description = unescaped_text(description)
+ description = remove_script(description, None, None, None)
description = remove_html(description)
else:
if '' in rss_item and \
@@ -787,6 +790,7 @@ def _xml2str_to_dict(base_dir: str, domain: str, xml_str: str,
description = rss_item.split('')[1]
description = description.split('')[0]
description = unescaped_text(description)
+ description = remove_script(description, None, None, None)
description = remove_html(description)
proxy_type = None
@@ -885,12 +889,14 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
title = rss_item.split('')[1]
title = _remove_cdata(title.split('')[0])
title = unescaped_text(title)
+ title = remove_script(title, None, None, None)
title = remove_html(title)
description = ''
if '' in rss_item and '' in rss_item:
description = rss_item.split('')[1]
description = description.split('')[0]
description = unescaped_text(description)
+ description = remove_script(description, None, None, None)
description = remove_html(description)
else:
if '' in rss_item and \
@@ -898,6 +904,7 @@ def _xml1str_to_dict(base_dir: str, domain: str, xml_str: str,
description = rss_item.split('')[1]
description = description.split('')[0]
description = unescaped_text(description)
+ description = remove_script(description, None, None, None)
description = remove_html(description)
proxy_type = None
@@ -984,16 +991,19 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
title = atom_item.split('')[1]
title = _remove_cdata(title.split('')[0])
title = unescaped_text(title)
+ title = remove_script(title, None, None, None)
title = remove_html(title)
description = ''
if '' in atom_item and '' in atom_item:
description = atom_item.split('')[1]
description = unescaped_text(description.split('')[0])
+ description = remove_script(description, None, None, None)
description = remove_html(description)
elif '' in atom_item:
description = atom_item.split('', 1)[1]
description = unescaped_text(description.split('')[0])
+ description = remove_script(description, None, None, None)
description = remove_html(description)
else:
if '' in atom_item and \
@@ -1001,6 +1011,7 @@ def _atom_feed_to_dict(base_dir: str, domain: str, xml_str: str,
description = atom_item.split('')[1]
description = description.split('')[0]
description = unescaped_text(description)
+ description = remove_script(description, None, None, None)
description = remove_html(description)
proxy_type = None
@@ -1206,6 +1217,7 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
continue
title = atom_item.split('')[1]
title = _remove_cdata(title.split('')[0])
title = unescaped_text(title)
+ title = remove_script(title, None, None, None)
description = ''
if '' in atom_item and \
@@ -1213,17 +1225,20 @@ def _atom_feed_yt_to_dict(base_dir: str, domain: str, xml_str: str,
description = atom_item.split('')[1]
description = description.split('')[0]
description = unescaped_text(description)
+ description = remove_script(description, None, None, None)
description = remove_html(description)
elif '' in atom_item and '' in atom_item:
description = atom_item.split('')[1]
description = description.split('')[0]
description = unescaped_text(description)
+ description = remove_script(description, None, None, None)
description = remove_html(description)
elif '' in atom_item:
description = atom_item.split('', 1)[1]
description = description.split('')[0]
description = unescaped_text(description)
+ description = remove_script(description, None, None, None)
description = remove_html(description)
link, _ = get_link_from_rss_item(atom_item, None, None)