diff --git a/newswire.py b/newswire.py
index b9b476b8c..52b8c931b 100644
--- a/newswire.py
+++ b/newswire.py
@@ -25,6 +25,16 @@ from blocking import isBlockedHashtag
from filters import isFiltered
+def removeCDATA(text: str) -> str:
+    """Returns the content of the first CDATA section in text, or text
+    unchanged if it has none. The content ends at the ']]>' terminator so
+    a ']' inside it is kept; if unterminated, it ends at the first ']'"""
+    if 'CDATA[' in text:
+        text = text.split('CDATA[', 1)[1]
+        text = text.split(']]>' if ']]>' in text else ']', 1)[0]
+    return text
+
+
def rss2Header(httpPrefix: str,
nickname: str, domainFull: str,
title: str, translate: {}) -> str:
@@ -154,16 +164,17 @@ def xml2StrToDict(baseDir: str, domain: str, xmlStr: str,
if '' not in rssItem:
continue
title = rssItem.split('
')[1]
- title = title.split('')[0]
+ title = removeCDATA(title.split('')[0])
description = ''
if '' in rssItem and '' in rssItem:
description = rssItem.split('')[1]
- description = description.split('')[0]
+ description = removeCDATA(description.split('')[0])
else:
if '' in rssItem and \
'' in rssItem:
description = rssItem.split('')[1]
description = description.split('')[0]
+ description = removeCDATA(description)
link = rssItem.split('')[1]
link = link.split('')[0]
if '://' not in link:
@@ -243,16 +254,17 @@ def atomFeedToDict(baseDir: str, domain: str, xmlStr: str,
if '' not in rssItem:
continue
title = rssItem.split('')[1]
- title = title.split('')[0]
+ title = removeCDATA(title.split('')[0])
description = ''
if '' in rssItem and '' in rssItem:
description = rssItem.split('')[1]
- description = description.split('')[0]
+ description = removeCDATA(description.split('')[0])
else:
if '' in rssItem and \
'' in rssItem:
description = rssItem.split('')[1]
description = description.split('')[0]
+ description = removeCDATA(description)
link = rssItem.split('')[1]
link = link.split('')[0]
if '://' not in link:
@@ -333,15 +345,17 @@ def atomFeedYTToDict(baseDir: str, domain: str, xmlStr: str,
if '' not in rssItem:
continue
title = rssItem.split('')[1]
- title = title.split('')[0]
+ title = removeCDATA(title.split('')[0])
description = ''
if '' in rssItem and \
'' in rssItem:
description = rssItem.split('')[1]
description = description.split('')[0]
+ description = removeCDATA(description)
elif '' in rssItem and '' in rssItem:
description = rssItem.split('')[1]
description = description.split('')[0]
+ description = removeCDATA(description)
link = rssItem.split('')[1]
link = link.split('')[0]
link = 'https://www.youtube.com/watch?v=' + link.strip()
@@ -494,7 +508,7 @@ def getRSSfromDict(baseDir: str, newswire: {},
continue
rssStr += '\n'
rssStr += ' ' + fields[0] + '\n'
- description = firstParagraphFromString(fields[4])
+ description = removeCDATA(firstParagraphFromString(fields[4]))
rssStr += ' ' + description + '\n'
url = fields[1]
if '://' not in url:
@@ -614,6 +628,7 @@ def addAccountBlogsToNewswire(baseDir: str, nickname: str, domain: str,
votes = loadJson(fullPostFilename + '.votes')
content = postJsonObject['object']['content']
description = firstParagraphFromString(content)
+ description = removeCDATA(description)
addNewswireDictEntry(baseDir, domain,
newswire, published,
postJsonObject['object']['summary'],