Remove CDATA

main
Bob Mottram 2020-11-18 22:31:38 +00:00
parent 7f4c490872
commit f84941ba42
1 changed file with 15 additions and 2 deletions

View File

@@ -10,6 +10,7 @@ import os
from datetime import datetime
from shutil import copyfile
from content import removeLongWords
from utils import removeHtml
from utils import locatePost
from utils import loadJson
from utils import getConfigParam
@@ -205,8 +206,14 @@ def htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool,
separatorStr = htmlPostSeparator(baseDir, 'right')
htmlStr = ''
for dateStr, item in newswire.items():
if not item[0].strip():
item[0] = removeHtml(item[0]).strip()
if not item[0]:
continue
# remove any CDATA
if 'CDATA[' in item[0]:
item[0] = item[0].split('CDATA[')[1]
if ']' in item[0]:
item[0] = item[0].split(']')[0]
publishedDate = \
datetime.strptime(dateStr, "%Y-%m-%d %H:%M:%S%z")
dateShown = publishedDate.strftime("%Y-%m-%d %H:%M")
@@ -348,8 +355,14 @@ def htmlCitations(baseDir: str, nickname: str, domain: str,
if newswire:
ctr = 0
for dateStr, item in newswire.items():
if not item[0].strip():
item[0] = removeHtml(item[0]).strip()
if not item[0]:
continue
# remove any CDATA
if 'CDATA[' in item[0]:
item[0] = item[0].split('CDATA[')[1]
if ']' in item[0]:
item[0] = item[0].split(']')[0]
# should this checkbox be selected?
selectedStr = ''
if dateStr in citationsSelected: