Remove CDATA

main
Bob Mottram 2020-11-18 22:31:38 +00:00
parent 7f4c490872
commit f84941ba42
1 changed file with 15 additions and 2 deletions

View File

@ -10,6 +10,7 @@ import os
from datetime import datetime from datetime import datetime
from shutil import copyfile from shutil import copyfile
from content import removeLongWords from content import removeLongWords
from utils import removeHtml
from utils import locatePost from utils import locatePost
from utils import loadJson from utils import loadJson
from utils import getConfigParam from utils import getConfigParam
@ -205,8 +206,14 @@ def htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool,
separatorStr = htmlPostSeparator(baseDir, 'right') separatorStr = htmlPostSeparator(baseDir, 'right')
htmlStr = '' htmlStr = ''
for dateStr, item in newswire.items(): for dateStr, item in newswire.items():
if not item[0].strip(): item[0] = removeHtml(item[0]).strip()
if not item[0]:
continue continue
# remove any CDATA
if 'CDATA[' in item[0]:
item[0] = item[0].split('CDATA[')[1]
if ']' in item[0]:
item[0] = item[0].split(']')[0]
publishedDate = \ publishedDate = \
datetime.strptime(dateStr, "%Y-%m-%d %H:%M:%S%z") datetime.strptime(dateStr, "%Y-%m-%d %H:%M:%S%z")
dateShown = publishedDate.strftime("%Y-%m-%d %H:%M") dateShown = publishedDate.strftime("%Y-%m-%d %H:%M")
@ -348,8 +355,14 @@ def htmlCitations(baseDir: str, nickname: str, domain: str,
if newswire: if newswire:
ctr = 0 ctr = 0
for dateStr, item in newswire.items(): for dateStr, item in newswire.items():
if not item[0].strip(): item[0] = removeHtml(item[0]).strip()
if not item[0]:
continue continue
# remove any CDATA
if 'CDATA[' in item[0]:
item[0] = item[0].split('CDATA[')[1]
if ']' in item[0]:
item[0] = item[0].split(']')[0]
# should this checkbox be selected? # should this checkbox be selected?
selectedStr = '' selectedStr = ''
if dateStr in citationsSelected: if dateStr in citationsSelected: