From f84941ba42d3d969d3e1ef5f1d56f12916bdec52 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Wed, 18 Nov 2020 22:31:38 +0000 Subject: [PATCH] Remove CDATA --- webapp_column_right.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/webapp_column_right.py b/webapp_column_right.py index a1eb7e02c..5ed3e6f92 100644 --- a/webapp_column_right.py +++ b/webapp_column_right.py @@ -10,6 +10,7 @@ import os from datetime import datetime from shutil import copyfile from content import removeLongWords +from utils import removeHtml from utils import locatePost from utils import loadJson from utils import getConfigParam @@ -205,8 +206,14 @@ def htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool, separatorStr = htmlPostSeparator(baseDir, 'right') htmlStr = '' for dateStr, item in newswire.items(): - if not item[0].strip(): + item[0] = removeHtml(item[0]).strip() + if not item[0]: continue + # remove any CDATA + if 'CDATA[' in item[0]: + item[0] = item[0].split('CDATA[')[1] + if ']' in item[0]: + item[0] = item[0].split(']')[0] publishedDate = \ datetime.strptime(dateStr, "%Y-%m-%d %H:%M:%S%z") dateShown = publishedDate.strftime("%Y-%m-%d %H:%M") @@ -348,8 +355,14 @@ def htmlCitations(baseDir: str, nickname: str, domain: str, if newswire: ctr = 0 for dateStr, item in newswire.items(): - if not item[0].strip(): + item[0] = removeHtml(item[0]).strip() + if not item[0]: continue + # remove any CDATA + if 'CDATA[' in item[0]: + item[0] = item[0].split('CDATA[')[1] + if ']' in item[0]: + item[0] = item[0].split(']')[0] # should this checkbox be selected? selectedStr = '' if dateStr in citationsSelected: