From fd07e67f3bab65b3d2b4d4accd0ca612811090e5 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Sat, 12 Dec 2020 15:44:43 +0000
Subject: [PATCH] Ensure that there isn't any html in feed text

---
 newswire.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/newswire.py b/newswire.py
index 1734a3a05..144e7854a 100644
--- a/newswire.py
+++ b/newswire.py
@@ -103,7 +103,11 @@ def addNewswireDictEntry(baseDir: str, domain: str,
                            tags=[], maxTags=32) -> None:
     """Update the newswire dictionary
     """
-    allText = removeHtml(title + ' ' + description)
+    # remove any markup
+    title = removeHtml(title)
+    description = removeHtml(description)
+
+    allText = title + ' ' + description
 
     # check that none of the text is filtered against
     if isFiltered(baseDir, 'news', domain, allText):