From 2d31488d49801b17d6d73d48dcf8f3b1f9d61274 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Mon, 11 Jan 2021 14:13:17 +0000 Subject: [PATCH] Remove colon endings and question marks --- posts.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/posts.py b/posts.py index 99d7c83fd..62069d607 100644 --- a/posts.py +++ b/posts.py @@ -476,13 +476,15 @@ def _updateWordFrequency(content: str, wordFrequency: {}) -> None: plainText = removeHtml(content) plainText = plainText.replace('.', ' ') plainText = plainText.replace(';', ' ') + plainText = plainText.replace('?', ' ') wordsList = plainText.split(' ') commonWords = ( 'that', 'some', 'about', 'then', 'they', 'were', 'also', 'from', 'with', 'this', 'have', 'more', 'need', 'here', 'would', 'these', 'into', 'very', 'well', 'when', 'what', 'your', 'there', 'which', - 'even', 'there', 'such', 'just', 'those', 'only' + 'even', 'there', 'such', 'just', 'those', 'only', + 'will', 'much' ) for word in wordsList: wordLen = len(word) @@ -493,8 +495,11 @@ def _updateWordFrequency(content: str, wordFrequency: {}) -> None: continue if '&' in word or \ '"' in word or \ - '@' in word: + '@' in word or \ + '://' in word: continue + if word.endswith(':'): + word = word.replace(':', '') if word in commonWords: continue if wordFrequency.get(word):