Remove colon endings and question marks

main
Bob Mottram 2021-01-11 14:13:17 +00:00
parent 1934a269f1
commit 2d31488d49
1 changed file with 7 additions and 2 deletions

View File

@ -476,13 +476,15 @@ def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
plainText = removeHtml(content)
plainText = plainText.replace('.', ' ')
plainText = plainText.replace(';', ' ')
plainText = plainText.replace('?', ' ')
wordsList = plainText.split(' ')
commonWords = (
'that', 'some', 'about', 'then', 'they', 'were',
'also', 'from', 'with', 'this', 'have', 'more',
'need', 'here', 'would', 'these', 'into', 'very',
'well', 'when', 'what', 'your', 'there', 'which',
'even', 'there', 'such', 'just', 'those', 'only'
'even', 'there', 'such', 'just', 'those', 'only',
'will', 'much'
)
for word in wordsList:
wordLen = len(word)
@ -493,8 +495,11 @@ def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
continue
if '&' in word or \
'"' in word or \
'@' in word:
'@' in word or \
'://' in word:
continue
if word.endswith(':'):
word = word.replace(':', '')
if word in commonWords:
continue
if wordFrequency.get(word):