mirror of https://gitlab.com/bashrc2/epicyon
Remove common words
parent
7f1281e9e9
commit
aac3eb816c
10
posts.py
10
posts.py
|
@ -477,6 +477,10 @@ def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
|
||||||
plainText = plainText.replace('.', ' ')
|
plainText = plainText.replace('.', ' ')
|
||||||
plainText = plainText.replace(';', ' ')
|
plainText = plainText.replace(';', ' ')
|
||||||
wordsList = plainText.split(' ')
|
wordsList = plainText.split(' ')
|
||||||
|
commonWords = (
|
||||||
|
'that', 'some', 'about', 'then', 'they', 'were',
|
||||||
|
'also', 'from', 'with', 'this', 'have', 'more'
|
||||||
|
)
|
||||||
for word in wordsList:
|
for word in wordsList:
|
||||||
wordLen = len(word)
|
wordLen = len(word)
|
||||||
if wordLen < 3:
|
if wordLen < 3:
|
||||||
|
@ -484,6 +488,12 @@ def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
|
||||||
if wordLen < 4:
|
if wordLen < 4:
|
||||||
if word.upper() != word:
|
if word.upper() != word:
|
||||||
continue
|
continue
|
||||||
|
if '&' in word or \
|
||||||
|
'"' in word or \
|
||||||
|
'@' in word:
|
||||||
|
continue
|
||||||
|
if word in commonWords:
|
||||||
|
continue
|
||||||
if wordFrequency.get(word):
|
if wordFrequency.get(word):
|
||||||
wordFrequency[word] += 1
|
wordFrequency[word] += 1
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue