mirror of https://gitlab.com/bashrc2/epicyon
Remove extra character
parent 4838dd5e02
commit df2a947516
31
posts.py
31
posts.py
|
@@ -491,16 +491,10 @@ def _getPosts(session, outboxUrl: str, maxPosts: int,
|
||||||
return personPosts
|
return personPosts
|
||||||
|
|
||||||
|
|
||||||
def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
|
def _getCommonWords() -> str:
|
||||||
"""Creates a dictionary containing words and the number of times
|
"""Returns a list of common words
|
||||||
that they appear
|
|
||||||
"""
|
"""
|
||||||
plainText = removeHtml(content)
|
return (
|
||||||
removeChars = ('.', ';', '?', '\n')
|
|
||||||
for ch in removeChars:
|
|
||||||
plainText = plainText.replace(ch, ' ')
|
|
||||||
wordsList = plainText.split(' ')
|
|
||||||
commonWords = (
|
|
||||||
'that', 'some', 'about', 'then', 'they', 'were',
|
'that', 'some', 'about', 'then', 'they', 'were',
|
||||||
'also', 'from', 'with', 'this', 'have', 'more',
|
'also', 'from', 'with', 'this', 'have', 'more',
|
||||||
'need', 'here', 'would', 'these', 'into', 'very',
|
'need', 'here', 'would', 'these', 'into', 'very',
|
||||||
|
@@ -510,8 +504,23 @@ def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
|
||||||
'been', 'over', 'their', 'where', 'could', 'though',
|
'been', 'over', 'their', 'where', 'could', 'though',
|
||||||
'like', 'think', 'same', 'maybe', 'really', 'thing',
|
'like', 'think', 'same', 'maybe', 'really', 'thing',
|
||||||
'something', 'possible', 'actual', 'actually',
|
'something', 'possible', 'actual', 'actually',
|
||||||
'because', 'around', 'having'
|
'because', 'around', 'having', 'especially', 'other',
|
||||||
|
'making', 'made', 'make', 'makes', 'including',
|
||||||
|
'includes', 'know', 'knowing', 'knows', 'things',
|
||||||
|
'say', 'says', 'saying', 'many', 'somewhat',
|
||||||
|
'problem', 'problems', 'idea', 'ideas'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
|
||||||
|
"""Creates a dictionary containing words and the number of times
|
||||||
|
that they appear
|
||||||
|
"""
|
||||||
|
plainText = removeHtml(content)
|
||||||
|
removeChars = ('.', ';', '?', '\n', ':')
|
||||||
|
for ch in removeChars:
|
||||||
|
plainText = plainText.replace(ch, ' ')
|
||||||
|
wordsList = plainText.split(' ')
|
||||||
|
commonWords = _getCommonWords()
|
||||||
for word in wordsList:
|
for word in wordsList:
|
||||||
wordLen = len(word)
|
wordLen = len(word)
|
||||||
if wordLen < 3:
|
if wordLen < 3:
|
||||||
|
@@ -524,8 +533,6 @@ def _updateWordFrequency(content: str, wordFrequency: {}) -> None:
|
||||||
'@' in word or \
|
'@' in word or \
|
||||||
'://' in word:
|
'://' in word:
|
||||||
continue
|
continue
|
||||||
if word.endswith(':'):
|
|
||||||
word = word.replace(':', '')
|
|
||||||
if word.lower() in commonWords:
|
if word.lower() in commonWords:
|
||||||
continue
|
continue
|
||||||
if wordFrequency.get(word):
|
if wordFrequency.get(word):
|
||||||
|
|
Loading…
Reference in New Issue