mirror of https://gitlab.com/bashrc2/epicyon
Tidying
parent
07f4b3605a
commit
27c99f1900
17
content.py
17
content.py
|
@ -1149,19 +1149,28 @@ def _wordsSimilarityHistogram(words: []) -> {}:
|
||||||
return histogram
|
return histogram
|
||||||
|
|
||||||
|
|
||||||
|
def _wordsSimilarityWordsList(content: str) -> []:
    """Returns a list of words for the given content

    Html is removed via removeHtml, the text is lowercased and each
    character in removePunctuation is replaced by a space. The result
    is then split on runs of whitespace, so that adjacent separators
    (for example ". " turning into two spaces) do not produce empty
    words which would inflate the length of the returned list.
    An empty or whitespace-only content returns an empty list.
    """
    removePunctuation = ('.', ',', ';', '-', ':')
    content = removeHtml(content).lower()
    for p in removePunctuation:
        content = content.replace(p, ' ')
    # split() with no separator collapses repeated whitespace and drops
    # leading/trailing blanks, unlike split(' ') which yields '' entries
    return content.split()
|
||||||
|
|
||||||
|
|
||||||
def wordsSimilarity(content1: str, content2: str, minWords: int) -> int:
|
def wordsSimilarity(content1: str, content2: str, minWords: int) -> int:
|
||||||
"""Returns percentage similarity
|
"""Returns percentage similarity
|
||||||
"""
|
"""
|
||||||
if content1 == content2:
|
if content1 == content2:
|
||||||
return 100
|
return 100
|
||||||
|
|
||||||
content1 = removeHtml(content1).lower()
|
words1 = _wordsSimilarityWordsList(content1)
|
||||||
words1 = content1.split(' ')
|
|
||||||
if len(words1) < minWords:
|
if len(words1) < minWords:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
content2 = removeHtml(content2).lower()
|
words2 = _wordsSimilarityWordsList(content2)
|
||||||
words2 = content2.split(' ')
|
|
||||||
if len(words2) < minWords:
|
if len(words2) < minWords:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue