mirror of https://gitlab.com/bashrc2/epicyon
Limit the number of times that the same word can be repeated
parent
e8553eb192
commit
7511af13d0
33
content.py
33
content.py
|
@ -866,6 +866,7 @@ def addHtmlTags(baseDir: str, httpPrefix: str,
|
|||
content = addWebLinks(content)
|
||||
if longWordsList:
|
||||
content = removeLongWords(content, maxWordLength, longWordsList)
|
||||
content = limitRepeatedWords(content, 6)
|
||||
content = content.replace(' --linebreak-- ', '</p><p>')
|
||||
content = htmlReplaceEmailQuote(content)
|
||||
return '<p>' + htmlReplaceQuoteMarks(content) + '</p>'
|
||||
|
@ -1053,3 +1054,35 @@ def extractTextFieldsInPOST(postBytes, boundary: str, debug: bool,
|
|||
postValue += postLines[line]
|
||||
fields[postKey] = urllib.parse.unquote(postValue)
|
||||
return fields
|
||||
|
||||
|
||||
def limitRepeatedWords(text: str, maxRepeats: int) -> str:
    """Limits the number of times that the same word can be repeated
    consecutively.

    Words are delimited by single space or newline characters. Any run
    of more than maxRepeats identical consecutive words is cut down to
    its first maxRepeats words. The separators of the surviving words
    are kept exactly as they appear in the original text, so the
    layout of everything outside of the trimmed run is unchanged.

    Empty tokens (produced by consecutive separators) are never counted
    as repeated words, and they end the current run.

    text: the text to be checked
    maxRepeats: the maximum number of consecutive occurrences of a
                word to keep. If this is less than one then the text
                is returned unchanged, since there is no meaningful
                limit to apply.
    Returns the text with over-long runs of repeated words shortened.
    """
    if not text or maxRepeats < 1:
        return text

    # Replacing newlines with spaces keeps the string length the same,
    # so positions within the flattened text map directly back onto the
    # original text, from which the real separator can be recovered
    words = text.replace('\n', ' ').split(' ')

    kept = []
    prevWord = ''
    runLength = 0
    position = 0
    for word in words:
        # the character just before this word is its separator
        # (the first word has none)
        separator = ''
        if position > 0:
            separator = text[position - 1]
        position += len(word) + 1

        if word and word == prevWord:
            runLength += 1
        else:
            runLength = 1
        prevWord = word

        if word and runLength > maxRepeats:
            # surplus repeat: drop the word together with the
            # separator which preceded it
            continue
        kept.append(separator + word)
    return ''.join(kept)
|
||||
|
|
4
posts.py
4
posts.py
|
@ -61,6 +61,7 @@ from utils import removeHtml
|
|||
from utils import dangerousMarkup
|
||||
from media import attachMedia
|
||||
from media import replaceYouTube
|
||||
from content import limitRepeatedWords
|
||||
from content import tagExists
|
||||
from content import removeLongWords
|
||||
from content import addHtmlTags
|
||||
|
@ -4031,6 +4032,9 @@ def downloadAnnounce(session, baseDir: str, httpPrefix: str,
|
|||
# remove any long words
|
||||
contentStr = removeLongWords(contentStr, 40, [])
|
||||
|
||||
# Prevent the same word from being repeated many times
|
||||
contentStr = limitRepeatedWords(contentStr, 6)
|
||||
|
||||
# remove text formatting, such as bold/italics
|
||||
contentStr = removeTextFormatting(contentStr)
|
||||
|
||||
|
|
39
tests.py
39
tests.py
|
@ -94,6 +94,7 @@ from inbox import jsonPostAllowsComments
|
|||
from inbox import validInbox
|
||||
from inbox import validInboxFilenames
|
||||
from categories import guessHashtagCategory
|
||||
from content import limitRepeatedWords
|
||||
from content import switchWords
|
||||
from content import extractTextFieldsInPOST
|
||||
from content import validHashTag
|
||||
|
@ -4154,9 +4155,47 @@ def _testLimitWordLengths() -> None:
|
|||
assert result == "This is an exceptionally test"
|
||||
|
||||
|
||||
def _testLimitRepetedWords() -> None:
    """Checks that limitRepeatedWords cuts a long run of one repeated
    word down to six occurrences, both when the run is followed by
    further text and when it is at the very end of the text
    """
    print('limitRepeatedWords')
    maxRepeats = 6
    trimmedRun = ' '.join(['Same'] * maxRepeats)

    # a run of 70 repeated words in the middle of the text
    preamble = "This is a preamble.\n\n"
    ending = "\n\nSome other text."
    original = preamble + ' '.join(['Same'] * 70) + ending
    wanted = preamble + trimmedRun + ending
    assert limitRepeatedWords(original, maxRepeats) == wanted

    # a run of 90 repeated words at the end of the text
    preamble = "This is other preamble.\n\n"
    original = preamble + ' '.join(['Same'] * 90)
    wanted = preamble + trimmedRun
    assert limitRepeatedWords(original, maxRepeats) == wanted
|
||||
|
||||
|
||||
def runAllTests():
|
||||
print('Running tests...')
|
||||
updateDefaultThemesList(os.getcwd())
|
||||
_testLimitRepetedWords()
|
||||
_testLimitWordLengths()
|
||||
_testSwitchWords()
|
||||
_testFunctions()
|
||||
|
|
|
@ -10,6 +10,7 @@ __module_group__ = "Web Interface Columns"
|
|||
import os
|
||||
from datetime import datetime
|
||||
from content import removeLongWords
|
||||
from content import limitRepeatedWords
|
||||
from utils import removeHtml
|
||||
from utils import locatePost
|
||||
from utils import loadJson
|
||||
|
@ -265,6 +266,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool,
|
|||
_votesIndicator(totalVotes, positiveVoting)
|
||||
|
||||
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
||||
title = limitRepeatedWords(title, 6)
|
||||
htmlStr += '<p class="newswireItemVotedOn">' + \
|
||||
'<a href="' + url + '" target="_blank" ' + \
|
||||
'rel="nofollow noopener noreferrer">' + \
|
||||
|
@ -293,6 +295,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool,
|
|||
_votesIndicator(totalVotes, positiveVoting)
|
||||
|
||||
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
||||
title = limitRepeatedWords(title, 6)
|
||||
if moderator and moderatedItem:
|
||||
htmlStr += '<p class="newswireItemModerated">' + \
|
||||
'<a href="' + url + '" target="_blank" ' + \
|
||||
|
@ -417,6 +420,7 @@ def htmlCitations(baseDir: str, nickname: str, domain: str,
|
|||
dateShown = publishedDate.strftime("%Y-%m-%d %H:%M")
|
||||
|
||||
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
||||
title = limitRepeatedWords(title, 6)
|
||||
link = item[1]
|
||||
|
||||
citationValue = \
|
||||
|
|
|
@ -45,6 +45,7 @@ from utils import removeIdEnding
|
|||
from utils import getNicknameFromActor
|
||||
from utils import getDomainFromActor
|
||||
from utils import isEventPost
|
||||
from content import limitRepeatedWords
|
||||
from content import replaceEmojiFromTags
|
||||
from content import htmlReplaceQuoteMarks
|
||||
from content import htmlReplaceEmailQuote
|
||||
|
@ -1601,6 +1602,7 @@ def individualPostAsHtml(allowDownloads: bool,
|
|||
objectContent = \
|
||||
removeLongWords(postJsonObject['object']['content'], 40, [])
|
||||
objectContent = removeTextFormatting(objectContent)
|
||||
objectContent = limitRepeatedWords(objectContent, 6)
|
||||
objectContent = \
|
||||
switchWords(baseDir, nickname, domain, objectContent)
|
||||
objectContent = htmlReplaceEmailQuote(objectContent)
|
||||
|
|
Loading…
Reference in New Issue