mirror of https://gitlab.com/bashrc2/epicyon
Limit the number of times that the same word can be repeated
parent
e8553eb192
commit
7511af13d0
33
content.py
33
content.py
|
|
@ -866,6 +866,7 @@ def addHtmlTags(baseDir: str, httpPrefix: str,
|
||||||
content = addWebLinks(content)
|
content = addWebLinks(content)
|
||||||
if longWordsList:
|
if longWordsList:
|
||||||
content = removeLongWords(content, maxWordLength, longWordsList)
|
content = removeLongWords(content, maxWordLength, longWordsList)
|
||||||
|
content = limitRepeatedWords(content, 6)
|
||||||
content = content.replace(' --linebreak-- ', '</p><p>')
|
content = content.replace(' --linebreak-- ', '</p><p>')
|
||||||
content = htmlReplaceEmailQuote(content)
|
content = htmlReplaceEmailQuote(content)
|
||||||
return '<p>' + htmlReplaceQuoteMarks(content) + '</p>'
|
return '<p>' + htmlReplaceQuoteMarks(content) + '</p>'
|
||||||
|
|
@ -1053,3 +1054,35 @@ def extractTextFieldsInPOST(postBytes, boundary: str, debug: bool,
|
||||||
postValue += postLines[line]
|
postValue += postLines[line]
|
||||||
fields[postKey] = urllib.parse.unquote(postValue)
|
fields[postKey] = urllib.parse.unquote(postValue)
|
||||||
return fields
|
return fields
|
||||||
|
|
||||||
|
|
||||||
|
def limitRepeatedWords(text: str, maxRepeats: int) -> str:
    """Limits how many times the same word may appear consecutively.

    Each line of the text is examined separately. Whenever a word is
    repeated, separated by single spaces, more than maxRepeats times in
    a row then the surplus copies are dropped, so that exactly
    maxRepeats copies remain. All other text, including line breaks and
    runs of multiple spaces, is returned unaltered.

    text: the text to be checked
    maxRepeats: the maximum number of consecutive copies of a word to
        keep. If this is less than one then the text is returned
        unaltered.

    Returns the text with over-long runs of repeated words shortened.
    """
    if not text or maxRepeats < 1:
        return text

    limitedLines = []
    # Work on one line at a time so that line breaks are preserved
    # exactly and a run never spans more than one line
    for line in text.split('\n'):
        keptWords = []
        prevWord = None
        runLength = 0
        for word in line.split(' '):
            # Empty words come from consecutive spaces. They are never
            # counted as repeats, so that spacing is left untouched
            if word and word == prevWord:
                runLength += 1
            else:
                prevWord = word
                runLength = 1
            if word and runLength > maxRepeats:
                # surplus copy of a repeated word
                continue
            keptWords.append(word)
        limitedLines.append(' '.join(keptWords))
    return '\n'.join(limitedLines)
|
||||||
|
|
|
||||||
4
posts.py
4
posts.py
|
|
@ -61,6 +61,7 @@ from utils import removeHtml
|
||||||
from utils import dangerousMarkup
|
from utils import dangerousMarkup
|
||||||
from media import attachMedia
|
from media import attachMedia
|
||||||
from media import replaceYouTube
|
from media import replaceYouTube
|
||||||
|
from content import limitRepeatedWords
|
||||||
from content import tagExists
|
from content import tagExists
|
||||||
from content import removeLongWords
|
from content import removeLongWords
|
||||||
from content import addHtmlTags
|
from content import addHtmlTags
|
||||||
|
|
@ -4031,6 +4032,9 @@ def downloadAnnounce(session, baseDir: str, httpPrefix: str,
|
||||||
# remove any long words
|
# remove any long words
|
||||||
contentStr = removeLongWords(contentStr, 40, [])
|
contentStr = removeLongWords(contentStr, 40, [])
|
||||||
|
|
||||||
|
# Prevent the same word from being repeated many times
|
||||||
|
contentStr = limitRepeatedWords(contentStr, 6)
|
||||||
|
|
||||||
# remove text formatting, such as bold/italics
|
# remove text formatting, such as bold/italics
|
||||||
contentStr = removeTextFormatting(contentStr)
|
contentStr = removeTextFormatting(contentStr)
|
||||||
|
|
||||||
|
|
|
||||||
39
tests.py
39
tests.py
|
|
@ -94,6 +94,7 @@ from inbox import jsonPostAllowsComments
|
||||||
from inbox import validInbox
|
from inbox import validInbox
|
||||||
from inbox import validInboxFilenames
|
from inbox import validInboxFilenames
|
||||||
from categories import guessHashtagCategory
|
from categories import guessHashtagCategory
|
||||||
|
from content import limitRepeatedWords
|
||||||
from content import switchWords
|
from content import switchWords
|
||||||
from content import extractTextFieldsInPOST
|
from content import extractTextFieldsInPOST
|
||||||
from content import validHashTag
|
from content import validHashTag
|
||||||
|
|
@ -4154,9 +4155,47 @@ def _testLimitWordLengths() -> None:
|
||||||
assert result == "This is an exceptionally test"
|
assert result == "This is an exceptionally test"
|
||||||
|
|
||||||
|
|
||||||
|
def _testLimitRepetedWords() -> None:
    """Checks that limitRepeatedWords shortens a long run of one word
    to six copies, both in the middle and at the end of the text
    """
    print('limitRepeatedWords')
    maxRepeats = 6
    shortenedRun = ' '.join(['Same'] * maxRepeats)

    # a run of 70 identical words with other text before and after it
    preamble = "This is a preamble.\n\n"
    ending = "\n\nSome other text."
    text = preamble + ' '.join(['Same'] * 70) + ending
    expected = preamble + shortenedRun + ending
    result = limitRepeatedWords(text, maxRepeats)
    assert result == expected

    # a run of 90 identical words which continues to the end of the text
    preamble = "This is other preamble.\n\n"
    text = preamble + ' '.join(['Same'] * 90)
    expected = preamble + shortenedRun
    result = limitRepeatedWords(text, maxRepeats)
    assert result == expected
|
||||||
|
|
||||||
|
|
||||||
def runAllTests():
|
def runAllTests():
|
||||||
print('Running tests...')
|
print('Running tests...')
|
||||||
updateDefaultThemesList(os.getcwd())
|
updateDefaultThemesList(os.getcwd())
|
||||||
|
_testLimitRepetedWords()
|
||||||
_testLimitWordLengths()
|
_testLimitWordLengths()
|
||||||
_testSwitchWords()
|
_testSwitchWords()
|
||||||
_testFunctions()
|
_testFunctions()
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ __module_group__ = "Web Interface Columns"
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from content import removeLongWords
|
from content import removeLongWords
|
||||||
|
from content import limitRepeatedWords
|
||||||
from utils import removeHtml
|
from utils import removeHtml
|
||||||
from utils import locatePost
|
from utils import locatePost
|
||||||
from utils import loadJson
|
from utils import loadJson
|
||||||
|
|
@ -265,6 +266,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool,
|
||||||
_votesIndicator(totalVotes, positiveVoting)
|
_votesIndicator(totalVotes, positiveVoting)
|
||||||
|
|
||||||
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
||||||
|
title = limitRepeatedWords(title, 6)
|
||||||
htmlStr += '<p class="newswireItemVotedOn">' + \
|
htmlStr += '<p class="newswireItemVotedOn">' + \
|
||||||
'<a href="' + url + '" target="_blank" ' + \
|
'<a href="' + url + '" target="_blank" ' + \
|
||||||
'rel="nofollow noopener noreferrer">' + \
|
'rel="nofollow noopener noreferrer">' + \
|
||||||
|
|
@ -293,6 +295,7 @@ def _htmlNewswire(baseDir: str, newswire: {}, nickname: str, moderator: bool,
|
||||||
_votesIndicator(totalVotes, positiveVoting)
|
_votesIndicator(totalVotes, positiveVoting)
|
||||||
|
|
||||||
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
||||||
|
title = limitRepeatedWords(title, 6)
|
||||||
if moderator and moderatedItem:
|
if moderator and moderatedItem:
|
||||||
htmlStr += '<p class="newswireItemModerated">' + \
|
htmlStr += '<p class="newswireItemModerated">' + \
|
||||||
'<a href="' + url + '" target="_blank" ' + \
|
'<a href="' + url + '" target="_blank" ' + \
|
||||||
|
|
@ -417,6 +420,7 @@ def htmlCitations(baseDir: str, nickname: str, domain: str,
|
||||||
dateShown = publishedDate.strftime("%Y-%m-%d %H:%M")
|
dateShown = publishedDate.strftime("%Y-%m-%d %H:%M")
|
||||||
|
|
||||||
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
title = removeLongWords(item[0], 16, []).replace('\n', '<br>')
|
||||||
|
title = limitRepeatedWords(title, 6)
|
||||||
link = item[1]
|
link = item[1]
|
||||||
|
|
||||||
citationValue = \
|
citationValue = \
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ from utils import removeIdEnding
|
||||||
from utils import getNicknameFromActor
|
from utils import getNicknameFromActor
|
||||||
from utils import getDomainFromActor
|
from utils import getDomainFromActor
|
||||||
from utils import isEventPost
|
from utils import isEventPost
|
||||||
|
from content import limitRepeatedWords
|
||||||
from content import replaceEmojiFromTags
|
from content import replaceEmojiFromTags
|
||||||
from content import htmlReplaceQuoteMarks
|
from content import htmlReplaceQuoteMarks
|
||||||
from content import htmlReplaceEmailQuote
|
from content import htmlReplaceEmailQuote
|
||||||
|
|
@ -1601,6 +1602,7 @@ def individualPostAsHtml(allowDownloads: bool,
|
||||||
objectContent = \
|
objectContent = \
|
||||||
removeLongWords(postJsonObject['object']['content'], 40, [])
|
removeLongWords(postJsonObject['object']['content'], 40, [])
|
||||||
objectContent = removeTextFormatting(objectContent)
|
objectContent = removeTextFormatting(objectContent)
|
||||||
|
objectContent = limitRepeatedWords(objectContent, 6)
|
||||||
objectContent = \
|
objectContent = \
|
||||||
switchWords(baseDir, nickname, domain, objectContent)
|
switchWords(baseDir, nickname, domain, objectContent)
|
||||||
objectContent = htmlReplaceEmailQuote(objectContent)
|
objectContent = htmlReplaceEmailQuote(objectContent)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue