Another long lines failure case

merge-requests/20/merge
Bob Mottram 2021-03-17 21:17:27 +00:00
parent d5f82c568d
commit eb9135ac3e
2 changed files with 20 additions and 0 deletions

View File

@@ -643,6 +643,8 @@ def removeLongWords(content: str, maxWordLength: int,
if wordStr not in longWordsList:
longWordsList.append(wordStr)
for wordStr in longWordsList:
if wordStr.startswith('<p>'):
wordStr = wordStr.replace('<p>', '')
if wordStr.startswith('<'):
continue
if len(wordStr) == 76:
@@ -678,6 +680,8 @@ def removeLongWords(content: str, maxWordLength: int,
continue
if '<' in wordStr:
replaceWord = wordStr.split('<', 1)[0]
# if len(replaceWord) > maxWordLength:
# replaceWord = replaceWord[:maxWordLength]
content = content.replace(wordStr, replaceWord)
wordStr = replaceWord
if '/' in wordStr:

View File

@@ -1911,6 +1911,22 @@ def testActorParsing():
def testWebLinks():
print('testWebLinks')
exampleText = \
"<p>Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + \
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + \
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + \
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + \
" <a href=\"https://domain.ugh/tags/turbot\" class=\"mention " + \
"hashtag\" rel=\"tag\">#<span>turbot</span></a> <a href=\"" + \
"https://domain.ugh/tags/haddock\" class=\"mention hashtag\"" + \
" rel=\"tag\">#<span>haddock</span></a></p>"
resultText = removeLongWords(exampleText, 40, [])
assert resultText == "<p>Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + \
" <a href=\"https://domain.ugh/tags/turbot\" class=\"mention " + \
"hashtag\" rel=\"tag\">#<span>turbot</span></a> " + \
"<a href=\"https://domain.ugh/tags/haddock\" " + \
"class=\"mention hashtag\" rel=\"tag\">#<span>haddock</span></a></p>"
exampleText = \
'<p><span class=\"h-card\"><a href=\"https://something/@orother' + \
'\" class=\"u-url mention\">@<span>foo</span></a></span> Some ' + \