diff --git a/tests.py b/tests.py index f66d6bb6f..233e8d029 100644 --- a/tests.py +++ b/tests.py @@ -2275,6 +2275,8 @@ def testRemoveHtml(): assert(removeHtml(testStr) == testStr) testStr = 'This string has html.' assert(removeHtml(testStr) == 'This string has html.') + testStr = '

This string has.

Two paragraphs.

' + assert(removeHtml(testStr) == 'This string has. Two paragraphs.') def testDangerousCSS(): diff --git a/utils.py b/utils.py index 25e184efa..16250f7d4 100644 --- a/utils.py +++ b/utils.py @@ -263,8 +263,18 @@ def removeHtml(content: str) -> str: removing = False elif not removing: result += ch - result = result.replace(' ', ' ') - result = result.replace('.', '. ') + + plainText = result.replace(' ', ' ') + + # insert spaces after full stops + strLen = len(plainText) + result = '' + for i in range(strLen): + result += plainText[i] + if plainText[i] == '.' and i < strLen - 1: + if plainText[i + 1] >= 'A' and plainText[i + 1] <= 'Z': + result += ' ' + result = result.replace(' ', ' ').strip() return result