Remove html from searched profile descriptions

main
Bob Mottram 2020-07-07 15:18:02 +01:00
parent be39532262
commit 4c14d4a649
4 changed files with 33 additions and 1 deletion

View File

@ -400,6 +400,24 @@ def removeTextFormatting(content: str) -> str:
return content return content
def removeHtml(content: str) -> str:
    """Removes html markup from the given content.

    Used to ensure that profile descriptions don't contain dubious content.
    Everything from a '<' up to and including the next '>' is dropped, so
    every tag is stripped (not only links) while the text between tags is
    kept. An unterminated '<' discards the rest of the content. The
    returned string never contains a '<' character.
    """
    if '<' not in content:
        return content
    insideTag = False
    kept = []
    for ch in content:
        if ch == '<':
            insideTag = True
        elif insideTag:
            # Still within markup; only a '>' ends the tag
            if ch == '>':
                insideTag = False
        else:
            # Outside markup. A stray '>' is ordinary text and is kept,
            # which matches what the early return above does
            kept.append(ch)
    return ''.join(kept)
def removeLongWords(content: str, maxWordLength: int, def removeLongWords(content: str, maxWordLength: int,
longWordsList: []) -> str: longWordsList: []) -> str:
"""Breaks up long words so that on mobile screens this doesn't """Breaks up long words so that on mobile screens this doesn't

View File

@ -64,6 +64,7 @@ from media import getAttachmentMediaType
from delete import sendDeleteViaServer from delete import sendDeleteViaServer
from inbox import validInbox from inbox import validInbox
from inbox import validInboxFilenames from inbox import validInboxFilenames
from content import removeHtml
from content import addWebLinks from content import addWebLinks
from content import replaceEmojiFromTags from content import replaceEmojiFromTags
from content import addHtmlTags from content import addHtmlTags
@ -1873,8 +1874,18 @@ def testSiteIsActive():
assert(not siteIsActive('https://notarealwebsite.a.b.c')) assert(not siteIsActive('https://notarealwebsite.a.b.c'))
def testRemoveHtml():
    """Checks that removeHtml leaves plain text untouched and strips tags
    """
    print('testRemoveHtml')

    # Content without any markup comes back unchanged
    plainText = 'This string has no html.'
    assert removeHtml(plainText) == plainText

    # Tags are dropped while the text between them is kept
    strippedText = removeHtml('This string <a href="1234.567">has html</a>.')
    print(strippedText)
    assert strippedText == 'This string has html.'
def runAllTests(): def runAllTests():
print('Running tests...') print('Running tests...')
testRemoveHtml()
testSiteIsActive() testSiteIsActive()
testJsonld() testJsonld()
testRemoveTextFormatting() testRemoveTextFormatting()

View File

@ -63,6 +63,7 @@ from content import getMentionsFromHtml
from content import addHtmlTags from content import addHtmlTags
from content import replaceEmojiFromTags from content import replaceEmojiFromTags
from content import removeLongWords from content import removeLongWords
from content import removeHtml
from config import getConfigParam from config import getConfigParam
from skills import getSkills from skills import getSkills
from cache import getPersonFromCache from cache import getPersonFromCache
@ -6215,6 +6216,8 @@ def htmlProfileAfterSearch(recentPostsCache: {}, maxRecentPosts: int,
profileJson['summary'].replace('<br>', '\n') profileJson['summary'].replace('<br>', '\n')
avatarDescription = avatarDescription.replace('<p>', '') avatarDescription = avatarDescription.replace('<p>', '')
avatarDescription = avatarDescription.replace('</p>', '') avatarDescription = avatarDescription.replace('</p>', '')
if '<' in avatarDescription:
avatarDescription = removeHtml(avatarDescription)
profileStr = ' <div class="hero-image">' profileStr = ' <div class="hero-image">'
profileStr += ' <div class="hero-text">' profileStr += ' <div class="hero-text">'
if avatarUrl: if avatarUrl: