Remove html from searched profile descriptions

main
Bob Mottram 2020-07-07 15:18:02 +01:00
parent be39532262
commit 4c14d4a649
4 changed files with 33 additions and 1 deletions

View File

@ -400,6 +400,24 @@ def removeTextFormatting(content: str) -> str:
return content
def removeHtml(content: str) -> str:
    """Strips html tags from the given content, keeping the text between them.

    Used to ensure that profile descriptions don't contain dubious content.

    Everything from a '<' up to and including the next '>' is discarded.
    A '>' which does not close a tag is treated as ordinary text and kept.
    A tag which is never closed swallows the remainder of the string.
    The returned string never contains a '<' character.
    """
    if '<' not in content:
        # nothing which could open a tag, so return the text untouched
        return content
    insideTag = False
    keptChars = []
    for ch in content:
        if ch == '<':
            insideTag = True
        elif insideTag:
            # characters within a tag are dropped; '>' ends the tag
            if ch == '>':
                insideTag = False
        else:
            keptChars.append(ch)
    # join once rather than repeatedly concatenating strings
    return ''.join(keptChars)
def removeLongWords(content: str, maxWordLength: int,
longWordsList: []) -> str:
"""Breaks up long words so that on mobile screens this doesn't

View File

@ -747,7 +747,7 @@ class PubServer(BaseHTTPRequestHandler):
self._write(msg)
print('instance metadata sent')
return True
if self.path.startswith('/api/v1/instance/peers'):
if self.path.startswith('/api/v1/instance/peers'):
# This is just a dummy result.
# Showing the full list of peers would have privacy implications.
# On a large instance you are somewhat lost in the crowd, but on

View File

@ -64,6 +64,7 @@ from media import getAttachmentMediaType
from delete import sendDeleteViaServer
from inbox import validInbox
from inbox import validInboxFilenames
from content import removeHtml
from content import addWebLinks
from content import replaceEmojiFromTags
from content import addHtmlTags
@ -1873,8 +1874,18 @@ def testSiteIsActive():
assert(not siteIsActive('https://notarealwebsite.a.b.c'))
def testRemoveHtml():
    """Checks that removeHtml leaves plain text alone and strips
    tags from text which contains markup
    """
    print('testRemoveHtml')

    # text without any markup should come back unchanged
    plainText = 'This string has no html.'
    assert removeHtml(plainText) == plainText

    # the anchor tags should be removed and the link text kept
    markedUpText = 'This string <a href="1234.567">has html</a>.'
    strippedText = removeHtml(markedUpText)
    print(strippedText)
    assert strippedText == 'This string has html.'
def runAllTests():
print('Running tests...')
testRemoveHtml()
testSiteIsActive()
testJsonld()
testRemoveTextFormatting()

View File

@ -63,6 +63,7 @@ from content import getMentionsFromHtml
from content import addHtmlTags
from content import replaceEmojiFromTags
from content import removeLongWords
from content import removeHtml
from config import getConfigParam
from skills import getSkills
from cache import getPersonFromCache
@ -6215,6 +6216,8 @@ def htmlProfileAfterSearch(recentPostsCache: {}, maxRecentPosts: int,
profileJson['summary'].replace('<br>', '\n')
avatarDescription = avatarDescription.replace('<p>', '')
avatarDescription = avatarDescription.replace('</p>', '')
if '<' in avatarDescription:
avatarDescription = removeHtml(avatarDescription)
profileStr = ' <div class="hero-image">'
profileStr += ' <div class="hero-text">'
if avatarUrl: