From 4c14d4a6499c2f45370506a84bc535e1b7dea8c8 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Tue, 7 Jul 2020 15:18:02 +0100 Subject: [PATCH] Remove html from searched profile descriptions --- content.py | 18 ++++++++++++++++++ daemon.py | 2 +- tests.py | 11 +++++++++++ webinterface.py | 3 +++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/content.py b/content.py index 6fae45675..1df206fdd 100644 --- a/content.py +++ b/content.py @@ -400,6 +400,24 @@ def removeTextFormatting(content: str) -> str: return content +def removeHtml(content: str) -> str: + """Removes html links from the given content. + Used to ensure that profile descriptions don't contain dubious content + """ + if '<' not in content: + return content + removing = False + result = '' + for ch in content: + if ch == '<': + removing = True + elif ch == '>': + removing = False + elif not removing: + result += ch + return result + + def removeLongWords(content: str, maxWordLength: int, longWordsList: []) -> str: """Breaks up long words so that on mobile screens this doesn't diff --git a/daemon.py b/daemon.py index 53a9ae6a0..64aa7263f 100644 --- a/daemon.py +++ b/daemon.py @@ -747,7 +747,7 @@ class PubServer(BaseHTTPRequestHandler): self._write(msg) print('instance metadata sent') return True - if self.path.startswith('/api/v1/instance/peers'): + if self.path.startswith('/api/v1/instance/peers'): # This is just a dummy result. # Showing the full list of peers would have privacy implications. # On a large instance you are somewhat lost in the crowd, but on diff --git a/tests.py b/tests.py index a0a60dfd0..ba21720bc 100644 --- a/tests.py +++ b/tests.py @@ -64,6 +64,7 @@ from media import getAttachmentMediaType from delete import sendDeleteViaServer from inbox import validInbox from inbox import validInboxFilenames +from content import removeHtml from content import addWebLinks from content import replaceEmojiFromTags from content import addHtmlTags @@ -1873,8 +1874,18 @@ def testSiteIsActive(): assert(not siteIsActive('https://notarealwebsite.a.b.c')) +def testRemoveHtml(): + print('testRemoveHtml') + testStr = 'This string has no html.' + assert(removeHtml(testStr) == testStr) + testStr = 'This string has html.' + print(removeHtml(testStr)) + assert(removeHtml(testStr) == 'This string has html.') + + def runAllTests(): print('Running tests...') + testRemoveHtml() testSiteIsActive() testJsonld() testRemoveTextFormatting() diff --git a/webinterface.py b/webinterface.py index a8f7521aa..dad767950 100644 --- a/webinterface.py +++ b/webinterface.py @@ -63,6 +63,7 @@ from content import getMentionsFromHtml from content import addHtmlTags from content import replaceEmojiFromTags from content import removeLongWords +from content import removeHtml from config import getConfigParam from skills import getSkills from cache import getPersonFromCache @@ -6215,6 +6216,8 @@ def htmlProfileAfterSearch(recentPostsCache: {}, maxRecentPosts: int, profileJson['summary'].replace('
', '\n') avatarDescription = avatarDescription.replace('

', '') avatarDescription = avatarDescription.replace('

', '') + if '<' in avatarDescription: + avatarDescription = removeHtml(avatarDescription) profileStr = '
' profileStr += '
' if avatarUrl: