From fd300836968409f32862e616ed6c7940f95645d6 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Wed, 19 May 2021 12:29:37 +0100 Subject: [PATCH] Detect encoded script markup --- tests.py | 7 +++++-- utils.py | 52 ++++++++++++++++++++++++++++------------------------ 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/tests.py b/tests.py index 5a267029c..22d29ebb7 100644 --- a/tests.py +++ b/tests.py @@ -2268,6 +2268,11 @@ def testDangerousMarkup(): '.innerHTML = "evil";

' assert(dangerousMarkup(content, allowLocalNetworkAccess)) + content = '

This is a valid-looking message. But wait... ' + \ + '&lt;script&gt;document.getElementById("concentrated")' + \ + '.innerHTML = "evil";&lt;/script&gt;

' + assert(dangerousMarkup(content, allowLocalNetworkAccess)) + content = '

This html contains more than you expected... ' + \ '

' @@ -3646,8 +3651,6 @@ def testSpoofGeolocation() -> None: "%Y-%m-%d %H:%M") coords = spoofGeolocation('', 'new york, usa', currTime, decoySeed, citiesList) - #coords = spoofGeolocation('', 'berlin, germany', currTime, - # decoySeed, citiesList) longitude = coords[1] if coords[3] == 'W': longitude = -coords[1] diff --git a/utils.py b/utils.py index 0a54a3148..e7e1382d3 100644 --- a/utils.py +++ b/utils.py @@ -663,32 +663,36 @@ def getLocalNetworkAddresses() -> []: def dangerousMarkup(content: str, allowLocalNetworkAccess: bool) -> bool: """Returns true if the given content contains dangerous html markup """ - if '<' not in content: - return False - if '>' not in content: - return False - contentSections = content.split('<') - invalidPartials = () - if not allowLocalNetworkAccess: - invalidPartials = getLocalNetworkAddresses() - invalidStrings = ('script', 'canvas', 'style', 'abbr', - 'frame', 'iframe', 'html', 'body', - 'hr', 'allow-popups', 'allow-scripts') - for markup in contentSections: - if '>' not in markup: + separators = (['<', '>'], ['&lt;', '&gt;']) + for separatorStyle in separators: + startChar = separatorStyle[0] + endChar = separatorStyle[1] + if startChar not in content: continue - markup = markup.split('>')[0].strip() - for partialMatch in invalidPartials: - if partialMatch in markup: - return True - if ' ' not in markup: - for badStr in invalidStrings: - if badStr in markup: - return True - else: - for badStr in invalidStrings: - if badStr + ' ' in markup: + if endChar not in content: + continue + contentSections = content.split(startChar) + invalidPartials = () + if not allowLocalNetworkAccess: + invalidPartials = getLocalNetworkAddresses() + invalidStrings = ('script', 'canvas', 'style', 'abbr', + 'frame', 'iframe', 'html', 'body', + 'hr', 'allow-popups', 'allow-scripts') + for markup in contentSections: + if endChar not in markup: + continue + markup = markup.split(endChar)[0].strip() + for partialMatch in invalidPartials: + if partialMatch in 
markup: return True + if ' ' not in markup: + for badStr in invalidStrings: + if badStr in markup: + return True + else: + for badStr in invalidStrings: + if badStr + ' ' in markup: + return True return False