Improve checking for bad markup

main
Bob Mottram 2020-07-10 14:15:01 +00:00
parent ac0dd52c78
commit 1a15d07dfd
3 changed files with 69 additions and 11 deletions

View File

@ -14,6 +14,32 @@ from utils import fileLastModified
from utils import getLinkPrefixes from utils import getLinkPrefixes
def dangerousMarkup(content: str) -> bool:
    """Returns true if the given content contains dangerous html markup

    The content is split on '<' and the text up to the next '>' in each
    section is treated as a tag. A tag is considered dangerous if:
      - it contains no space and any forbidden name occurs anywhere in it
      - it contains a space and a forbidden name followed by a space
        occurs in it, or the tag name itself contains a forbidden name

    Matching is case-insensitive, and the tag name ends at the first
    whitespace character or '/', so that variants such as <SCRIPT>,
    a tag name followed by a newline, or <script/src=...> are detected.
    """
    if '<' not in content or '>' not in content:
        return False
    invalidStrings = ('script', 'canvas', 'style', 'abbr',
                      'frame', 'iframe', 'html', 'body',
                      'hr', 'br')
    for section in content.split('<'):
        if '>' not in section:
            continue
        # html tag names are case-insensitive, so compare in lower case
        markup = section.split('>')[0].strip().lower()
        if ' ' not in markup:
            # a bare tag such as "hr", "/script" or "script/"
            if any(badStr in markup for badStr in invalidStrings):
                return True
            continue
        # The tag name is the first token once '/' and any kind of
        # whitespace (space, tab, newline) are treated as separators.
        # This also copes with closing tags like "/ iframe"
        tokens = markup.replace('/', ' ').split()
        tagName = tokens[0] if tokens else ''
        for badStr in invalidStrings:
            if badStr + ' ' in markup or badStr in tagName:
                return True
    return False
def switchWords(baseDir: str, nickname: str, domain: str, content: str) -> str: def switchWords(baseDir: str, nickname: str, domain: str, content: str) -> str:
"""Performs word replacements. eg. Trump -> The Orange Menace """Performs word replacements. eg. Trump -> The Orange Menace
""" """

View File

@ -63,6 +63,7 @@ from media import replaceYouTube
from git import isGitPatch from git import isGitPatch
from git import receiveGitPatch from git import receiveGitPatch
from followingCalendar import receivingCalendarEvents from followingCalendar import receivingCalendarEvents
from content import dangerousMarkup
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None: def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
@ -1599,22 +1600,20 @@ def validPostContent(baseDir: str, nickname: str, domain: str,
return False return False
if 'Z' not in messageJson['object']['published']: if 'Z' not in messageJson['object']['published']:
return False return False
if isGitPatch(baseDir, nickname, domain, if isGitPatch(baseDir, nickname, domain,
messageJson['object']['type'], messageJson['object']['type'],
messageJson['object']['summary'], messageJson['object']['summary'],
messageJson['object']['content']): messageJson['object']['content']):
return True return True
# check for bad html
invalidStrings = ('<script>', '</script>', '</canvas>', if dangerousMarkup(messageJson['object']['content']):
'</style>', '</abbr>', if messageJson['object'].get('id'):
'</html>', '</body>', '<br>', '<hr>') print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
for badStr in invalidStrings: print('REJECT ARBITRARY HTML: bad string in post - ' +
if badStr in messageJson['object']['content']: messageJson['object']['content'])
if messageJson['object'].get('id'): return False
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
print('REJECT ARBITRARY HTML: bad string in post - ' +
messageJson['object']['content'])
return False
# check (rough) number of mentions # check (rough) number of mentions
mentionsEst = estimateNumberOfMentions(messageJson['object']['content']) mentionsEst = estimateNumberOfMentions(messageJson['object']['content'])
if mentionsEst > maxMentions: if mentionsEst > maxMentions:

View File

@ -64,6 +64,7 @@ from media import getAttachmentMediaType
from delete import sendDeleteViaServer from delete import sendDeleteViaServer
from inbox import validInbox from inbox import validInbox
from inbox import validInboxFilenames from inbox import validInboxFilenames
from content import dangerousMarkup
from content import removeHtml from content import removeHtml
from content import addWebLinks from content import addWebLinks
from content import replaceEmojiFromTags from content import replaceEmojiFromTags
@ -1882,8 +1883,40 @@ def testRemoveHtml():
assert(removeHtml(testStr) == 'This string has html.') assert(removeHtml(testStr) == 'This string has html.')
def testDangerousMarkup():
    """Checks that dangerousMarkup accepts ordinary post content and
    rejects content containing forbidden tags
    """
    print('testDangerousMarkup')
    # (content, whether it is expected to be flagged as dangerous)
    testCases = (
        ('<p>This is a valid message</p>', False),
        ('This is a valid message without markup', False),
        # inline script
        ('<p>This is a valid-looking message. But wait... '
         '<script>document.getElementById("concentrated")'
         '.innerHTML = "evil";</script></p>', True),
        # script loaded from another site
        ('<p>This is a valid-looking message. But wait... '
         '<script src="https://evilsite/payload.js" /></p>', True),
        ('<p>This message embeds an evil frame.'
         '<iframe src="somesite"></iframe></p>', True),
        # extra spaces inside the tags
        ('<p>This message tries to obfuscate an evil frame.'
         '< iframe src = "somesite"></ iframe ></p>', True),
        ('<p>This message is not necessarily evil, but annoying.'
         '<hr><br><br><br><br><br><br><br><hr><hr></p>', True),
        ('<p>This message contans a '
         '<a href="https://validsite/index.html">valid link.</a></p>',
         False),
        # a forbidden name appearing only within a link url
        ('<p>This message contans a '
         '<a href="https://validsite/iframe.html">'
         'valid link having invalid but harmless name.</a></p>', False),
    )
    for content, expectedDangerous in testCases:
        if expectedDangerous:
            assert(dangerousMarkup(content))
        else:
            assert(not dangerousMarkup(content))
def runAllTests(): def runAllTests():
print('Running tests...') print('Running tests...')
testDangerousMarkup()
testRemoveHtml() testRemoveHtml()
testSiteIsActive() testSiteIsActive()
testJsonld() testJsonld()