Improve checking for bad markup

main
Bob Mottram 2020-07-10 14:15:01 +00:00
parent ac0dd52c78
commit 1a15d07dfd
3 changed files with 69 additions and 11 deletions

View File

@ -14,6 +14,32 @@ from utils import fileLastModified
from utils import getLinkPrefixes
def dangerousMarkup(content: str) -> bool:
    """Returns true if the given content contains dangerous html markup

    The content is split on '<' and, for every section which also has a
    closing '>', the tag name is extracted and compared against a list
    of disallowed names. The comparison is case-insensitive and the tag
    name ends at the first whitespace character of any kind (or '/'),
    so that '<SCRIPT>' or a tab-separated '<iframe\\tsrc=...>' cannot
    slip through. Attribute values are not inspected, hence a link to
    "iframe.html" is not rejected.
    """
    if '<' not in content or '>' not in content:
        return False
    invalidStrings = ('script', 'canvas', 'style', 'abbr',
                      'frame', 'iframe', 'html', 'body',
                      'hr', 'br')
    for markup in content.split('<'):
        if '>' not in markup:
            # no closing bracket, so this section is not a tag
            continue
        # keep only the text between the brackets, tolerating
        # obfuscations such as '< iframe' or '</ iframe >'
        markup = markup.split('>')[0].strip().lstrip('/')
        # split(None) breaks on any whitespace (space, tab, newline...)
        tokens = markup.split(None, 1)
        if not tokens:
            # empty tag such as '<>' or '</>'
            continue
        # drop a self-closing slash or '/'-separated attributes
        # eg. 'br/' or 'script/src=...'
        tagName = tokens[0].split('/')[0].lower()
        # substring match so that variants such as 'noscript' or
        # 'frameset' are also rejected
        for badStr in invalidStrings:
            if badStr in tagName:
                return True
    return False
def switchWords(baseDir: str, nickname: str, domain: str, content: str) -> str:
"""Performs word replacements. eg. Trump -> The Orange Menace
"""

View File

@ -63,6 +63,7 @@ from media import replaceYouTube
from git import isGitPatch
from git import receiveGitPatch
from followingCalendar import receivingCalendarEvents
from content import dangerousMarkup
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
@ -1599,22 +1600,20 @@ def validPostContent(baseDir: str, nickname: str, domain: str,
return False
if 'Z' not in messageJson['object']['published']:
return False
if isGitPatch(baseDir, nickname, domain,
messageJson['object']['type'],
messageJson['object']['summary'],
messageJson['object']['content']):
return True
# check for bad html
invalidStrings = ('<script>', '</script>', '</canvas>',
'</style>', '</abbr>',
'</html>', '</body>', '<br>', '<hr>')
for badStr in invalidStrings:
if badStr in messageJson['object']['content']:
if dangerousMarkup(messageJson['object']['content']):
if messageJson['object'].get('id'):
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
print('REJECT ARBITRARY HTML: bad string in post - ' +
messageJson['object']['content'])
return False
# check (rough) number of mentions
mentionsEst = estimateNumberOfMentions(messageJson['object']['content'])
if mentionsEst > maxMentions:

View File

@ -64,6 +64,7 @@ from media import getAttachmentMediaType
from delete import sendDeleteViaServer
from inbox import validInbox
from inbox import validInboxFilenames
from content import dangerousMarkup
from content import removeHtml
from content import addWebLinks
from content import replaceEmojiFromTags
@ -1882,8 +1883,40 @@ def testRemoveHtml():
assert(removeHtml(testStr) == 'This string has html.')
def testDangerousMarkup():
    """Runs dangerousMarkup over a table of sample posts, each paired
    with whether it is expected to be rejected
    """
    print('testDangerousMarkup')
    # (post content, expected to be flagged as dangerous)
    samples = (
        ('<p>This is a valid message</p>', False),
        ('This is a valid message without markup', False),
        # inline script
        ('<p>This is a valid-looking message. But wait... '
         '<script>document.getElementById("concentrated")'
         '.innerHTML = "evil";</script></p>', True),
        # externally loaded script
        ('<p>This is a valid-looking message. But wait... '
         '<script src="https://evilsite/payload.js" /></p>', True),
        # embedded frame
        ('<p>This message embeds an evil frame.'
         '<iframe src="somesite"></iframe></p>', True),
        # embedded frame padded with extra spaces
        ('<p>This message tries to obfuscate an evil frame.'
         '< iframe src = "somesite"></ iframe ></p>', True),
        # layout spam
        ('<p>This message is not necessarily evil, but annoying.'
         '<hr><br><br><br><br><br><br><br><hr><hr></p>', True),
        # ordinary link
        ('<p>This message contans a '
         '<a href="https://validsite/index.html">valid link.</a></p>',
         False),
        # link whose url merely contains a disallowed word
        ('<p>This message contans a '
         '<a href="https://validsite/iframe.html">'
         'valid link having invalid but harmless name.</a></p>', False),
    )
    for sampleContent, expectDangerous in samples:
        if expectDangerous:
            assert dangerousMarkup(sampleContent)
        else:
            assert not dangerousMarkup(sampleContent)
def runAllTests():
print('Running tests...')
testDangerousMarkup()
testRemoveHtml()
testSiteIsActive()
testJsonld()