forked from indymedia/epicyon
Improve checking for bad markup
parent
ac0dd52c78
commit
1a15d07dfd
26
content.py
26
content.py
|
@ -14,6 +14,32 @@ from utils import fileLastModified
|
|||
from utils import getLinkPrefixes
|
||||
|
||||
|
||||
def dangerousMarkup(content: str) -> bool:
    """Returns true if the given content contains dangerous html markup

    The content is split on '<' and, for every section which also
    contains a '>', the tag name is extracted and compared against a
    list of disallowed names.

    The tag name is the first token of the tag, where tokens are
    separated by any whitespace or by '/'. It is lower-cased before
    comparison, so '<SCRIPT>' is treated the same as '<script>', and
    '<script/src=...>' or a tag name followed by a newline or tab
    cannot slip past the check. Attribute values are not examined, so
    a link to 'iframe.html' is not considered dangerous.

    The comparison is a substring test against the tag name, so names
    which merely contain a disallowed string (eg. 'noscript',
    'frameset', 'tbody') are also rejected.
    """
    if '<' not in content or '>' not in content:
        return False
    invalidStrings = ('script', 'canvas', 'style', 'abbr',
                      'frame', 'iframe', 'html', 'body',
                      'hr', 'br')
    for markup in content.split('<'):
        if '>' not in markup:
            # no closing bracket, so this section is not a tag
            continue
        # keep only the text between the brackets
        markup = markup.split('>')[0].lower()
        # treat '/' like whitespace so that closing tags ('/p'),
        # self-closing tags and '<script/src=...>' all yield the
        # bare tag name as the first token
        tokens = markup.replace('/', ' ').split()
        if not tokens:
            continue
        tagName = tokens[0]
        for badStr in invalidStrings:
            if badStr in tagName:
                return True
    return False
|
||||
|
||||
|
||||
def switchWords(baseDir: str, nickname: str, domain: str, content: str) -> str:
|
||||
"""Performs word replacements. eg. Trump -> The Orange Menace
|
||||
"""
|
||||
|
|
21
inbox.py
21
inbox.py
|
@ -63,6 +63,7 @@ from media import replaceYouTube
|
|||
from git import isGitPatch
|
||||
from git import receiveGitPatch
|
||||
from followingCalendar import receivingCalendarEvents
|
||||
from content import dangerousMarkup
|
||||
|
||||
|
||||
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
||||
|
@ -1599,22 +1600,20 @@ def validPostContent(baseDir: str, nickname: str, domain: str,
|
|||
return False
|
||||
if 'Z' not in messageJson['object']['published']:
|
||||
return False
|
||||
|
||||
if isGitPatch(baseDir, nickname, domain,
|
||||
messageJson['object']['type'],
|
||||
messageJson['object']['summary'],
|
||||
messageJson['object']['content']):
|
||||
return True
|
||||
# check for bad html
|
||||
invalidStrings = ('<script>', '</script>', '</canvas>',
|
||||
'</style>', '</abbr>',
|
||||
'</html>', '</body>', '<br>', '<hr>')
|
||||
for badStr in invalidStrings:
|
||||
if badStr in messageJson['object']['content']:
|
||||
if messageJson['object'].get('id'):
|
||||
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
|
||||
print('REJECT ARBITRARY HTML: bad string in post - ' +
|
||||
messageJson['object']['content'])
|
||||
return False
|
||||
|
||||
if dangerousMarkup(messageJson['object']['content']):
|
||||
if messageJson['object'].get('id'):
|
||||
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
|
||||
print('REJECT ARBITRARY HTML: bad string in post - ' +
|
||||
messageJson['object']['content'])
|
||||
return False
|
||||
|
||||
# check (rough) number of mentions
|
||||
mentionsEst = estimateNumberOfMentions(messageJson['object']['content'])
|
||||
if mentionsEst > maxMentions:
|
||||
|
|
33
tests.py
33
tests.py
|
@ -64,6 +64,7 @@ from media import getAttachmentMediaType
|
|||
from delete import sendDeleteViaServer
|
||||
from inbox import validInbox
|
||||
from inbox import validInboxFilenames
|
||||
from content import dangerousMarkup
|
||||
from content import removeHtml
|
||||
from content import addWebLinks
|
||||
from content import replaceEmojiFromTags
|
||||
|
@ -1882,8 +1883,40 @@ def testRemoveHtml():
|
|||
assert(removeHtml(testStr) == 'This string has html.')
|
||||
|
||||
|
||||
def testDangerousMarkup():
    """Checks dangerousMarkup against a table of example posts

    Each entry pairs a post body with the result which
    dangerousMarkup is expected to return for it.
    """
    print('testDangerousMarkup')
    examples = (
        # plain, harmless content
        ('<p>This is a valid message</p>', False),
        ('This is a valid message without markup', False),
        # inline and remote scripts
        ('<p>This is a valid-looking message. But wait... '
         '<script>document.getElementById("concentrated")'
         '.innerHTML = "evil";</script></p>', True),
        ('<p>This is a valid-looking message. But wait... '
         '<script src="https://evilsite/payload.js" /></p>', True),
        # embedded frames, with and without padding inside the tag
        ('<p>This message embeds an evil frame.'
         '<iframe src="somesite"></iframe></p>', True),
        ('<p>This message tries to obfuscate an evil frame.'
         '< iframe src = "somesite"></ iframe ></p>', True),
        # layout spam
        ('<p>This message is not necessarily evil, but annoying.'
         '<hr><br><br><br><br><br><br><br><hr><hr></p>', True),
        # links whose urls merely contain a disallowed word
        ('<p>This message contans a '
         '<a href="https://validsite/index.html">valid link.</a></p>',
         False),
        ('<p>This message contans a '
         '<a href="https://validsite/iframe.html">'
         'valid link having invalid but harmless name.</a></p>', False),
    )
    for postContent, expectedDangerous in examples:
        isDangerous = bool(dangerousMarkup(postContent))
        assert isDangerous is expectedDangerous
|
||||
|
||||
|
||||
def runAllTests():
|
||||
print('Running tests...')
|
||||
testDangerousMarkup()
|
||||
testRemoveHtml()
|
||||
testSiteIsActive()
|
||||
testJsonld()
|
||||
|
|
Loading…
Reference in New Issue