forked from indymedia/epicyon
Improve checking for bad markup
parent
ac0dd52c78
commit
1a15d07dfd
26
content.py
26
content.py
|
@ -14,6 +14,32 @@ from utils import fileLastModified
|
||||||
from utils import getLinkPrefixes
|
from utils import getLinkPrefixes
|
||||||
|
|
||||||
|
|
||||||
|
def dangerousMarkup(content: str) -> bool:
    """Returns true if the given content contains dangerous html markup

    The content is split on '<' and, for every section which also
    contains a closing '>', the tag name is extracted and compared
    against a deny-list. Only the tag name is examined, so a deny-listed
    word appearing inside an attribute value (eg. a link to iframe.html)
    does not trigger a rejection.

    The comparison is case-insensitive, and the tag name is considered
    to end at the first whitespace character (space, tab, newline) or
    '/' so that variants such as '<SCRIPT>', '< iframe src=...>',
    '</ iframe >' or a tab separated '<script\\tsrc=...>' are detected.
    """
    # without both an opening and a closing bracket there is no markup
    if '<' not in content:
        return False
    if '>' not in content:
        return False
    invalidStrings = ('script', 'canvas', 'style', 'abbr',
                      'frame', 'iframe', 'html', 'body',
                      'hr', 'br')
    for markup in content.split('<'):
        if '>' not in markup:
            continue
        # the text between '<' and the first '>' is the tag itself
        markup = markup.split('>')[0].strip().lower()
        # treat '/' like whitespace so that closing tags ('/script'),
        # self closing tags ('br/') and '/' used as an attribute
        # separator all yield the bare tag name as the first field
        tagFields = markup.replace('/', ' ').split()
        if not tagFields:
            continue
        tagName = tagFields[0]
        # substring match is deliberate: it keeps the strictness of the
        # deny-list (eg. 'frame' also rejects 'iframe' and 'frameset')
        for badStr in invalidStrings:
            if badStr in tagName:
                return True
    return False
|
||||||
|
|
||||||
|
|
||||||
def switchWords(baseDir: str, nickname: str, domain: str, content: str) -> str:
|
def switchWords(baseDir: str, nickname: str, domain: str, content: str) -> str:
|
||||||
"""Performs word replacements. eg. Trump -> The Orange Menace
|
"""Performs word replacements. eg. Trump -> The Orange Menace
|
||||||
"""
|
"""
|
||||||
|
|
21
inbox.py
21
inbox.py
|
@ -63,6 +63,7 @@ from media import replaceYouTube
|
||||||
from git import isGitPatch
|
from git import isGitPatch
|
||||||
from git import receiveGitPatch
|
from git import receiveGitPatch
|
||||||
from followingCalendar import receivingCalendarEvents
|
from followingCalendar import receivingCalendarEvents
|
||||||
|
from content import dangerousMarkup
|
||||||
|
|
||||||
|
|
||||||
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
def storeHashTags(baseDir: str, nickname: str, postJsonObject: {}) -> None:
|
||||||
|
@ -1599,22 +1600,20 @@ def validPostContent(baseDir: str, nickname: str, domain: str,
|
||||||
return False
|
return False
|
||||||
if 'Z' not in messageJson['object']['published']:
|
if 'Z' not in messageJson['object']['published']:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if isGitPatch(baseDir, nickname, domain,
|
if isGitPatch(baseDir, nickname, domain,
|
||||||
messageJson['object']['type'],
|
messageJson['object']['type'],
|
||||||
messageJson['object']['summary'],
|
messageJson['object']['summary'],
|
||||||
messageJson['object']['content']):
|
messageJson['object']['content']):
|
||||||
return True
|
return True
|
||||||
# check for bad html
|
|
||||||
invalidStrings = ('<script>', '</script>', '</canvas>',
|
if dangerousMarkup(messageJson['object']['content']):
|
||||||
'</style>', '</abbr>',
|
if messageJson['object'].get('id'):
|
||||||
'</html>', '</body>', '<br>', '<hr>')
|
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
|
||||||
for badStr in invalidStrings:
|
print('REJECT ARBITRARY HTML: bad string in post - ' +
|
||||||
if badStr in messageJson['object']['content']:
|
messageJson['object']['content'])
|
||||||
if messageJson['object'].get('id'):
|
return False
|
||||||
print('REJECT ARBITRARY HTML: ' + messageJson['object']['id'])
|
|
||||||
print('REJECT ARBITRARY HTML: bad string in post - ' +
|
|
||||||
messageJson['object']['content'])
|
|
||||||
return False
|
|
||||||
# check (rough) number of mentions
|
# check (rough) number of mentions
|
||||||
mentionsEst = estimateNumberOfMentions(messageJson['object']['content'])
|
mentionsEst = estimateNumberOfMentions(messageJson['object']['content'])
|
||||||
if mentionsEst > maxMentions:
|
if mentionsEst > maxMentions:
|
||||||
|
|
33
tests.py
33
tests.py
|
@ -64,6 +64,7 @@ from media import getAttachmentMediaType
|
||||||
from delete import sendDeleteViaServer
|
from delete import sendDeleteViaServer
|
||||||
from inbox import validInbox
|
from inbox import validInbox
|
||||||
from inbox import validInboxFilenames
|
from inbox import validInboxFilenames
|
||||||
|
from content import dangerousMarkup
|
||||||
from content import removeHtml
|
from content import removeHtml
|
||||||
from content import addWebLinks
|
from content import addWebLinks
|
||||||
from content import replaceEmojiFromTags
|
from content import replaceEmojiFromTags
|
||||||
|
@ -1882,8 +1883,40 @@ def testRemoveHtml():
|
||||||
assert(removeHtml(testStr) == 'This string has html.')
|
assert(removeHtml(testStr) == 'This string has html.')
|
||||||
|
|
||||||
|
|
||||||
|
def testDangerousMarkup():
    """Checks dangerousMarkup against a set of known good and bad posts
    """
    print('testDangerousMarkup')
    # pairs of (post content, whether it is expected to be rejected)
    testCases = (
        ('<p>This is a valid message</p>', False),
        ('This is a valid message without markup', False),
        ('<p>This is a valid-looking message. But wait... '
         '<script>document.getElementById("concentrated")'
         '.innerHTML = "evil";</script></p>', True),
        ('<p>This is a valid-looking message. But wait... '
         '<script src="https://evilsite/payload.js" /></p>', True),
        ('<p>This message embeds an evil frame.'
         '<iframe src="somesite"></iframe></p>', True),
        ('<p>This message tries to obfuscate an evil frame.'
         '< iframe src = "somesite"></ iframe ></p>', True),
        ('<p>This message is not necessarily evil, but annoying.'
         '<hr><br><br><br><br><br><br><br><hr><hr></p>', True),
        ('<p>This message contans a '
         '<a href="https://validsite/index.html">valid link.</a></p>',
         False),
        ('<p>This message contans a '
         '<a href="https://validsite/iframe.html">'
         'valid link having invalid but harmless name.</a></p>', False)
    )
    for content, expectDangerous in testCases:
        if expectDangerous:
            assert(dangerousMarkup(content))
        else:
            assert(not dangerousMarkup(content))
|
||||||
|
|
||||||
|
|
||||||
def runAllTests():
|
def runAllTests():
|
||||||
print('Running tests...')
|
print('Running tests...')
|
||||||
|
testDangerousMarkup()
|
||||||
testRemoveHtml()
|
testRemoveHtml()
|
||||||
testSiteIsActive()
|
testSiteIsActive()
|
||||||
testJsonld()
|
testJsonld()
|
||||||
|
|
Loading…
Reference in New Issue