Check html for spyware

main
Bob Mottram 2022-03-22 18:22:09 +00:00
parent f5a7cbad6e
commit 40bf726eea
2 changed files with 63 additions and 11 deletions


@@ -3919,6 +3919,15 @@ def _test_danger_markup():
         '<script src="https://evilsite/payload.js" /></p>'
     assert dangerous_markup(content, allow_local_network_access)
+    content = '<p>This is a valid-looking message. But it contains ' + \
+        'spyware. <amp-analytics type="gtag" ' + \
+        'data-credentials="include"></amp-analytics></p>'
+    assert dangerous_markup(content, allow_local_network_access)
+
+    content = '<p>This is a valid-looking message. But it contains ' + \
+        '<a href="something.googleapis.com/anotherthing">spyware.</a></p>'
+    assert dangerous_markup(content, allow_local_network_access)
+
     content = '<p>This message embeds an evil frame.' + \
         '<iframe src="somesite"></iframe></p>'
     assert dangerous_markup(content, allow_local_network_access)
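The new test cases cover both of the detection passes introduced in this commit. A minimal standalone sketch of the expected behaviour, assuming the changed functions live in a utils module (the import path is illustrative):

from utils import dangerous_markup  # assumed module path

# flagged: 'analytics' is a substring inside the tag, and the new
# 'amp-' entry would also match the tag name as a prefix
assert dangerous_markup('<amp-analytics></amp-analytics>', False)

# flagged: 'googleapis' appears inside the anchor tag's href attribute
assert dangerous_markup('<a href="x.googleapis.com/y">link</a>', False)

# not flagged: the same words outside of any markup are harmless
assert not dangerous_markup('analytics can be discussed in plain text', False)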


@@ -884,8 +884,8 @@ def is_local_network_address(ip_address: str) -> bool:
     return False


-def _is_dangerous_string(content: str, allow_local_network_access: bool,
-                         separators: [], invalid_strings: []) -> bool:
+def _is_dangerous_string_tag(content: str, allow_local_network_access: bool,
+                             separators: [], invalid_strings: []) -> bool:
     """Returns true if the given string is dangerous
     """
     for separator_style in separators:
@@ -908,12 +908,48 @@ def _is_dangerous_string(content: str, allow_local_network_access: bool,
                     return True
             if ' ' not in markup:
                 for bad_str in invalid_strings:
-                    if bad_str in markup:
-                        return True
+                    if not bad_str.endswith('-'):
+                        if bad_str in markup:
+                            return True
+                    else:
+                        if markup.startswith(bad_str):
+                            return True
             else:
                 for bad_str in invalid_strings:
-                    if bad_str + ' ' in markup:
-                        return True
+                    if not bad_str.endswith('-'):
+                        if bad_str + ' ' in markup:
+                            return True
+                    else:
+                        if markup.startswith(bad_str):
+                            return True
     return False


+def _is_dangerous_string_simple(content: str, allow_local_network_access: bool,
+                                separators: [], invalid_strings: []) -> bool:
+    """Returns true if the given string is dangerous
+    """
+    for separator_style in separators:
+        start_char = separator_style[0]
+        end_char = separator_style[1]
+        if start_char not in content:
+            continue
+        if end_char not in content:
+            continue
+        content_sections = content.split(start_char)
+        invalid_partials = ()
+        if not allow_local_network_access:
+            invalid_partials = get_local_network_addresses()
+        for markup in content_sections:
+            if end_char not in markup:
+                continue
+            markup = markup.split(end_char)[0].strip()
+            for partial_match in invalid_partials:
+                if partial_match in markup:
+                    return True
+            for bad_str in invalid_strings:
+                if bad_str in markup:
+                    return True
+    return False
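A trailing hyphen in invalid_strings now acts as a prefix marker: an entry such as 'amp-' matches any tag whose name begins with that prefix, instead of being searched for as a plain substring. A simplified sketch of just that branch (the real function above also distinguishes tags with and without attributes):

def _matches(markup: str, bad_str: str) -> bool:
    # illustrative helper, not part of the commit
    if bad_str.endswith('-'):
        # prefix entries such as 'amp-' match <amp-analytics>, <amp-pixel>, ...
        return markup.startswith(bad_str)
    return bad_str in markup

assert _matches('amp-analytics type="gtag"', 'amp-')
assert _matches('iframe src="somesite"', 'frame')
# a plain substring test for 'amp' would false-positive here
assert not _matches('stamp-collecting', 'amp-')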
@@ -921,13 +957,20 @@ def dangerous_markup(content: str, allow_local_network_access: bool) -> bool:
     """Returns true if the given content contains dangerous html markup
     """
     separators = [['<', '>'], ['&lt;', '&gt;']]
+    invalid_strings = [
+        'analytics', 'ampproject', 'googleapis'
+    ]
+    if _is_dangerous_string_simple(content, allow_local_network_access,
+                                   separators, invalid_strings):
+        return True
     invalid_strings = [
         'script', 'noscript', 'code', 'pre',
         'canvas', 'style', 'abbr',
         'frame', 'iframe', 'html', 'body',
-        'hr', 'allow-popups', 'allow-scripts'
+        'hr', 'allow-popups', 'allow-scripts',
+        'amp-'
     ]
-    return _is_dangerous_string(content, allow_local_network_access,
-                                separators, invalid_strings)
+    return _is_dangerous_string_tag(content, allow_local_network_access,
+                                    separators, invalid_strings)
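dangerous_markup now makes two passes: _is_dangerous_string_simple flags the spyware indicators ('analytics', 'ampproject', 'googleapis') wherever they appear inside a tag, including attribute values, and _is_dangerous_string_tag then flags dangerous element names, with the new 'amp-' entry matching as a prefix. A rough trace, under the same import assumption as above:

html = '<p>hello <amp-pixel src="https://tracker/px"></amp-pixel></p>'
# pass 1: none of 'analytics', 'ampproject' or 'googleapis' occur here
# pass 2: the tag name 'amp-pixel' starts with 'amp-', so it is flagged
assert dangerous_markup(html, False)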
@@ -938,8 +981,8 @@ def dangerous_svg(content: str, allow_local_network_access: bool) -> bool:
     invalid_strings = [
         'script'
     ]
-    return _is_dangerous_string(content, allow_local_network_access,
-                                separators, invalid_strings)
+    return _is_dangerous_string_tag(content, allow_local_network_access,
+                                    separators, invalid_strings)


def get_display_name(base_dir: str, actor: str, person_cache: {}) -> str:
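dangerous_svg keeps its single 'script' entry and simply switches to the renamed tag scan, so script elements embedded in uploaded SVG files are still rejected. For example, assuming dangerous_svg uses the same angle-bracket separators (they are defined above the lines shown in this hunk):

svg = '<svg viewBox="0 0 1 1"><script>alert(1)</script></svg>'
assert dangerous_svg(svg, False)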