mirror of https://gitlab.com/bashrc2/epicyon
Allow pre tag through dangerous markup filter in some cases, then remove it when rendering
parent
8efb5bedd4
commit
4caa930f67
|
@ -267,7 +267,7 @@ def dangerous_css(filename: str, allow_local_network_access: bool) -> bool:
|
|||
|
||||
# an attacker can include html inside of the css
|
||||
# file as a comment and this may then be run from the html
|
||||
if dangerous_markup(content, allow_local_network_access):
|
||||
if dangerous_markup(content, allow_local_network_access, []):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
|
|
@ -5544,7 +5544,7 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
if fields.get('editedAbout'):
|
||||
about_str = fields['editedAbout']
|
||||
if not dangerous_markup(about_str,
|
||||
allow_local_network_access):
|
||||
allow_local_network_access, []):
|
||||
try:
|
||||
with open(about_filename, 'w+',
|
||||
encoding='utf-8') as aboutfile:
|
||||
|
@ -5563,7 +5563,7 @@ class PubServer(BaseHTTPRequestHandler):
|
|||
if fields.get('editedTOS'):
|
||||
tos_str = fields['editedTOS']
|
||||
if not dangerous_markup(tos_str,
|
||||
allow_local_network_access):
|
||||
allow_local_network_access, []):
|
||||
try:
|
||||
with open(tos_filename, 'w+',
|
||||
encoding='utf-8') as tosfile:
|
||||
|
|
4
inbox.py
4
inbox.py
|
@ -1360,7 +1360,7 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
|
|||
if summary != valid_content_warning(summary):
|
||||
print('WARN: invalid content warning ' + summary)
|
||||
return False
|
||||
if dangerous_markup(summary, allow_local_network_access):
|
||||
if dangerous_markup(summary, allow_local_network_access, []):
|
||||
if message_json['object'].get('id'):
|
||||
print('REJECT ARBITRARY HTML: ' + message_json['object']['id'])
|
||||
print('REJECT ARBITRARY HTML: bad string in summary - ' +
|
||||
|
@ -1384,7 +1384,7 @@ def _valid_post_content(base_dir: str, nickname: str, domain: str,
|
|||
return False
|
||||
|
||||
content_str = get_base_content_from_post(message_json, system_language)
|
||||
if dangerous_markup(content_str, allow_local_network_access):
|
||||
if dangerous_markup(content_str, allow_local_network_access, ['pre']):
|
||||
if message_json['object'].get('id'):
|
||||
print('REJECT ARBITRARY HTML: ' + message_json['object']['id'])
|
||||
if debug:
|
||||
|
|
|
@ -601,8 +601,8 @@ def _convert_rss_to_activitypub(base_dir: str, http_prefix: str,
|
|||
|
||||
rss_title = _remove_control_characters(item[0])
|
||||
url = item[1]
|
||||
if dangerous_markup(url, allow_local_network_access) or \
|
||||
dangerous_markup(rss_title, allow_local_network_access):
|
||||
if dangerous_markup(url, allow_local_network_access, []) or \
|
||||
dangerous_markup(rss_title, allow_local_network_access, []):
|
||||
continue
|
||||
rss_description = ''
|
||||
|
||||
|
|
|
@ -303,7 +303,7 @@ def post_message_to_outbox(session, translate: {},
|
|||
system_language, translate,
|
||||
'nowplaying', 'NowPlaying')
|
||||
|
||||
if dangerous_markup(content_str, allow_local_network_access):
|
||||
if dangerous_markup(content_str, allow_local_network_access, []):
|
||||
print('POST to outbox contains dangerous markup: ' +
|
||||
str(message_json))
|
||||
return False
|
||||
|
|
4
posts.py
4
posts.py
|
@ -413,7 +413,7 @@ def get_person_box(signing_priv_key_pem: str, origin_domain: str,
|
|||
display_name = None
|
||||
if person_json.get('name'):
|
||||
display_name = person_json['name']
|
||||
if dangerous_markup(person_json['name'], False):
|
||||
if dangerous_markup(person_json['name'], False, []):
|
||||
display_name = '*ADVERSARY*'
|
||||
elif is_filtered(base_dir,
|
||||
nickname, domain,
|
||||
|
@ -5549,7 +5549,7 @@ def download_announce(session, base_dir: str, http_prefix: str,
|
|||
if announced_json['contentMap'].get(system_language):
|
||||
content_str = announced_json['contentMap'][system_language]
|
||||
using_content_map = True
|
||||
if dangerous_markup(content_str, allow_local_network_access):
|
||||
if dangerous_markup(content_str, allow_local_network_access, []):
|
||||
print('WARN: announced post contains dangerous markup ' +
|
||||
str(announced_json))
|
||||
_reject_announce(announce_filename,
|
||||
|
|
|
@ -229,6 +229,7 @@ def dangerous_question(question_json: {},
|
|||
question_options = question_json['object']['oneOf']
|
||||
for option in question_options:
|
||||
if option.get('name'):
|
||||
if dangerous_markup(option['name'], allow_local_network_access):
|
||||
if dangerous_markup(option['name'],
|
||||
allow_local_network_access, []):
|
||||
return True
|
||||
return False
|
||||
|
|
65
tests.py
65
tests.py
|
@ -55,6 +55,7 @@ from follow import clear_followers
|
|||
from follow import send_follow_request_via_server
|
||||
from follow import send_unfollow_request_via_server
|
||||
from siteactive import site_is_active
|
||||
from utils import remove_markup_tag
|
||||
from utils import remove_style_within_html
|
||||
from utils import html_tag_has_closing
|
||||
from utils import remove_inverted_text
|
||||
|
@ -4189,75 +4190,75 @@ def _test_danger_markup():
|
|||
print('test_dangerous_markup')
|
||||
allow_local_network_access = False
|
||||
content = '<p>This is a valid message</p>'
|
||||
assert not dangerous_markup(content, allow_local_network_access)
|
||||
assert not dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = 'This is a valid message without markup'
|
||||
assert not dangerous_markup(content, allow_local_network_access)
|
||||
assert not dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This is a valid-looking message. But wait... ' + \
|
||||
'<script>document.getElementById("concentrated")' + \
|
||||
'.innerHTML = "evil";</script></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This is a valid-looking message. But wait... ' + \
|
||||
'<script>document.getElementById("concentrated")' + \
|
||||
'.innerHTML = "evil";</script></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This html contains more than you expected... ' + \
|
||||
'<script language="javascript">document.getElementById("abc")' + \
|
||||
'.innerHTML = "def";</script></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This html contains more than you expected... ' + \
|
||||
'<?php $server_output = curl_exec($ch); ?></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This is a valid-looking message. But wait... ' + \
|
||||
'<script src="https://evilsite/payload.js" /></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This is a valid-looking message. But it contains ' + \
|
||||
'spyware. <amp-analytics type="gtag" ' + \
|
||||
'data-credentials="include"></amp-analytics></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This is a valid-looking message. But it contains ' + \
|
||||
'<a href="something.googleapis.com/anotherthing">spyware.</a></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This message embeds an evil frame.' + \
|
||||
'<iframe src="somesite"></iframe></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This message tries to obfuscate an evil frame.' + \
|
||||
'< iframe src = "somesite"></ iframe ></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This message is not necessarily evil, but annoying.' + \
|
||||
'<hr><br><br><br><br><br><br><br><hr><hr></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This message contans a ' + \
|
||||
'<a href="https://validsite/index.html">valid link.</a></p>'
|
||||
assert not dangerous_markup(content, allow_local_network_access)
|
||||
assert not dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This message contans a ' + \
|
||||
'<a href="https://validsite/iframe.html">' + \
|
||||
'valid link having invalid but harmless name.</a></p>'
|
||||
assert not dangerous_markup(content, allow_local_network_access)
|
||||
assert not dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This message which <a href="127.0.0.1:8736">' + \
|
||||
'tries to access the local network</a></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>This message which <a href="http://192.168.5.10:7235">' + \
|
||||
'tries to access the local network</a></p>'
|
||||
assert dangerous_markup(content, allow_local_network_access)
|
||||
assert dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
content = '<p>127.0.0.1 This message which does not access ' + \
|
||||
'the local network</a></p>'
|
||||
assert not dangerous_markup(content, allow_local_network_access)
|
||||
assert not dangerous_markup(content, allow_local_network_access, [])
|
||||
|
||||
|
||||
def _run_html_replace_quote_marks():
|
||||
|
@ -7983,6 +7984,35 @@ def _test_featured_tags() -> None:
|
|||
assert result == featured_tags
|
||||
|
||||
|
||||
def _test_remove_tag() -> None:
    """Checks that remove_markup_tag strips pre tags out of html markup
    while leaving the surrounding text and other tags in place
    """
    print('remove_tag')
    tag_name = 'pre'

    # markup which does not contain the tag comes back unaltered
    plain_text = 'This is a test'
    assert remove_markup_tag(plain_text, tag_name) == plain_text

    # pairs of (markup containing the tag, expected markup without it)
    test_cases = (
        ('<pre>This is a test</pre>',
         'This is a test'),
        ('Previous <pre>this is a test</pre>',
         'Previous this is a test'),
        ('<pre>This is a test</pre><br>' +
         'something<br><pre>again</pre>',
         'This is a test<br>something<br>again')
    )
    for markup, expected in test_cases:
        actual = remove_markup_tag(markup, tag_name)
        if actual != expected:
            # show the mismatch before the assertion fails
            print('expected: ' + expected)
            print('result: ' + actual)
        assert actual == expected
|
||||
|
||||
|
||||
def run_all_tests():
|
||||
base_dir = os.getcwd()
|
||||
print('Running tests...')
|
||||
|
@ -8000,6 +8030,7 @@ def run_all_tests():
|
|||
_test_checkbox_names()
|
||||
_test_thread_functions()
|
||||
_test_functions()
|
||||
_test_remove_tag()
|
||||
_test_featured_tags()
|
||||
_test_xor_hashes()
|
||||
_test_convert_markdown()
|
||||
|
|
55
utils.py
55
utils.py
|
@ -192,6 +192,38 @@ def has_object_dict(post_json_object: {}) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def remove_markup_tag(html: str, tag: str) -> str:
    """Remove the given tag from the given html markup

    Both the opening form ('<tag ...>') and the closing form ('</tag>')
    are deleted, while the text between them is kept.

    html: the markup to be cleaned
    tag: name of the tag without angle brackets, eg. 'pre'

    Returns the markup with the tag removed. If no opening tag is
    present then the markup is returned unaltered, including any
    stray closing tags.

    Matching is done on the '<' + tag prefix, so a tag whose name merely
    begins with the given name is also stripped.
    """
    if '<' + tag not in html:
        return html

    # strip the opening markers first, then the closing ones
    for marker in ('<' + tag, '</' + tag):
        pieces = html.split(marker)
        # anything before the first marker is always kept
        kept = [pieces[0]]
        for piece in pieces[1:]:
            # everything up to and including the first '>' belongs
            # to the tag itself (eg. attributes) and is discarded
            _, bracket, remainder = piece.partition('>')
            if bracket:
                kept.append(remainder)
            else:
                # Unterminated tag: drop only the marker and keep the
                # text. Previously this case could raise an IndexError
                kept.append(piece)
        html = ''.join(kept)

    return html
|
||||
|
||||
|
||||
def get_content_from_post(post_json_object: {}, system_language: str,
|
||||
languages_understood: [],
|
||||
content_type: str = "content") -> str:
|
||||
|
@ -213,6 +245,7 @@ def get_content_from_post(post_json_object: {}, system_language: str,
|
|||
sys_lang = this_post_json[map_dict][system_language]
|
||||
if isinstance(sys_lang, str):
|
||||
content = this_post_json[map_dict][system_language]
|
||||
content = remove_markup_tag(content, 'pre')
|
||||
return standardize_text(content)
|
||||
else:
|
||||
# is there a contentMap/summaryMap entry for one of
|
||||
|
@ -220,10 +253,12 @@ def get_content_from_post(post_json_object: {}, system_language: str,
|
|||
for lang in languages_understood:
|
||||
if this_post_json[map_dict].get(lang):
|
||||
content = this_post_json[map_dict][lang]
|
||||
content = remove_markup_tag(content, 'pre')
|
||||
return standardize_text(content)
|
||||
else:
|
||||
if isinstance(this_post_json[content_type], str):
|
||||
content = this_post_json[content_type]
|
||||
content = remove_markup_tag(content, 'pre')
|
||||
return standardize_text(content)
|
||||
|
||||
|
||||
|
@ -1182,7 +1217,8 @@ def html_tag_has_closing(tag_name: str, content: str) -> bool:
|
|||
return True
|
||||
|
||||
|
||||
def dangerous_markup(content: str, allow_local_network_access: bool) -> bool:
|
||||
def dangerous_markup(content: str, allow_local_network_access: bool,
|
||||
allow_tags: []) -> bool:
|
||||
"""Returns true if the given content contains dangerous html markup
|
||||
"""
|
||||
separators = [['<', '>'], ['<', '>']]
|
||||
|
@ -1198,8 +1234,11 @@ def dangerous_markup(content: str, allow_local_network_access: bool) -> bool:
|
|||
invalid_strings = [
|
||||
'script', 'noscript', 'canvas', 'style', 'abbr', 'input',
|
||||
'frame', 'iframe', 'html', 'body', 'hr', 'allow-popups',
|
||||
'allow-scripts', 'amp-', '?php'
|
||||
'allow-scripts', 'amp-', '?php', 'pre'
|
||||
]
|
||||
for allowed in allow_tags:
|
||||
if allowed in invalid_strings:
|
||||
invalid_strings.remove(allowed)
|
||||
return _is_dangerous_string_tag(content, allow_local_network_access,
|
||||
separators, invalid_strings)
|
||||
|
||||
|
@ -1236,7 +1275,7 @@ def get_display_name(base_dir: str, actor: str, person_cache: {}) -> str:
|
|||
if actor_json.get('name'):
|
||||
name_found = actor_json['name']
|
||||
if name_found:
|
||||
if dangerous_markup(name_found, False):
|
||||
if dangerous_markup(name_found, False, []):
|
||||
name_found = "*ADVERSARY*"
|
||||
return standardize_text(name_found)
|
||||
|
||||
|
@ -4333,19 +4372,25 @@ def harmless_markup(post_json_object: {}) -> None:
|
|||
for field_name in ('content', 'summary'):
|
||||
if post_json_object['object'].get(field_name):
|
||||
if dangerous_markup(post_json_object['object'][field_name],
|
||||
False):
|
||||
False, ['pre']):
|
||||
post_json_object['object'][field_name] = \
|
||||
remove_html(post_json_object['object'][field_name])
|
||||
post_json_object['object'][field_name] = \
|
||||
remove_markup_tag(post_json_object['object'][field_name],
|
||||
'pre')
|
||||
map_name = field_name + 'Map'
|
||||
if post_json_object['object'].get(map_name):
|
||||
map_dict = post_json_object['object'][map_name].items()
|
||||
for lang, content in map_dict:
|
||||
if not isinstance(content, str):
|
||||
continue
|
||||
if dangerous_markup(content, False):
|
||||
if dangerous_markup(content, False, ['pre']):
|
||||
content = remove_html(content)
|
||||
post_json_object['object'][map_name][lang] = \
|
||||
content
|
||||
content = post_json_object['object'][map_name][lang]
|
||||
post_json_object['object'][map_name][lang] = \
|
||||
remove_markup_tag(content, 'pre')
|
||||
|
||||
|
||||
def ap_proxy_type(json_object: {}) -> str:
|
||||
|
|
|
@ -92,7 +92,7 @@ def _get_help_for_timeline(base_dir: str, box_name: str) -> str:
|
|||
instance_title = 'Epicyon'
|
||||
with open(help_filename, 'r', encoding='utf-8') as help_file:
|
||||
help_text = help_file.read()
|
||||
if dangerous_markup(help_text, False):
|
||||
if dangerous_markup(help_text, False, []):
|
||||
return ''
|
||||
help_text = help_text.replace('INSTANCE', instance_title)
|
||||
return '<div class="container">\n' + \
|
||||
|
|
|
@ -1269,7 +1269,7 @@ def get_post_attachments_as_html(base_dir: str,
|
|||
continue
|
||||
media_license = ''
|
||||
if attach.get('schema:license'):
|
||||
if not dangerous_markup(attach['schema:license'], False):
|
||||
if not dangerous_markup(attach['schema:license'], False, []):
|
||||
if not is_filtered(base_dir, nickname, domain,
|
||||
attach['schema:license'],
|
||||
system_language):
|
||||
|
@ -1279,7 +1279,7 @@ def get_post_attachments_as_html(base_dir: str,
|
|||
else:
|
||||
media_license = attach['schema:license']
|
||||
elif attach.get('license'):
|
||||
if not dangerous_markup(attach['license'], False):
|
||||
if not dangerous_markup(attach['license'], False, []):
|
||||
if not is_filtered(base_dir, nickname, domain,
|
||||
attach['license'],
|
||||
system_language):
|
||||
|
@ -1291,7 +1291,7 @@ def get_post_attachments_as_html(base_dir: str,
|
|||
media_creator = ''
|
||||
if attach.get('schema:creator'):
|
||||
if len(attach['schema:creator']) < 120:
|
||||
if not dangerous_markup(attach['schema:creator'], False):
|
||||
if not dangerous_markup(attach['schema:creator'], False, []):
|
||||
if not is_filtered(base_dir, nickname, domain,
|
||||
attach['schema:creator'],
|
||||
system_language):
|
||||
|
@ -1300,7 +1300,7 @@ def get_post_attachments_as_html(base_dir: str,
|
|||
if isinstance(attach['attribution'], list):
|
||||
if len(attach['attribution']) > 0:
|
||||
attrib_str = attach['attribution'][0]
|
||||
if not dangerous_markup(attrib_str, False):
|
||||
if not dangerous_markup(attrib_str, False, []):
|
||||
if not is_filtered(base_dir, nickname, domain,
|
||||
attrib_str, system_language):
|
||||
media_creator = attrib_str
|
||||
|
|
Loading…
Reference in New Issue