Log svg scripts for subsequent review

merge-requests/30/head
Bob Mottram 2022-05-26 13:17:56 +01:00
parent 88fe018f1b
commit 5852de9746
3 changed files with 23 additions and 5 deletions

View File

@ -1683,7 +1683,8 @@ def create_edits_html(edits_json: {}, post_json_object: {},
edits_str + '</details>'
def remove_script(content: str) -> str:
def remove_script(content: str, log_filename: str,
actor: str, url: str) -> str:
"""Removes <script> from some content
"""
separators = [['<', '>'], ['&lt;', '&gt;']]
@ -1704,5 +1705,16 @@ def remove_script(content: str) -> str:
text = prefix + text.split(ending)[0] + ending
else:
text = prefix + text.split('/' + sep[1])[0] + '/' + sep[1]
if log_filename and actor:
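# append the detected script to a log file
# NOTE(review): the mode choice below looks inverted - 'w+' truncates
# an existing log, whereas 'a+' alone would create or append to it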
log_str = actor + ' ' + url + ' ' + text + '\n'
writeType = 'a+'
if os.path.isfile(log_filename):
writeType = 'w+'
try:
with open(log_filename, writeType) as fp_log:
fp_log.write(log_str)
except OSError:
print('EX: cannot append to svg script log')
content = content.replace(text, '')
return content

View File

@ -150,6 +150,10 @@ def _cache_svg_images(session, base_dir: str, http_prefix: str,
return False
cached = False
post_id = remove_id_ending(obj['id']).replace('/', '--')
actor = 'unknown'
if obj.get('attributedTo'):
actor = obj['attributedTo']
log_filename = base_dir + '/accounts/svg_scripts_log.txt'
for index in range(len(obj['attachment'])):
attach = obj['attachment'][index]
if not attach.get('mediaType'):
@ -169,7 +173,7 @@ def _cache_svg_images(session, base_dir: str, http_prefix: str,
continue
if '://' + i2p_domain in url:
continue
if '/' in filename:
if '/' in url:
filename = url.split('/')[-1]
else:
filename = url
@ -186,8 +190,10 @@ def _cache_svg_images(session, base_dir: str, http_prefix: str,
print('EX: unable to read svg file data')
if image_data:
image_data = image_data.decode()
cleaned_up = remove_script(image_data)
cleaned_up = \
remove_script(image_data, log_filename, actor, url)
if cleaned_up != image_data:
# write the cleaned up svg image
svg_written = False
cleaned_up = cleaned_up.encode('utf-8')
try:

View File

@ -3979,7 +3979,7 @@ def _test_danger_svg(base_dir: str) -> None:
' <circle cx="5" cy="5" r="4" />' + \
'</svg>'
assert not dangerous_svg(svg_content, False)
cleaned_up = remove_script(svg_content)
cleaned_up = remove_script(svg_content, None, None, None)
assert cleaned_up == svg_content
svg_content = \
' <svg viewBox="0 0 10 10" xmlns="http://www.w3.org/2000/svg">' + \
@ -4007,7 +4007,7 @@ def _test_danger_svg(base_dir: str) -> None:
' <circle cx="5" cy="5" r="4" />' + \
'</svg>'
cleaned_up = remove_script(svg_content)
cleaned_up = remove_script(svg_content, None, None, None)
assert '<script' not in cleaned_up
assert '/script>' not in cleaned_up
if cleaned_up != svg_clean: