Whenever saving an image, verify that the binary looks like an image

This should help to head off any future nefariousness
main
Bob Mottram 2023-09-12 19:38:56 +01:00
parent 782cee6279
commit baf39c1518
5 changed files with 64 additions and 45 deletions

View File

@ -15,6 +15,7 @@ import email.parser
import urllib.parse
from shutil import copyfile
from dateutil.parser import parse
from utils import binary_is_image
from utils import get_content_from_post
from utils import get_full_domain
from utils import get_user_paths
@ -1648,6 +1649,14 @@ def save_media_in_form_post(media_bytes, debug: bool,
if not _valid_follows_csv(csv_str):
return None, None
# if this is an image then check that the binary looks like an image
image_extension_types = get_image_extensions()
if detected_extension in image_extension_types:
if not binary_is_image(filename, media_bytes[start_pos:]):
print('WARN: save_media_in_form_post ' +
'image binary not recognized ' + filename)
return None, None
try:
with open(filename, 'wb') as fp_media:
fp_media.write(media_bytes[start_pos:])
@ -2184,40 +2193,3 @@ def add_name_emojis_to_tags(base_dir: str, http_prefix: str,
if updated:
new_tag['updated'] = updated
actor_json['tag'].append(new_tag)
def binary_is_image(filename: str, media_binary) -> bool:
    """Returns true if the given file binary data contains an image

    The filename extension (case insensitive) selects which signature
    is looked for within the first bytes of media_binary. Binaries
    shorter than 13 bytes, and filenames without one of the extensions
    tested below, always return false.
    """
    if len(media_binary) < 13:
        return False
    filename_lower = filename.lower()

    if filename_lower.endswith(('.jpeg', '.jpg')):
        # a JPEG begins with the SOI marker (ff d8) followed by the ff
        # of the next marker. Testing this, rather than for a JFIF/Exif
        # tag at offset 6, also accepts valid files which have no
        # APP0/APP1 segment and rejects data lacking the SOI marker
        return media_binary.startswith(b'\xff\xd8\xff')
    if filename_lower.endswith('.ico'):
        return media_binary.startswith(b'\x00\x00\x01\x00')
    if filename_lower.endswith('.png'):
        return media_binary.startswith(b'\211PNG\r\n\032\n')
    if filename_lower.endswith('.webp'):
        return media_binary.startswith(b'RIFF') and \
            media_binary[8:12] == b'WEBP'
    if filename_lower.endswith('.gif'):
        return media_binary[:6] in (b'GIF87a', b'GIF89a')
    if filename_lower.endswith('.avif'):
        # ftyp box with the still image or image sequence brand
        return media_binary[4:12] in (b'ftypavif', b'ftypavis')
    if filename_lower.endswith('.heic'):
        # ftyp box with any of the HEIF/HEVC major brands
        heic_brands = (b'ftypheic', b'ftypheix', b'ftyphevc',
                       b'ftyphevx', b'ftypmif1', b'ftypmsf1')
        return media_binary[4:12] in heic_brands
    if filename_lower.endswith('.jxl'):
        # either a bare codestream or the ISO BMFF style container
        jxl_signatures = (b'\xff\n', b'\x00\x00\x00\x0cJXL \r\n\x87\n')
        return media_binary.startswith(jxl_signatures)
    if filename_lower.endswith('.svg'):
        # the tag consists of printable ascii characters, so it also
        # appears unchanged within the string form of a bytes object
        return '<svg' in str(media_binary)
    return False

View File

@ -378,10 +378,10 @@ from utils import dangerous_markup
from utils import refresh_newswire
from utils import is_image_file
from utils import has_group_type
from utils import binary_is_image
from manualapprove import manual_deny_follow_request_thread
from manualapprove import manual_approve_follow_request_thread
from announce import create_announce
from content import binary_is_image
from content import add_name_emojis_to_tags
from content import load_dogwhistles
from content import valid_url_lengths
@ -5217,6 +5217,9 @@ class PubServer(BaseHTTPRequestHandler):
media_filename = \
media_filename_base + '.' + \
get_image_extension_from_mime_type(self.headers['Content-type'])
if not binary_is_image(media_filename, media_bytes):
print('WARN: _receive_image image binary is not recognized ' +
media_filename)
try:
with open(media_filename, 'wb') as av_file:
av_file.write(media_bytes)

View File

@ -13,6 +13,7 @@ from utils import text_in_file
from utils import acct_dir
from utils import url_permitted
from utils import is_image_file
from utils import binary_is_image
from httpsig import create_signed_header
import json
from socket import error as SocketError
@ -791,11 +792,16 @@ def download_image(session, url: str, image_filename: str, debug: bool,
print('EX: download_image unable to delete ' +
image_filename)
else:
media_binary = result.content
if binary_is_image(image_filename, media_binary):
with open(image_filename, 'wb') as im_file:
im_file.write(result.content)
im_file.write(media_binary)
if debug:
print('Image downloaded from ' + url)
return True
else:
print('WARN: download_image binary not recognized ' +
image_filename)
except BaseException as ex:
print('EX: Failed to download image: ' +
str(url) + ' ' + str(ex))

View File

@ -4540,3 +4540,40 @@ def language_right_to_left(language: str) -> bool:
if language in rtl_languages:
return True
return False
def binary_is_image(filename: str, media_binary) -> bool:
    """Returns true if the given file binary data contains an image

    The filename extension (case insensitive) selects which signature
    is looked for within the first bytes of media_binary. Binaries
    shorter than 13 bytes, and filenames without one of the extensions
    tested below, always return false.
    """
    if len(media_binary) < 13:
        return False
    filename_lower = filename.lower()

    if filename_lower.endswith(('.jpeg', '.jpg')):
        # a JPEG begins with the SOI marker (ff d8) followed by the ff
        # of the next marker. Testing this, rather than for a JFIF/Exif
        # tag at offset 6, also accepts valid files which have no
        # APP0/APP1 segment and rejects data lacking the SOI marker
        return media_binary.startswith(b'\xff\xd8\xff')
    if filename_lower.endswith('.ico'):
        return media_binary.startswith(b'\x00\x00\x01\x00')
    if filename_lower.endswith('.png'):
        return media_binary.startswith(b'\211PNG\r\n\032\n')
    if filename_lower.endswith('.webp'):
        return media_binary.startswith(b'RIFF') and \
            media_binary[8:12] == b'WEBP'
    if filename_lower.endswith('.gif'):
        return media_binary[:6] in (b'GIF87a', b'GIF89a')
    if filename_lower.endswith('.avif'):
        # ftyp box with the still image or image sequence brand
        return media_binary[4:12] in (b'ftypavif', b'ftypavis')
    if filename_lower.endswith('.heic'):
        # ftyp box with any of the HEIF/HEVC major brands
        heic_brands = (b'ftypheic', b'ftypheix', b'ftyphevc',
                       b'ftyphevx', b'ftypmif1', b'ftypmsf1')
        return media_binary[4:12] in heic_brands
    if filename_lower.endswith('.jxl'):
        # either a bare codestream or the ISO BMFF style container
        jxl_signatures = (b'\xff\n', b'\x00\x00\x00\x0cJXL \r\n\x87\n')
        return media_binary.startswith(jxl_signatures)
    if filename_lower.endswith('.svg'):
        # the tag consists of printable ascii characters, so it also
        # appears unchanged within the string form of a bytes object
        return '<svg' in str(media_binary)
    return False

View File

@ -33,10 +33,10 @@ from utils import get_image_extensions
from utils import local_actor_url
from utils import text_in_file
from utils import remove_eol
from utils import binary_is_image
from filters import is_filtered
from cache import get_actor_public_key_from_id
from cache import store_person_in_cache
from content import binary_is_image
from content import add_html_tags
from content import replace_emoji_from_tags
from person import get_person_avatar_url
@ -409,7 +409,8 @@ def update_avatar_image_cache(signing_priv_key_pem: str,
return avatar_image_filename.replace(base_dir +
'/cache', '')
else:
print('WARN: avatar image binary not recognized ' +
print('WARN: update_avatar_image_cache ' +
'avatar image binary not recognized ' +
actor + ' ' + str(media_binary[0:20]))
except Exception as ex:
print('EX: Failed to download avatar image: ' +