From 40bf726eea2c2a017c7a86f0b13f6e6d0654d6f6 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Tue, 22 Mar 2022 18:22:09 +0000
Subject: [PATCH 1/5] Check html for spyware
---
tests.py | 9 ++++++++
utils.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++----------
2 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/tests.py b/tests.py
index c852060ff..d23461a08 100644
--- a/tests.py
+++ b/tests.py
@@ -3919,6 +3919,15 @@ def _test_danger_markup():
'
'
assert dangerous_markup(content, allow_local_network_access)
+ content = 'This is a valid-looking message. But it contains ' + \
+ 'spyware.
'
+ assert dangerous_markup(content, allow_local_network_access)
+
+ content = 'This is a valid-looking message. But it contains ' + \
+ 'spyware.
'
+ assert dangerous_markup(content, allow_local_network_access)
+
content = 'This message embeds an evil frame.' + \
'
'
assert dangerous_markup(content, allow_local_network_access)
diff --git a/utils.py b/utils.py
index e647e5680..bedd024c7 100644
--- a/utils.py
+++ b/utils.py
@@ -884,8 +884,8 @@ def is_local_network_address(ip_address: str) -> bool:
return False
-def _is_dangerous_string(content: str, allow_local_network_access: bool,
- separators: [], invalid_strings: []) -> bool:
+def _is_dangerous_string_tag(content: str, allow_local_network_access: bool,
+ separators: [], invalid_strings: []) -> bool:
"""Returns true if the given string is dangerous
"""
for separator_style in separators:
@@ -908,12 +908,48 @@ def _is_dangerous_string(content: str, allow_local_network_access: bool,
return True
if ' ' not in markup:
for bad_str in invalid_strings:
- if bad_str in markup:
- return True
+ if not bad_str.endswith('-'):
+ if bad_str in markup:
+ return True
+ else:
+ if markup.startswith(bad_str):
+ return True
else:
for bad_str in invalid_strings:
- if bad_str + ' ' in markup:
- return True
+ if not bad_str.endswith('-'):
+ if bad_str + ' ' in markup:
+ return True
+ else:
+ if markup.startswith(bad_str):
+ return True
+ return False
+
+
+def _is_dangerous_string_simple(content: str, allow_local_network_access: bool,
+ separators: [], invalid_strings: []) -> bool:
+ """Returns true if the given string is dangerous
+
+ This is a plain substring check. For each [start, end] pair in
+ separators the content is split on the start separator and, in every
+ piece which contains the end separator, the text before that end
+ separator is searched for each entry of invalid_strings. When
+ allow_local_network_access is false the values returned by
+ get_local_network_addresses() are searched for in the same way.
+ Returns false if nothing matches
+ """
+ for separator_style in separators:
+ start_char = separator_style[0]
+ end_char = separator_style[1]
+ # a separator pair is only examined if both halves occur in content
+ if start_char not in content:
+ continue
+ if end_char not in content:
+ continue
+ content_sections = content.split(start_char)
+ # local network addresses are only forbidden when access to the
+ # local network has not been allowed
+ invalid_partials = ()
+ if not allow_local_network_access:
+ invalid_partials = get_local_network_addresses()
+ for markup in content_sections:
+ if end_char not in markup:
+ continue
+ # keep only the text before the first end separator
+ markup = markup.split(end_char)[0].strip()
+ for partial_match in invalid_partials:
+ if partial_match in markup:
+ return True
+ # a match anywhere inside the markup is enough
+ for bad_str in invalid_strings:
+ if bad_str in markup:
+ return True
return False
@@ -921,13 +957,20 @@ def dangerous_markup(content: str, allow_local_network_access: bool) -> bool:
"""Returns true if the given content contains dangerous html markup
"""
separators = [['<', '>'], ['&lt;', '&gt;']]
+ invalid_strings = [
+ 'analytics', 'ampproject', 'googleapis'
+ ]
+ if _is_dangerous_string_simple(content, allow_local_network_access,
+ separators, invalid_strings):
+ return True
invalid_strings = [
'script', 'noscript', 'code', 'pre',
'canvas', 'style', 'abbr',
'frame', 'iframe', 'html', 'body',
- 'hr', 'allow-popups', 'allow-scripts'
- ]
- return _is_dangerous_string(content, allow_local_network_access,
+ 'hr', 'allow-popups', 'allow-scripts',
+ 'amp-'
+ ]
+ return _is_dangerous_string_tag(content, allow_local_network_access,
separators, invalid_strings)
@@ -938,8 +981,8 @@ def dangerous_svg(content: str, allow_local_network_access: bool) -> bool:
invalid_strings = [
'script'
]
- return _is_dangerous_string(content, allow_local_network_access,
- separators, invalid_strings)
+ return _is_dangerous_string_tag(content, allow_local_network_access,
+ separators, invalid_strings)
def get_display_name(base_dir: str, actor: str, person_cache: {}) -> str:
From 1a1bfabc668345b808c11215ad863cf8d694c5a0 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Tue, 22 Mar 2022 18:25:42 +0000
Subject: [PATCH 2/5] Tidying
---
utils.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/utils.py b/utils.py
index bedd024c7..5158ae9b1 100644
--- a/utils.py
+++ b/utils.py
@@ -969,9 +969,9 @@ def dangerous_markup(content: str, allow_local_network_access: bool) -> bool:
'frame', 'iframe', 'html', 'body',
'hr', 'allow-popups', 'allow-scripts',
'amp-'
- ]
+ ]
return _is_dangerous_string_tag(content, allow_local_network_access,
- separators, invalid_strings)
+ separators, invalid_strings)
def dangerous_svg(content: str, allow_local_network_access: bool) -> bool:
From b3eedd85d73e5f29c954d00b3142a43fdf3e5bb5 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Wed, 23 Mar 2022 14:29:55 +0000
Subject: [PATCH 3/5] Another peertube site
---
webapp_media.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/webapp_media.py b/webapp_media.py
index d57585792..843dcd992 100644
--- a/webapp_media.py
+++ b/webapp_media.py
@@ -153,6 +153,7 @@ def _add_embedded_video_from_sites(translate: {}, content: str,
peertube_sites = (
'share.tube',
'visionon.tv',
+ 'anarchy.tube',
'peertube.fr',
'kolektiva.media',
'peertube.social',
From 44d386db86570601859e6b9dda66d6b5d8b78b6d Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Wed, 23 Mar 2022 15:37:32 +0000
Subject: [PATCH 4/5] Another peertube site
---
webapp_media.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/webapp_media.py b/webapp_media.py
index 843dcd992..87cd44f25 100644
--- a/webapp_media.py
+++ b/webapp_media.py
@@ -155,6 +155,7 @@ def _add_embedded_video_from_sites(translate: {}, content: str,
'visionon.tv',
'anarchy.tube',
'peertube.fr',
+ 'video.nerdcave.site',
'kolektiva.media',
'peertube.social',
'videos.lescommuns.org'
From 95eed2183a8eb006ef786370e0118902ae486308 Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Wed, 23 Mar 2022 22:24:49 +0000
Subject: [PATCH 5/5] Check for type
---
daemon.py | 18 +++++++++++-------
inbox.py | 2 ++
2 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/daemon.py b/daemon.py
index 421456f21..1c3d22acb 100644
--- a/daemon.py
+++ b/daemon.py
@@ -6657,14 +6657,16 @@ class PubServer(BaseHTTPRequestHandler):
# this account is a bot
if fields.get('isBot'):
- if fields['isBot'] == 'on':
+ if fields['isBot'] == 'on' and \
+ actor_json.get('type'):
if actor_json['type'] != 'Service':
actor_json['type'] = 'Service'
actor_changed = True
else:
# this account is a group
if fields.get('isGroup'):
- if fields['isGroup'] == 'on':
+ if fields['isGroup'] == 'on' and \
+ actor_json.get('type'):
if actor_json['type'] != 'Group':
# only allow admin to create groups
if path.startswith('/users/' +
@@ -6673,9 +6675,10 @@ class PubServer(BaseHTTPRequestHandler):
actor_changed = True
else:
# this account is a person (default)
- if actor_json['type'] != 'Person':
- actor_json['type'] = 'Person'
- actor_changed = True
+ if actor_json.get('type'):
+ if actor_json['type'] != 'Person':
+ actor_json['type'] = 'Person'
+ actor_changed = True
# grayscale theme
if path.startswith('/users/' + admin_nickname + '/') or \
@@ -7700,8 +7703,9 @@ class PubServer(BaseHTTPRequestHandler):
moved_to = actor_json['movedTo']
if '"' in moved_to:
moved_to = moved_to.split('"')[1]
- if actor_json['type'] == 'Group':
- is_group = True
+ if actor_json.get('type'):
+ if actor_json['type'] == 'Group':
+ is_group = True
locked_account = get_locked_account(actor_json)
donate_url = get_donation_url(actor_json)
website_url = get_website(actor_json, self.server.translate)
diff --git a/inbox.py b/inbox.py
index 7006a156d..2aeb4df21 100644
--- a/inbox.py
+++ b/inbox.py
@@ -2690,6 +2690,8 @@ def _group_handle(base_dir: str, handle: str) -> bool:
actor_json = load_json(actor_file)
if not actor_json:
return False
+ if not actor_json.get('type'):
+ return False
return actor_json['type'] == 'Group'