From 8070d2a27614565fb27bbde72b5b727eca4752a2 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Tue, 8 Mar 2022 12:40:15 +0000 Subject: [PATCH 1/2] More robot strings --- crawlers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crawlers.py b/crawlers.py index 952f3ffdd..8bdffa1c2 100644 --- a/crawlers.py +++ b/crawlers.py @@ -122,7 +122,13 @@ def blocked_user_agent(calling_domain: str, agent_str: str, if agent_str: # is this a web crawler? If so then block it by default # unless this is a news instance or if it is in the allowed list - if 'bot/' in agent_str_lower or 'bot-' in agent_str_lower: + bot_strings = ('bot/', 'bot-', '/bot', '/robot') + contains_bot_string = False + for bot_str in bot_strings: + if bot_str in agent_str_lower: + contains_bot_string = True + break + if contains_bot_string: if agent_str_lower not in known_bots: known_bots.append(agent_str_lower) known_bots.sort() From 7ce38ceef8b7a15a02b54025055593d023cc23cd Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Tue, 8 Mar 2022 16:09:11 +0000 Subject: [PATCH 2/2] Also ignore youtube playlists --- webapp_media.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webapp_media.py b/webapp_media.py index db642168b..d57585792 100644 --- a/webapp_media.py +++ b/webapp_media.py @@ -55,7 +55,7 @@ def _add_embedded_video_from_sites(translate: {}, content: str, url = content.split('"' + video_site)[1] if '"' in url: url = url.split('"')[0] - if '/channel/' not in url: + if '/channel/' not in url and '/playlist' not in url: url = url.replace('/watch?v=', '/embed/') if '&' in url: url = url.split('&')[0] @@ -74,7 +74,7 @@ def _add_embedded_video_from_sites(translate: {}, content: str, url = content.split('"' + video_site)[1] if '"' in url: url = url.split('"')[0] - if '/channel/' not in url: + if '/channel/' not in url and '/playlist' not in url: url = 'embed/' + url if '&' in url: url = url.split('&')[0]