From 3f653dc90b4044c6fad1f8eeefcc3e0ca95f051d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 15:16:04 +0100 Subject: [PATCH 01/13] Check User-Agent during POST --- daemon.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/daemon.py b/daemon.py index 3c76d90a3..fcd2fc30b 100644 --- a/daemon.py +++ b/daemon.py @@ -14130,6 +14130,10 @@ class PubServer(BaseHTTPRequestHandler): self._400() return + if self._blockedUserAgent(): + self._400() + return + self.server.POSTbusy = True if not self.headers.get('Content-type'): print('Content-type header missing') From 4decc56a37f90d64e39de8216a056219c2dd040a Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 15:23:20 +0100 Subject: [PATCH 02/13] Check user agent domain against calling domain --- daemon.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/daemon.py b/daemon.py index fcd2fc30b..018d00950 100644 --- a/daemon.py +++ b/daemon.py @@ -473,17 +473,19 @@ class PubServer(BaseHTTPRequestHandler): return None return agentDomain - def _blockedUserAgent(self) -> bool: + def _blockedUserAgent(self, callingDomain: str) -> bool: """Should a GET or POST be blocked based upon its user agent? """ agentDomain = self._userAgentDomain() + blockedUA = False if not agentDomain: if self.server.userAgentDomainRequired: return True - return False - blockedUA = isBlockedDomain(self.server.baseDir, agentDomain) - if blockedUA and self.server.debug: - print('Blocked User agent: ' + agentDomain) + return blockedUA + if not agentDomain.startswith(callingDomain): + blockedUA = isBlockedDomain(self.server.baseDir, agentDomain) + if blockedUA and self.server.debug: + print('Blocked User agent: ' + agentDomain) return blockedUA def _requestHTTP(self) -> bool: @@ -10628,7 +10630,7 @@ class PubServer(BaseHTTPRequestHandler): self._400() return - if self._blockedUserAgent(): + if self._blockedUserAgent(callingDomain): self._400() return @@ -14130,7 +14132,7 @@ class PubServer(BaseHTTPRequestHandler): self._400() return - if self._blockedUserAgent(): + if self._blockedUserAgent(callingDomain): self._400() return From 1b125711e5c2263e6f0dca2d7ca5d10bb8e90339 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 15:45:39 +0100 Subject: [PATCH 03/13] Show user-agent domain --- daemon.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/daemon.py b/daemon.py index 018d00950..3f8445528 100644 --- a/daemon.py +++ b/daemon.py @@ -465,12 +465,15 @@ class PubServer(BaseHTTPRequestHandler): agentDomain = agentDomain.split('://')[1] if '/' in agentDomain: agentDomain = agentDomain.split('/')[0] + if ')' in agentDomain: + agentDomain = agentDomain.split('/')[0].strip() if ' ' in agentDomain: agentDomain = agentDomain.replace(' ', '') if ';' in agentDomain: agentDomain = agentDomain.replace(';', '') if '.' not in agentDomain: return None + print('User-Agent Domain: ' + agentDomain) return agentDomain def _blockedUserAgent(self, callingDomain: str) -> bool: From 072b690f31974ccab1c7868f6989fdd4f26434a1 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 15:51:03 +0100 Subject: [PATCH 04/13] User-Agent domain detection --- daemon.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/daemon.py b/daemon.py index 3f8445528..78f98b9f1 100644 --- a/daemon.py +++ b/daemon.py @@ -458,15 +458,15 @@ class PubServer(BaseHTTPRequestHandler): if not self.headers.get('User-Agent'): return None agentStr = self.headers.get('User-Agent') - if '+' not in agentStr: + if '+http' not in agentStr: return None - agentDomain = agentStr.split('+')[1].strip() + agentDomain = agentStr.split('+http')[1].strip() if '://' in agentDomain: agentDomain = agentDomain.split('://')[1] if '/' in agentDomain: agentDomain = agentDomain.split('/')[0] if ')' in agentDomain: - agentDomain = agentDomain.split('/')[0].strip() + agentDomain = agentDomain.split(')')[0].strip() if ' ' in agentDomain: agentDomain = agentDomain.replace(' ', '') if ';' in agentDomain: From 746cc362e623f3e396483c6afcb278868503e89f Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 16:00:07 +0100 Subject: [PATCH 05/13] Show user agent domain in debug --- daemon.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index 78f98b9f1..f2719e072 100644 --- a/daemon.py +++ b/daemon.py @@ -473,7 +473,8 @@ class PubServer(BaseHTTPRequestHandler): agentDomain = agentDomain.replace(';', '') if '.' not in agentDomain: return None - print('User-Agent Domain: ' + agentDomain) + if self.server.debug: + print('User-Agent Domain: ' + agentDomain) return agentDomain def _blockedUserAgent(self, callingDomain: str) -> bool: From af556dc134ff049c5c4231d599e4297bc0eede03 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 16:28:23 +0100 Subject: [PATCH 06/13] Show blocked user agents --- daemon.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index f2719e072..6732dacab 100644 --- a/daemon.py +++ b/daemon.py @@ -488,7 +488,8 @@ class PubServer(BaseHTTPRequestHandler): return blockedUA if not agentDomain.startswith(callingDomain): blockedUA = isBlockedDomain(self.server.baseDir, agentDomain) - if blockedUA and self.server.debug: + # if self.server.debug: + if blockedUA: print('Blocked User agent: ' + agentDomain) return blockedUA From 0068b2b8cd317d086365215f8e74ea7cf83ee3ff Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 16:45:29 +0100 Subject: [PATCH 07/13] Unit test for user agent domain --- daemon.py | 31 +++++-------------------------- tests.py | 14 +++++++++++++- utils.py | 24 ++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/daemon.py b/daemon.py index 6732dacab..221a5c13c 100644 --- a/daemon.py +++ b/daemon.py @@ -207,6 +207,7 @@ from shares import addShare from shares import removeShare from shares import expireShares from categories import setHashtagCategory +from utils import userAgentDomain from utils import isLocalNetworkAddress from utils import permittedDir from utils import isAccountDir @@ -452,35 +453,13 @@ class PubServer(BaseHTTPRequestHandler): else: print('ERROR: unable to create vote') - def _userAgentDomain(self) -> str: - """Returns the domain specified within User-Agent header - """ - if not self.headers.get('User-Agent'): - return None - agentStr = self.headers.get('User-Agent') - if '+http' not in agentStr: - return None - agentDomain = agentStr.split('+http')[1].strip() - if '://' in agentDomain: - agentDomain = agentDomain.split('://')[1] - if '/' in agentDomain: - agentDomain = agentDomain.split('/')[0] - if ')' in agentDomain: - agentDomain = agentDomain.split(')')[0].strip() - if ' ' in agentDomain: - agentDomain = agentDomain.replace(' ', '') - if ';' in agentDomain: - agentDomain = agentDomain.replace(';', '') - if '.' not in agentDomain: - return None - if self.server.debug: - print('User-Agent Domain: ' + agentDomain) - return agentDomain - def _blockedUserAgent(self, callingDomain: str) -> bool: """Should a GET or POST be blocked based upon its user agent? """ - agentDomain = self._userAgentDomain() + agentDomain = None + if self.headers.get('User-Agent'): + agentDomain = userAgentDomain(self.headers['User-Agent'], + self.server.debug) blockedUA = False if not agentDomain: if self.server.userAgentDomainRequired: diff --git a/tests.py b/tests.py index 54f17b7d3..c21df305d 100644 --- a/tests.py +++ b/tests.py @@ -37,13 +37,14 @@ from follow import clearFollows from follow import clearFollowers from follow import sendFollowRequestViaServer from follow import sendUnfollowRequestViaServer +from siteactive import siteIsActive +from utils import userAgentDomain from utils import camelCaseSplit from utils import decodedHost from utils import getFullDomain from utils import validNickname from utils import firstParagraphFromString from utils import removeIdEnding -from siteactive import siteIsActive from utils import updateRecentPostsCache from utils import followPerson from utils import getNicknameFromActor @@ -3938,10 +3939,21 @@ def _testRoles() -> None: assert not actorHasRole(actorJson, "artist") +def _testUserAgentDomain() -> None: + print('testUserAgentDomain') + userAgent = \ + 'http.rb/4.4.1 (Mastodon/9.10.11; +https://mastodon.something/)' + assert userAgentDomain(userAgent, False) == 'mastodon.something' + userAgent = \ + 'Mozilla/70.0 (X11; Linux x86_64; rv:1.0) Gecko/20450101 Firefox/1.0' + assert userAgentDomain(userAgent, False) is None + + def runAllTests(): print('Running tests...') updateDefaultThemesList(os.getcwd()) _testFunctions() + _testUserAgentDomain() _testRoles() _testSkills() _testSpoofGeolocation() diff --git a/utils.py b/utils.py index 0905a810f..ad277004b 100644 --- a/utils.py +++ b/utils.py @@ -2433,3 +2433,27 @@ def permittedDir(path: str) -> bool: path.startswith('/accounts'): return False return True + + +def userAgentDomain(userAgent: str, debug: bool) -> str: + """If the User-Agent string contains a domain + then return it + """ + if '+http' not in userAgent: + return None + agentDomain = userAgent.split('+http')[1].strip() + if '://' in agentDomain: + agentDomain = agentDomain.split('://')[1] + if '/' in agentDomain: + agentDomain = agentDomain.split('/')[0] + if ')' in agentDomain: + agentDomain = agentDomain.split(')')[0].strip() + if ' ' in agentDomain: + agentDomain = agentDomain.replace(' ', '') + if ';' in agentDomain: + agentDomain = agentDomain.replace(';', '') + if '.' not in agentDomain: + return None + if debug: + print('User-Agent Domain: ' + agentDomain) + return agentDomain From f5930daf4183144492209aafe289b1e8ae81f5e2 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 16:59:16 +0100 Subject: [PATCH 08/13] Expect a user agent --- daemon.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/daemon.py b/daemon.py index 221a5c13c..37fe3ab9f 100644 --- a/daemon.py +++ b/daemon.py @@ -460,6 +460,8 @@ class PubServer(BaseHTTPRequestHandler): if self.headers.get('User-Agent'): agentDomain = userAgentDomain(self.headers['User-Agent'], self.server.debug) + else: + return True blockedUA = False if not agentDomain: if self.server.userAgentDomainRequired: From b45d034332656443e9580dc0562c8a1702545b4e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 17:10:08 +0100 Subject: [PATCH 09/13] Reject obvious bots --- daemon.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/daemon.py b/daemon.py index 37fe3ab9f..e25eb7bb5 100644 --- a/daemon.py +++ b/daemon.py @@ -458,10 +458,14 @@ class PubServer(BaseHTTPRequestHandler): """ agentDomain = None if self.headers.get('User-Agent'): - agentDomain = userAgentDomain(self.headers['User-Agent'], - self.server.debug) + agentStr = self.headers['User-Agent'] + if 'bot/' in agentStr or 'bot-' in agentStr.lower(): + print('Crawler: ' + agentStr) + return True + agentDomain = userAgentDomain(agentStr, self.server.debug) else: return True + blockedUA = False if not agentDomain: if self.server.userAgentDomainRequired: From 02b53231b948b25e3d4f7c52bd9caaf2af25bb66 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 17:12:19 +0100 Subject: [PATCH 10/13] Check in lower case --- daemon.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index e25eb7bb5..d0db0248a 100644 --- a/daemon.py +++ b/daemon.py @@ -459,7 +459,8 @@ class PubServer(BaseHTTPRequestHandler): agentDomain = None if self.headers.get('User-Agent'): agentStr = self.headers['User-Agent'] - if 'bot/' in agentStr or 'bot-' in agentStr.lower(): + agentStrLower = agentStr.lower() + if 'bot/' in agentStrLower or 'bot-' in agentStrLower: print('Crawler: ' + agentStr) return True agentDomain = userAgentDomain(agentStr, self.server.debug) From 3030ddcd5056df010f08e0ac72fc9515b47ec37d Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 18:04:29 +0100 Subject: [PATCH 11/13] Tidying --- daemon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/daemon.py b/daemon.py index d0db0248a..7f1c97458 100644 --- a/daemon.py +++ b/daemon.py @@ -467,11 +467,11 @@ class PubServer(BaseHTTPRequestHandler): else: return True - blockedUA = False if not agentDomain: if self.server.userAgentDomainRequired: return True - return blockedUA + return False + blockedUA = False if not agentDomain.startswith(callingDomain): blockedUA = isBlockedDomain(self.server.baseDir, agentDomain) # if self.server.debug: From 90d5d04090573d20a16bfcee7eccbb0857f9bc6e Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 18:48:50 +0100 Subject: [PATCH 12/13] Ability to block user agents --- daemon.py | 25 +++++++++++++++++++------ epicyon.py | 26 +++++++++++++++----------- tests.py | 12 ++++++------ 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/daemon.py b/daemon.py index 7f1c97458..28bb4791d 100644 --- a/daemon.py +++ b/daemon.py @@ -457,20 +457,34 @@ class PubServer(BaseHTTPRequestHandler): """Should a GET or POST be blocked based upon its user agent? """ agentDomain = None + agentStr = None if self.headers.get('User-Agent'): agentStr = self.headers['User-Agent'] + # is this a web crawler? If so the block it agentStrLower = agentStr.lower() if 'bot/' in agentStrLower or 'bot-' in agentStrLower: print('Crawler: ' + agentStr) return True + # get domain name from User-Agent agentDomain = userAgentDomain(agentStr, self.server.debug) else: + # no User-Agent header is present return True - if not agentDomain: - if self.server.userAgentDomainRequired: + # is the User-Agent type blocked? eg. "Mastodon" + if self.server.userAgentsBlocked: + blockedUA = False + for agentName in self.server.userAgentsBlocked: + if agentName in agentStr: + blockedUA = True + break + if blockedUA: return True + + if not agentDomain: return False + + # is the User-Agent domain blocked blockedUA = False if not agentDomain.startswith(callingDomain): blockedUA = isBlockedDomain(self.server.baseDir, agentDomain) @@ -14878,7 +14892,7 @@ def loadTokens(baseDir: str, tokensDict: {}, tokensLookup: {}) -> None: break -def runDaemon(userAgentDomainRequired: bool, +def runDaemon(userAgentsBlocked: [], logLoginFailures: bool, city: str, showNodeInfoAccounts: bool, @@ -15005,9 +15019,8 @@ def runDaemon(userAgentDomainRequired: bool, httpd.keyShortcuts = {} loadAccessKeysForAccounts(baseDir, httpd.keyShortcuts, httpd.accessKeys) - # if set to True then the calling domain must be specified - # within the User-Agent header - httpd.userAgentDomainRequired = userAgentDomainRequired + # list of blocked user agent types within the User-Agent header + httpd.userAgentsBlocked = userAgentsBlocked httpd.unitTest = unitTest httpd.allowLocalNetworkAccess = allowLocalNetworkAccess diff --git a/epicyon.py b/epicyon.py index 406ba3856..a36e4af93 100644 --- a/epicyon.py +++ b/epicyon.py @@ -104,6 +104,9 @@ def str2bool(v) -> bool: parser = argparse.ArgumentParser(description='ActivityPub Server') +parser.add_argument('--userAgentBlocks', type=str, + default=None, + help='List of blocked user agents, separated by commas') parser.add_argument('-n', '--nickname', dest='nickname', type=str, default=None, help='Nickname of the account to use') @@ -274,12 +277,6 @@ parser.add_argument("--repliesEnabled", "--commentsEnabled", type=str2bool, nargs='?', const=True, default=True, help="Enable replies to a post") -parser.add_argument("--userAgentDomainRequired", - dest='userAgentDomainRequired', - type=str2bool, nargs='?', - const=True, default=False, - help="Whether User-Agent header must " + - "contain the calling domain") parser.add_argument("--showPublishAsIcon", dest='showPublishAsIcon', type=str2bool, nargs='?', @@ -2522,10 +2519,17 @@ showNodeInfoVersion = \ if showNodeInfoVersion is not None: args.showNodeInfoVersion = bool(showNodeInfoVersion) -userAgentDomainRequired = \ - getConfigParam(baseDir, 'userAgentDomainRequired') -if userAgentDomainRequired is not None: - args.userAgentDomainRequired = bool(userAgentDomainRequired) +userAgentsBlocked = [] +if args.userAgentBlocks: + userAgentsBlockedStr = args.userAgentBlocks + setConfigParam(baseDir, 'userAgentsBlocked', userAgentsBlockedStr) +else: + userAgentsBlockedStr = \ + getConfigParam(baseDir, 'userAgentsBlocked') +if userAgentsBlockedStr: + agentBlocksList = userAgentsBlockedStr.split(',') + for agentBlockStr in agentBlocksList: + userAgentsBlocked.append(agentBlockStr.strip()) city = \ getConfigParam(baseDir, 'city') @@ -2563,7 +2567,7 @@ if args.registration: print('New registrations closed') if __name__ == "__main__": - runDaemon(args.userAgentDomainRequired, + runDaemon(userAgentsBlocked, args.logLoginFailures, args.city, args.showNodeInfoAccounts, diff --git a/tests.py b/tests.py index c21df305d..f270ae9e1 100644 --- a/tests.py +++ b/tests.py @@ -520,9 +520,9 @@ def createServerAlice(path: str, domain: str, port: int, showNodeInfoVersion = True city = 'London, England' logLoginFailures = False - userAgentDomainRequired = False + userAgentsBlocked = [] print('Server running: Alice') - runDaemon(userAgentDomainRequired, + runDaemon(userAgentsBlocked, logLoginFailures, city, showNodeInfoAccounts, showNodeInfoVersion, @@ -625,9 +625,9 @@ def createServerBob(path: str, domain: str, port: int, showNodeInfoVersion = True city = 'London, England' logLoginFailures = False - userAgentDomainRequired = False + userAgentsBlocked = [] print('Server running: Bob') - runDaemon(userAgentDomainRequired, + runDaemon(userAgentsBlocked, logLoginFailures, city, showNodeInfoAccounts, showNodeInfoVersion, @@ -685,9 +685,9 @@ def createServerEve(path: str, domain: str, port: int, federationList: [], showNodeInfoVersion = True city = 'London, England' logLoginFailures = False - userAgentDomainRequired = False + userAgentsBlocked = [] print('Server running: Eve') - runDaemon(userAgentDomainRequired, + runDaemon(userAgentsBlocked, logLoginFailures, city, showNodeInfoAccounts, showNodeInfoVersion, From 0b2b4de64e66981182e482f05946864a6d0064e7 Mon Sep 17 00:00:00 2001 From: Bob Mottram Date: Sun, 20 Jun 2021 18:50:43 +0100 Subject: [PATCH 13/13] Block message --- daemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index 28bb4791d..955bebeee 100644 --- a/daemon.py +++ b/daemon.py @@ -463,7 +463,7 @@ class PubServer(BaseHTTPRequestHandler): # is this a web crawler? If so the block it agentStrLower = agentStr.lower() if 'bot/' in agentStrLower or 'bot-' in agentStrLower: - print('Crawler: ' + agentStr) + print('Blocked Crawler: ' + agentStr) return True # get domain name from User-Agent agentDomain = userAgentDomain(agentStr, self.server.debug)