Merge branch 'main' of gitlab.com:bashrc2/epicyon

merge-requests/30/head
Bob Mottram 2021-10-24 21:11:46 +01:00
commit b76d4bf4c3
21 changed files with 181 additions and 45 deletions

104
daemon.py
View File

@ -392,6 +392,36 @@ def saveDomainQrcode(baseDir: str, httpPrefix: str,
class PubServer(BaseHTTPRequestHandler):
protocol_version = 'HTTP/1.1'
def _updateKnownCrawlers(self, uaStr: str) -> None:
"""Updates a dictionary of known crawlers accessing nodeinfo
or the masto API
"""
# nothing to record if the request carried no user agent string
if not uaStr:
return
# whole seconds, used both as the 'lastseen' stamp and for age checks
currTime = int(time.time())
# known user agent: count another hit and refresh its timestamp,
# otherwise create a fresh record with a single hit
if self.server.knownCrawlers.get(uaStr):
self.server.knownCrawlers[uaStr]['hits'] += 1
self.server.knownCrawlers[uaStr]['lastseen'] = currTime
else:
self.server.knownCrawlers[uaStr] = {
"lastseen": currTime,
"hits": 1
}
# housekeeping is only attempted when at least 30 seconds have passed
# since the time stored in lastKnownCrawler
if currTime - self.server.lastKnownCrawler >= 30:
# remove any old observations
# (60 * 60 * 24 * 30 seconds = 30 days since last seen)
# keys are collected first so that the dictionary is not modified
# while it is being iterated
removeCrawlers = []
for ua, item in self.server.knownCrawlers.items():
if currTime - item['lastseen'] >= 60 * 60 * 24 * 30:
removeCrawlers.append(ua)
for ua in removeCrawlers:
del self.server.knownCrawlers[ua]
# save the list of crawlers
saveJson(self.server.knownCrawlers,
self.server.baseDir + '/accounts/knownCrawlers.json')
# NOTE(review): indentation is lost in this view, so it cannot be told
# whether this assignment belongs inside the 30 second check or runs on
# every call - confirm against daemon.py. If it runs on every call then
# steady crawler traffic arriving less than 30 seconds apart would keep
# postponing the prune and save above.
self.server.lastKnownCrawler = currTime
def _getInstanceUrl(self, callingDomain: str) -> str:
"""Returns the URL for this instance
"""
@ -520,11 +550,22 @@ class PubServer(BaseHTTPRequestHandler):
def _blockedUserAgent(self, callingDomain: str, agentStr: str) -> bool:
"""Should a GET or POST be blocked based upon its user agent?
"""
if not agentStr:
return False
agentStrLower = agentStr.lower()
defaultAgentBlocks = [
'fedilist.com'
]
for uaBlock in defaultAgentBlocks:
if uaBlock in agentStrLower:
print('Blocked User agent: ' + uaBlock)
return True
agentDomain = None
if agentStr:
# is this a web crawler? If so then block it
agentStrLower = agentStr.lower()
if 'bot/' in agentStrLower or 'bot-' in agentStrLower:
if self.server.newsInstance:
return False
@ -969,6 +1010,7 @@ class PubServer(BaseHTTPRequestHandler):
return False
def _mastoApiV1(self, path: str, callingDomain: str,
uaStr: str,
authorized: bool,
httpPrefix: str,
baseDir: str, nickname: str, domain: str,
@ -989,10 +1031,12 @@ class PubServer(BaseHTTPRequestHandler):
print('mastodon api v1: ' + path)
print('mastodon api v1: authorized ' + str(authorized))
print('mastodon api v1: nickname ' + str(nickname))
self._updateKnownCrawlers(uaStr)
brochMode = brochModeIsActive(baseDir)
sendJson, sendJsonStr = mastoApiV1Response(path,
callingDomain,
uaStr,
authorized,
httpPrefix,
baseDir,
@ -1031,6 +1075,7 @@ class PubServer(BaseHTTPRequestHandler):
return True
def _mastoApi(self, path: str, callingDomain: str,
uaStr: str,
authorized: bool, httpPrefix: str,
baseDir: str, nickname: str, domain: str,
domainFull: str,
@ -1041,18 +1086,19 @@ class PubServer(BaseHTTPRequestHandler):
projectVersion: str,
customEmoji: [],
showNodeInfoAccounts: bool) -> bool:
return self._mastoApiV1(path, callingDomain, authorized,
return self._mastoApiV1(path, callingDomain, uaStr, authorized,
httpPrefix, baseDir, nickname, domain,
domainFull, onionDomain, i2pDomain,
translate, registration, systemLanguage,
projectVersion, customEmoji,
showNodeInfoAccounts)
def _nodeinfo(self, callingDomain: str) -> bool:
def _nodeinfo(self, uaStr: str, callingDomain: str) -> bool:
if not self.path.startswith('/nodeinfo/2.0'):
return False
if self.server.debug:
print('DEBUG: nodeinfo ' + self.path)
self._updateKnownCrawlers(uaStr)
# If we are in broch mode then don't show potentially
# sensitive metadata.
@ -1091,7 +1137,7 @@ class PubServer(BaseHTTPRequestHandler):
self._set_headers('application/ld+json', msglen,
None, callingDomain, True)
self._write(msg)
print('nodeinfo sent')
print('nodeinfo sent to ' + callingDomain)
return True
self._404()
return True
@ -11819,6 +11865,36 @@ class PubServer(BaseHTTPRequestHandler):
return True
return False
def _showKnownCrawlers(self, callingDomain: str, path: str,
                       baseDir: str, knownCrawlers: {}) -> bool:
    """Show a list of known web crawlers

    Handles paths of the form /users/<nickname>/crawlers where the
    nickname is a moderator. The response is plain text with one line
    per crawler seen during the last 30 days, in the format
    "<hits> <user agent>", with the most active crawlers first.

    knownCrawlers maps a user agent string to a dictionary containing
    'hits' and 'lastseen' (seconds since the epoch).

    Returns True if the request was answered here, or False if the path
    is not a crawlers listing or the nickname is not a moderator.
    """
    if '/users/' not in path:
        return False
    if not path.endswith('/crawlers'):
        return False
    # NOTE(review): the nickname is taken from the requested path, so this
    # check alone does not prove that the requester is that moderator.
    # Confirm that the caller only reaches here for authorized requests.
    nickname = getNicknameFromActor(path)
    if not nickname:
        return False
    if not isModerator(baseDir, nickname):
        return False

    currTime = int(time.time())
    # 30 days in seconds
    recentCrawlers = 60 * 60 * 24 * 30
    # Age is the current time minus the time last seen. The reverse
    # subtraction is negative for any past timestamp and so would
    # never exclude anything.
    crawlersList = [
        (item['hits'], uaStr)
        for uaStr, item in knownCrawlers.items()
        if currTime - item['lastseen'] < recentCrawlers
    ]
    # Sort on the numeric hit count rather than its text form, otherwise
    # "9 ..." would be listed above "10 ..."
    crawlersList.sort(reverse=True)

    msg = ''.join(str(hits) + ' ' + uaStr + '\n'
                  for hits, uaStr in crawlersList)
    msg = msg.encode('utf-8')
    msglen = len(msg)
    self._set_headers('text/plain; charset=utf-8', msglen,
                      None, callingDomain, True)
    self._write(msg)
    return True
def _editProfile(self, callingDomain: str, path: str,
translate: {}, baseDir: str,
httpPrefix: str, domain: str, port: int,
@ -12113,7 +12189,7 @@ class PubServer(BaseHTTPRequestHandler):
# Since fediverse crawlers are quite active,
# make returning info to them high priority
# get nodeinfo endpoint
if self._nodeinfo(callingDomain):
if self._nodeinfo(uaStr, callingDomain):
return
fitnessPerformance(GETstartTime, self.server.fitness,
@ -12446,7 +12522,8 @@ class PubServer(BaseHTTPRequestHandler):
return
# minimal mastodon api
if self._mastoApi(self.path, callingDomain, authorized,
if self._mastoApi(self.path, callingDomain, uaStr,
authorized,
self.server.httpPrefix,
self.server.baseDir,
self.authorizedNickname,
@ -14349,6 +14426,12 @@ class PubServer(BaseHTTPRequestHandler):
self.server.GETbusy = False
return
# list of known crawlers accessing nodeinfo or masto API
if self._showKnownCrawlers(callingDomain, self.path,
self.server.baseDir,
self.server.knownCrawlers):
return
# edit profile in web interface
if self._editProfile(callingDomain, self.path,
self.server.translate,
@ -17302,6 +17385,15 @@ def runDaemon(listsEnabled: str,
createNewsInbox(baseDir, domain, port, httpPrefix)
setConfigParam(baseDir, "listsEnabled", "Murdoch press")
# dict of known web crawlers accessing nodeinfo or the masto API
# and how many times they have been seen
httpd.knownCrawlers = {}
knownCrawlersFilename = baseDir + '/accounts/knownCrawlers.json'
if os.path.isfile(knownCrawlersFilename):
httpd.knownCrawlers = loadJson(knownCrawlersFilename)
# when was the last crawler seen?
httpd.lastKnownCrawler = 0
if listsEnabled:
httpd.listsEnabled = listsEnabled
else:

View File

@ -82,6 +82,7 @@ def _getMastoApiV1Account(baseDir: str, nickname: str, domain: str) -> {}:
def mastoApiV1Response(path: str, callingDomain: str,
uaStr: str,
authorized: bool,
httpPrefix: str,
baseDir: str, nickname: str, domain: str,
@ -100,12 +101,18 @@ def mastoApiV1Response(path: str, callingDomain: str,
"""
sendJson = None
sendJsonStr = ''
if not uaStr:
uaStr = ''
# parts of the api needing authorization
if authorized and nickname:
if path == '/api/v1/accounts/verify_credentials':
sendJson = _getMastoApiV1Account(baseDir, nickname, domain)
sendJsonStr = 'masto API account sent for ' + nickname
sendJsonStr = \
'masto API account sent for ' + nickname + ' ' + uaStr
# information about where the request is coming from
callingInfo = ' ' + uaStr + ', ' + callingDomain
# Parts of the api which don't need authorization
mastoId = _getMastApiV1Id(path)
@ -121,57 +128,73 @@ def mastoApiV1Response(path: str, callingDomain: str,
path = path.split('?')[0]
if path.endswith('/followers'):
sendJson = []
sendJsonStr = 'masto API followers sent for ' + nickname
sendJsonStr = \
'masto API followers sent for ' + nickname + \
callingInfo
elif path.endswith('/following'):
sendJson = []
sendJsonStr = 'masto API following sent for ' + nickname
sendJsonStr = \
'masto API following sent for ' + nickname + \
callingInfo
elif path.endswith('/statuses'):
sendJson = []
sendJsonStr = 'masto API statuses sent for ' + nickname
sendJsonStr = \
'masto API statuses sent for ' + nickname + \
callingInfo
elif path.endswith('/search'):
sendJson = []
sendJsonStr = 'masto API search sent ' + originalPath
sendJsonStr = \
'masto API search sent ' + originalPath + \
callingInfo
elif path.endswith('/relationships'):
sendJson = []
sendJsonStr = \
'masto API relationships sent ' + originalPath
'masto API relationships sent ' + originalPath + \
callingInfo
else:
sendJson = \
_getMastoApiV1Account(baseDir, pathNickname, domain)
sendJsonStr = 'masto API account sent for ' + nickname
sendJsonStr = \
'masto API account sent for ' + nickname + \
callingInfo
# NOTE: adding support for '/api/v1/directory' seems to create
# federation problems, so avoid implementing that
if path.startswith('/api/v1/blocks'):
sendJson = []
sendJsonStr = 'masto API instance blocks sent ' + path
sendJsonStr = \
'masto API instance blocks sent ' + path + callingInfo
elif path.startswith('/api/v1/favorites'):
sendJson = []
sendJsonStr = 'masto API favorites sent ' + path
sendJsonStr = 'masto API favorites sent ' + path + callingInfo
elif path.startswith('/api/v1/follow_requests'):
sendJson = []
sendJsonStr = 'masto API follow requests sent ' + path
sendJsonStr = \
'masto API follow requests sent ' + path + callingInfo
elif path.startswith('/api/v1/mutes'):
sendJson = []
sendJsonStr = 'masto API mutes sent ' + path
sendJsonStr = \
'masto API mutes sent ' + path + callingInfo
elif path.startswith('/api/v1/notifications'):
sendJson = []
sendJsonStr = 'masto API notifications sent ' + path
sendJsonStr = \
'masto API notifications sent ' + path + callingInfo
elif path.startswith('/api/v1/reports'):
sendJson = []
sendJsonStr = 'masto API reports sent ' + path
sendJsonStr = 'masto API reports sent ' + path + callingInfo
elif path.startswith('/api/v1/statuses'):
sendJson = []
sendJsonStr = 'masto API statuses sent ' + path
sendJsonStr = 'masto API statuses sent ' + path + callingInfo
elif path.startswith('/api/v1/timelines'):
sendJson = {
'error': 'This method requires an authenticated user'
}
sendJsonStr = 'masto API timelines sent ' + path
sendJsonStr = 'masto API timelines sent ' + path + callingInfo
elif path.startswith('/api/v1/custom_emojis'):
sendJson = customEmoji
sendJsonStr = 'masto API custom emojis sent ' + path
sendJsonStr = \
'masto API custom emojis sent ' + path + callingInfo
adminNickname = getConfigParam(baseDir, 'admin')
if adminNickname and path == '/api/v1/instance':
@ -208,7 +231,7 @@ def mastoApiV1Response(path: str, callingDomain: str,
registration,
systemLanguage,
projectVersion)
sendJsonStr = 'masto API instance metadata sent'
sendJsonStr = 'masto API instance metadata sent ' + uaStr
elif path.startswith('/api/v1/instance/peers'):
# This is just a dummy result.
# Showing the full list of peers would have privacy implications.
@ -216,8 +239,8 @@ def mastoApiV1Response(path: str, callingDomain: str,
# small instances a full list of peers would convey a lot of
# information about the interests of a small number of accounts
sendJson = ['mastodon.social', domainFull]
sendJsonStr = 'masto API peers metadata sent'
sendJsonStr = 'masto API peers metadata sent ' + uaStr
elif path.startswith('/api/v1/instance/activity'):
sendJson = []
sendJsonStr = 'masto API activity metadata sent'
sendJsonStr = 'masto API activity metadata sent ' + uaStr
return sendJson, sendJsonStr

View File

@ -489,5 +489,6 @@
"Join": "انضم",
"Leave": "يترك",
"System Monitor": "مراقب النظام",
"Add content warnings for the following sites": "أضف تحذيرات المحتوى للمواقع التالية"
"Add content warnings for the following sites": "أضف تحذيرات المحتوى للمواقع التالية",
"Known Web Crawlers": "برامج زحف الويب المعروفة"
}

View File

@ -489,5 +489,6 @@
"Join": "Uneix-te",
"Leave": "Marxa",
"System Monitor": "Monitor del sistema",
"Add content warnings for the following sites": "Afegiu advertiments de contingut per als llocs següents"
"Add content warnings for the following sites": "Afegiu advertiments de contingut per als llocs següents",
"Known Web Crawlers": "Exploradors web coneguts"
}

View File

@ -489,5 +489,6 @@
"Join": "Ymunwch",
"Leave": "Gadewch",
"System Monitor": "Monitor System",
"Add content warnings for the following sites": "Ychwanegwch rybuddion cynnwys ar gyfer y gwefannau canlynol"
"Add content warnings for the following sites": "Ychwanegwch rybuddion cynnwys ar gyfer y gwefannau canlynol",
"Known Web Crawlers": "Crawlers Gwe Hysbys"
}

View File

@ -489,5 +489,6 @@
"Join": "Verbinden",
"Leave": "Verlassen",
"System Monitor": "Systemmonitor",
"Add content warnings for the following sites": "Inhaltswarnungen für die folgenden Websites hinzufügen"
"Add content warnings for the following sites": "Inhaltswarnungen für die folgenden Websites hinzufügen",
"Known Web Crawlers": "Bekannte Web-Crawler"
}

View File

@ -489,5 +489,6 @@
"Join": "Join",
"Leave": "Leave",
"System Monitor": "System Monitor",
"Add content warnings for the following sites": "Add content warnings for the following sites"
"Add content warnings for the following sites": "Add content warnings for the following sites",
"Known Web Crawlers": "Known Web Crawlers"
}

View File

@ -489,5 +489,6 @@
"Join": "Entrar",
"Leave": "Dejar",
"System Monitor": "Monitor del sistema",
"Add content warnings for the following sites": "Agregue advertencias de contenido para los siguientes sitios"
"Add content warnings for the following sites": "Agregue advertencias de contenido para los siguientes sitios",
"Known Web Crawlers": "Rastreadores web conocidos"
}

View File

@ -489,5 +489,6 @@
"Join": "Rejoindre",
"Leave": "Laisser",
"System Monitor": "Moniteur système",
"Add content warnings for the following sites": "Ajouter des avertissements de contenu pour les sites suivants"
"Add content warnings for the following sites": "Ajouter des avertissements de contenu pour les sites suivants",
"Known Web Crawlers": "Crawlers Web connus"
}

View File

@ -489,5 +489,6 @@
"Join": "Bí páirteach",
"Leave": "Fág",
"System Monitor": "Monatóir Córais",
"Add content warnings for the following sites": "Cuir rabhaidh ábhair leis na suíomhanna seo a leanas"
"Add content warnings for the following sites": "Cuir rabhaidh ábhair leis na suíomhanna seo a leanas",
"Known Web Crawlers": "Crawlers Gréasáin Aitheanta"
}

View File

@ -489,5 +489,6 @@
"Join": "शामिल हों",
"Leave": "छोड़ना",
"System Monitor": "सिस्टम मॉनिटर",
"Add content warnings for the following sites": "निम्नलिखित साइटों के लिए सामग्री चेतावनियाँ जोड़ें"
"Add content warnings for the following sites": "निम्नलिखित साइटों के लिए सामग्री चेतावनियाँ जोड़ें",
"Known Web Crawlers": "ज्ञात वेब क्रॉलर"
}

View File

@ -489,5 +489,6 @@
"Join": "Aderire",
"Leave": "Lasciare",
"System Monitor": "Monitor di sistema",
"Add content warnings for the following sites": "Aggiungi avvisi sui contenuti per i seguenti siti"
"Add content warnings for the following sites": "Aggiungi avvisi sui contenuti per i seguenti siti",
"Known Web Crawlers": "Crawler Web conosciuti"
}

View File

@ -489,5 +489,6 @@
"Join": "加入",
"Leave": "離れる",
"System Monitor": "システムモニター",
"Add content warnings for the following sites": "次のサイトのコンテンツ警告を追加します"
"Add content warnings for the following sites": "次のサイトのコンテンツ警告を追加します",
"Known Web Crawlers": "既知のWebクローラー"
}

View File

@ -489,5 +489,6 @@
"Join": "Bihevgirêdan",
"Leave": "Terikandin",
"System Monitor": "System Monitor",
"Add content warnings for the following sites": "Ji bo malperên jêrîn hişyariyên naverokê zêde bikin"
"Add content warnings for the following sites": "Ji bo malperên jêrîn hişyariyên naverokê zêde bikin",
"Known Web Crawlers": "Crawlerên Webê yên naskirî"
}

View File

@ -485,5 +485,6 @@
"Join": "Join",
"Leave": "Leave",
"System Monitor": "System Monitor",
"Add content warnings for the following sites": "Add content warnings for the following sites"
"Add content warnings for the following sites": "Add content warnings for the following sites",
"Known Web Crawlers": "Known Web Crawlers"
}

View File

@ -489,5 +489,6 @@
"Join": "Juntar",
"Leave": "Sair",
"System Monitor": "Monitor de Sistema",
"Add content warnings for the following sites": "Adicione avisos de conteúdo para os seguintes sites"
"Add content warnings for the following sites": "Adicione avisos de conteúdo para os seguintes sites",
"Known Web Crawlers": "Rastreadores da Web conhecidos"
}

View File

@ -489,5 +489,6 @@
"Join": "Присоединиться",
"Leave": "Оставлять",
"System Monitor": "Системный монитор",
"Add content warnings for the following sites": "Добавить предупреждения о содержании для следующих сайтов"
"Add content warnings for the following sites": "Добавить предупреждения о содержании для следующих сайтов",
"Known Web Crawlers": "Известные веб-сканеры"
}

View File

@ -489,5 +489,6 @@
"Join": "Jiunge",
"Leave": "Ondoka",
"System Monitor": "Ufuatiliaji wa Mfumo",
"Add content warnings for the following sites": "Ongeza maonyo ya yaliyomo kwa wavuti zifuatazo"
"Add content warnings for the following sites": "Ongeza maonyo ya yaliyomo kwa wavuti zifuatazo",
"Known Web Crawlers": "Watambaji Wavuti Wanaojulikana"
}

View File

@ -489,5 +489,6 @@
"Join": "加入",
"Leave": "离开",
"System Monitor": "系统监视器",
"Add content warnings for the following sites": "为以下网站添加内容警告"
"Add content warnings for the following sites": "为以下网站添加内容警告",
"Known Web Crawlers": "已知的网络爬虫"
}

View File

@ -715,7 +715,7 @@ def getStatusNumber(publishedStr: str = None) -> (str, str):
def evilIncarnate() -> []:
return ('gab.com', 'gabfed.com', 'spinster.xyz',
return ('fedilist.com', 'gab.com', 'gabfed.com', 'spinster.xyz',
'kiwifarms.cc', 'djitter.com')

View File

@ -1520,8 +1520,8 @@ def _htmlEditProfileSharedItems(baseDir: str, nickname: str, domain: str,
def _htmlEditProfileFiltering(baseDir: str, nickname: str, domain: str,
userAgentsBlocked: str, translate: {},
replyIntervalHours: int,
userAgentsBlocked: str,
translate: {}, replyIntervalHours: int,
CWlists: {}, listsEnabled: str) -> str:
"""Filtering and blocking section of edit profile screen
"""
@ -1669,6 +1669,10 @@ def _htmlEditProfileFiltering(baseDir: str, nickname: str, domain: str,
allowedInstancesStr + '</textarea>\n'
if isModerator(baseDir, nickname):
editProfileForm += \
'<a href="/users/' + nickname + '/crawlers">' + \
translate['Known Web Crawlers'] + '</a><br>\n'
userAgentsBlockedStr = ''
for ua in userAgentsBlocked:
if userAgentsBlockedStr: