Keep track of HTTP header fields

main
Bob Mottram 2024-07-20 19:22:38 +01:00
parent c4a4198e8a
commit a88aecf564
4 changed files with 36 additions and 0 deletions

View File

@ -837,6 +837,12 @@ def run_daemon(accounts_data_dir: str,
# cache for automatic content warnings # cache for automatic content warnings
httpd.auto_cw_cache = load_auto_cw_cache(base_dir) httpd.auto_cw_cache = load_auto_cw_cache(base_dir)
# loads a catalog of http header fields
headers_catalog_fieldname = data_dir(base_dir) + '/headers_catalog.json'
httpd.headers_catalog = {}
if os.path.isfile(headers_catalog_fieldname):
httpd.headers_catalog = load_json(headers_catalog_fieldname)
# list of websites which are currently down # list of websites which are currently down
httpd.sites_unavailable = load_unavailable_sites(base_dir) httpd.sites_unavailable = load_unavailable_sites(base_dir)

View File

@ -67,6 +67,7 @@ from daemon_utils import has_accept
from daemon_utils import show_person_options from daemon_utils import show_person_options
from daemon_utils import is_authorized from daemon_utils import is_authorized
from daemon_utils import get_user_agent from daemon_utils import get_user_agent
from httpheaders import update_headers_catalog
from httpheaders import set_headers_etag from httpheaders import set_headers_etag
from httpheaders import login_headers from httpheaders import login_headers
from httpheaders import redirect_headers from httpheaders import redirect_headers
@ -243,6 +244,11 @@ def daemon_http_get(self) -> None:
calling_domain = self.server.domain_full calling_domain = self.server.domain_full
# record header fields encountered
update_headers_catalog(self.server.base_dir,
self.server.headers_catalog,
self.headers)
if self.headers.get('Server'): if self.headers.get('Server'):
if self.headers['Server'] in corp_servers(): if self.headers['Server'] in corp_servers():
print('GET HTTP Corporate leech bounced: ' + print('GET HTTP Corporate leech bounced: ' +

View File

@ -34,6 +34,7 @@ from httpcodes import http_400
from httpcodes import http_402 from httpcodes import http_402
from httpcodes import http_404 from httpcodes import http_404
from httpcodes import http_503 from httpcodes import http_503
from httpheaders import update_headers_catalog
from httpheaders import redirect_headers from httpheaders import redirect_headers
from daemon_utils import get_user_agent from daemon_utils import get_user_agent
from daemon_utils import post_to_outbox from daemon_utils import post_to_outbox
@ -87,6 +88,10 @@ def daemon_http_post(self) -> None:
' path: ' + self.path + ' busy: ' + ' path: ' + self.path + ' busy: ' +
str(self.server.postreq_busy)) str(self.server.postreq_busy))
update_headers_catalog(self.server.base_dir,
self.server.headers_catalog,
self.headers)
calling_domain = self.server.domain_full calling_domain = self.server.domain_full
if self.headers.get('Host'): if self.headers.get('Host'):
calling_domain = decoded_host(self.headers['Host']) calling_domain = decoded_host(self.headers['Host'])

View File

@ -12,6 +12,8 @@ import urllib.parse
from hashlib import md5 from hashlib import md5
from utils import string_contains from utils import string_contains
from utils import get_instance_url from utils import get_instance_url
from utils import data_dir
from utils import save_json
def login_headers(self, file_format: str, length: int, def login_headers(self, file_format: str, length: int,
@ -205,3 +207,20 @@ def set_headers_etag(self, media_filename: str, file_format: str,
self.send_header('last-modified', last_modified) self.send_header('last-modified', last_modified)
self.send_header('accept-ranges', 'bytes') self.send_header('accept-ranges', 'bytes')
self.end_headers() self.end_headers()
def update_headers_catalog(base_dir: str, headers_catalog: {},
                           headers: {}) -> None:
    """Records any previously unseen http header fields in the catalog

    This allows us to spot anything unexpected for later investigation.

    base_dir: passed to data_dir() to locate the catalog file
    headers_catalog: dict of header field name -> first value seen for
        that field. Updated in place
    headers: header fields of the current request. Only its items()
        method is used

    Fields already in the catalog are left untouched, so the stored
    value is the first one encountered. The catalog is written to
    <data_dir>/headers_catalog.json only if a new field was added.
    """
    # NOTE(review): header values come from untrusted requests and may
    # include credentials (eg. Cookie or Authorization). Confirm that
    # storing them in the catalog file is acceptable
    changed = False
    for fieldname, fieldvalue in headers.items():
        if fieldname not in headers_catalog:
            headers_catalog[fieldname] = fieldvalue
            changed = True

    if not changed:
        # this runs on every GET and POST, so avoid doing any
        # path or file work when nothing new was seen
        return

    headers_catalog_filename = data_dir(base_dir) + '/headers_catalog.json'
    save_json(headers_catalog, headers_catalog_filename)