Improve detection of corporates

Corporate servers are always creating new variations of their domain names, so match on substrings of the server name instead of a fixed list.
main
Bob Mottram 2025-05-19 11:18:02 +01:00
parent 79aa8cccc3
commit fc556850fe
4 changed files with 18 additions and 11 deletions

View File

@ -89,6 +89,7 @@ from httpcodes import http_304
from httpcodes import http_400
from httpcodes import http_503
from httpcodes import write2
from flags import is_corporate
from flags import is_image_file
from flags import is_artist
from flags import is_blog_post
@ -112,7 +113,6 @@ from utils import convert_domains
from utils import get_nickname_from_actor
from utils import get_json_content_from_accept
from utils import check_bad_path
from utils import corp_servers
from utils import decoded_host
from utils import detect_mitm
from person import get_person_notes_endpoint
@ -257,8 +257,9 @@ def daemon_http_get(self) -> None:
self.server.headers_catalog,
self.headers)
# bounce corporate servers
if self.headers.get('Server'):
if self.headers['Server'] in corp_servers():
if is_corporate(self.headers['Server']):
print('GET HTTP Corporate leech bounced: ' +
self.headers['Server'])
http_402(self)

View File

@ -11,8 +11,8 @@ import time
import errno
import json
from socket import error as SocketError
from flags import is_corporate
from utils import replace_strings
from utils import corp_servers
from utils import string_ends_with
from utils import get_config_param
from utils import decoded_host
@ -156,7 +156,7 @@ def daemon_http_post(self) -> None:
self.server.last_postreq = curr_time_postreq
if self.headers.get('Server'):
if self.headers['Server'] in corp_servers():
if is_corporate(self.headers['Server']):
print('POST HTTP Corporate leech bounced: ' +
self.headers['Server'])
http_402(self)

View File

@ -623,3 +623,16 @@ def url_permitted(url: str, federation_list: []) -> bool:
if domain in url:
return True
return False
def is_corporate(server_name: str) -> bool:
    """Is the given server name a corporate leech?

    The test is a case-insensitive substring match, so any variation of
    a server name which contains one of the known corporate names is
    detected.
    Returns True if server_name contains a corporate name, and False
    otherwise, including when server_name is empty or None.
    """
    if not server_name:
        # nothing to match against, eg. no Server header was supplied
        return False
    server_lower = server_name.lower()
    corporate_names = ('google', 'cloudflare', 'facebook',
                       'microsoft', 'github')
    return any(name in server_lower for name in corporate_names)

View File

@ -5018,13 +5018,6 @@ def lines_in_file(filename: str) -> int:
return 0
def corp_servers() -> ():
    """Returns a tuple containing the server names of despised
    corporate thieves
    """
    servers = (
        'GitHub.com', 'github.com', 'cloudflare', 'microsoft.com',
        'google.com', 'google.co.'
    )
    return servers
def get_media_url_from_video(post_json_object: {}) -> (str, str, str, str):
"""Within a Video post (eg peertube) return the media details
"""