mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			Check if LLM crawlers are encountered too frequently
							parent
							
								
									60c15d2358
								
							
						
					
					
						commit
						fc7feaf1cd
					
				|  | @ -687,6 +687,9 @@ def run_daemon(accounts_data_dir: str, | |||
| 
 | ||||
|     httpd.starting_daemon = True | ||||
| 
 | ||||
|     # the last time when an LLM scraper was replied to | ||||
|     httpd.last_llm_time = None | ||||
| 
 | ||||
|     # width, position and opacity of watermark applied to attached images | ||||
|     # as a percentage of the attached image width | ||||
|     httpd.watermark_width_percent = watermark_width_percent | ||||
|  |  | |||
|  | @ -87,6 +87,7 @@ from httpcodes import http_304 | |||
| from httpcodes import http_400 | ||||
| from httpcodes import http_503 | ||||
| from httpcodes import write2 | ||||
| from utils import date_utcnow | ||||
| from utils import replace_strings | ||||
| from utils import contains_invalid_chars | ||||
| from utils import save_json | ||||
|  | @ -349,8 +350,16 @@ def daemon_http_get(self) -> None: | |||
|                                self.path, self.server.block_military) | ||||
|         if block: | ||||
|             if llm: | ||||
|                 # check if LLM is too frequent | ||||
|                 if self.server.last_llm_time: | ||||
|                     curr_date = date_utcnow() | ||||
|                     time_diff = curr_date - self.server.last_llm_time | ||||
|                     diff_secs = time_diff.total_seconds() | ||||
|                     if diff_secs < 60: | ||||
|                         http_402(self) | ||||
|                         return | ||||
|                 if is_image_file(self.path): | ||||
| -                    http_404(self, 723) | ||||
| +                    http_402(self) | ||||
|                     return | ||||
|                 # if this is an LLM crawler then feed it some trash | ||||
|                 print('GET HTTP LLM scraper poisoned: ' + str(self.headers)) | ||||
|  | @ -361,6 +370,7 @@ def daemon_http_get(self) -> None: | |||
|                 set_headers(self, 'text/html', msglen, | ||||
|                             '', calling_domain, False) | ||||
|                 write2(self, msg) | ||||
|                 self.server.last_llm_time = date_utcnow() | ||||
|                 return | ||||
|             http_400(self) | ||||
|             return | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue