mirror of https://gitlab.com/bashrc2/epicyon
Return a poisoned response to LLM scrapers
parent
4c7cbd6341
commit
cb069bbed5
|
@ -95,6 +95,7 @@ from httpcodes import write2
|
||||||
from httpheaders import set_headers
|
from httpheaders import set_headers
|
||||||
from daemon_utils import has_accept
|
from daemon_utils import has_accept
|
||||||
from daemon_utils import is_authorized
|
from daemon_utils import is_authorized
|
||||||
|
from poison import load_dictionary
|
||||||
|
|
||||||
|
|
||||||
class PubServer(BaseHTTPRequestHandler):
|
class PubServer(BaseHTTPRequestHandler):
|
||||||
|
@ -876,6 +877,9 @@ def run_daemon(accounts_data_dir: str,
|
||||||
# timeout used when getting rss feeds
|
# timeout used when getting rss feeds
|
||||||
httpd.rss_timeout_sec = 20
|
httpd.rss_timeout_sec = 20
|
||||||
|
|
||||||
|
# load dictionary used for LLM poisoning
|
||||||
|
httpd.dictionary = load_dictionary(base_dir)
|
||||||
|
|
||||||
# timeout used when checking for actor changes when clicking an avatar
|
# timeout used when checking for actor changes when clicking an avatar
|
||||||
# and entering person options screen
|
# and entering person options screen
|
||||||
if check_actor_timeout < 2:
|
if check_actor_timeout < 2:
|
||||||
|
|
|
@ -210,6 +210,7 @@ from daemon_get_blog import show_blog_page
|
||||||
from daemon_get_links import edit_links2
|
from daemon_get_links import edit_links2
|
||||||
from daemon_get_login import redirect_to_login_screen
|
from daemon_get_login import redirect_to_login_screen
|
||||||
from daemon_get_login import show_login_screen
|
from daemon_get_login import show_login_screen
|
||||||
|
from poison import html_poisoned
|
||||||
|
|
||||||
# Blogs can be longer, so don't show many per page
|
# Blogs can be longer, so don't show many per page
|
||||||
MAX_POSTS_IN_BLOGS_FEED = 4
|
MAX_POSTS_IN_BLOGS_FEED = 4
|
||||||
|
@ -260,8 +261,13 @@ def daemon_http_get(self) -> None:
|
||||||
|
|
||||||
# headers used by LLM scrapers
|
# headers used by LLM scrapers
|
||||||
if 'oai-host-hash' in self.headers:
|
if 'oai-host-hash' in self.headers:
|
||||||
print('GET HTTP LLM scraper bounced: ' + str(self.headers))
|
msg = html_poisoned(self.server.dictionary)
|
||||||
http_402(self)
|
msg = msg.encode('utf-8')
|
||||||
|
msglen = len(msg)
|
||||||
|
set_headers(self, 'text/html', msglen,
|
||||||
|
'', calling_domain, False)
|
||||||
|
write2(self, msg)
|
||||||
|
print('GET HTTP LLM scraper poisoned: ' + str(self.headers))
|
||||||
return
|
return
|
||||||
|
|
||||||
# replace invalid .well-known path, prior to checking for suspicious paths
|
# replace invalid .well-known path, prior to checking for suspicious paths
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
15
epicyon.py
15
epicyon.py
|
@ -123,6 +123,8 @@ from happening import dav_day_via_server
|
||||||
from content import import_emoji
|
from content import import_emoji
|
||||||
from relationships import get_moved_accounts
|
from relationships import get_moved_accounts
|
||||||
from blocking import get_blocks_via_server
|
from blocking import get_blocks_via_server
|
||||||
|
from poison import html_poisoned
|
||||||
|
from poison import load_dictionary
|
||||||
|
|
||||||
|
|
||||||
def str2bool(value_str) -> bool:
|
def str2bool(value_str) -> bool:
|
||||||
|
@ -440,6 +442,11 @@ def _command_options() -> None:
|
||||||
dest='shared_items_federated_domains',
|
dest='shared_items_federated_domains',
|
||||||
help='Specify federation list for shared items, ' +
|
help='Specify federation list for shared items, ' +
|
||||||
'separated by spaces')
|
'separated by spaces')
|
||||||
|
parser.add_argument("--poisoned", "--poison",
|
||||||
|
dest='poisoned',
|
||||||
|
type=str2bool, nargs='?',
|
||||||
|
const=True, default=False,
|
||||||
|
help="Example poisoned output")
|
||||||
parser.add_argument("--following", "--followingList",
|
parser.add_argument("--following", "--followingList",
|
||||||
dest='followingList',
|
dest='followingList',
|
||||||
type=str2bool, nargs='?',
|
type=str2bool, nargs='?',
|
||||||
|
@ -840,6 +847,14 @@ def _command_options() -> None:
|
||||||
|
|
||||||
argb = parser.parse_args()
|
argb = parser.parse_args()
|
||||||
|
|
||||||
|
if argb.poisoned:
|
||||||
|
# LLM poisoning example
|
||||||
|
base_dir = os.getcwd()
|
||||||
|
dictionary = load_dictionary(base_dir)
|
||||||
|
poisoned_str = html_poisoned(dictionary)
|
||||||
|
print(poisoned_str)
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
debug = False
|
debug = False
|
||||||
if argb.debug:
|
if argb.debug:
|
||||||
debug = True
|
debug = True
|
||||||
|
|
Loading…
Reference in New Issue