mirror of https://gitlab.com/bashrc2/epicyon

Merge branch 'main' of gitlab.com:bashrc2/epicyon

commit 560f02044a
@@ -388,3 +388,15 @@ The CalDav endpoint for an account is:
```bash
yourdomain/calendars/yournick
```

## Web Crawlers

Having search engines index social media posts is not usually considered appropriate, since even "public" posts may contain personally identifiable information. If you are running a news instance then web crawlers are permitted, so that news articles can be indexed by search engines, but otherwise they are blocked by default.
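
The logic added by this commit lives in crawlers.py, shown further down. As a rough sketch of just the crawler-handling part of that check (the function name and signature here are condensed for illustration, not the real API):

```python
def crawler_blocked(agent_str: str, news_instance: bool,
                    crawlers_allowed: []) -> bool:
    """Condensed sketch of the crawler branch of blocked_user_agent()"""
    agent_str_lower = agent_str.lower()
    # user agents containing "bot/" or "bot-" are treated as web crawlers
    if 'bot/' not in agent_str_lower and 'bot-' not in agent_str_lower:
        return False
    # news instances want their articles indexed by search engines
    if news_instance:
        return False
    # otherwise only crawlers on the allowed list get through
    return not any(allowed.lower() in agent_str_lower
                   for allowed in crawlers_allowed)
```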

If you want to allow specific web crawlers then, when running the daemon (typically via systemd), you can use the **crawlersAllowed** option. It takes a list of bot names, separated by commas. For example:
```bash
--crawlersAllowed "googlebot, apple"
```

Typically web crawlers have names ending in "bot", but a partial name is enough to match.
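
Allowed names are matched as case-insensitive substrings of the User-Agent header, which is why a partial name suffices. A small illustration (the User-Agent string here is only an example):

```python
crawlers_allowed = ['google']
agent_str = 'Mozilla/5.0 (compatible; Googlebot/2.1)'
allowed = any(name.lower() in agent_str.lower()
              for name in crawlers_allowed)
print(allowed)  # True: "google" matches "Googlebot"
```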

crawlers.py (new file, 120 lines)
@@ -0,0 +1,120 @@
__filename__ = "crawlers.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.3.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@libreserver.org"
__status__ = "Production"
__module_group__ = "Core"

import time
from utils import save_json
from utils import user_agent_domain
from blocking import update_blocked_cache
from blocking import is_blocked_domain

default_user_agent_blocks = [
    'fedilist'
]


def update_known_crawlers(ua_str: str,
                          base_dir: str, known_crawlers: {},
                          last_known_crawler: int):
    """Updates a dictionary of known crawlers accessing nodeinfo
    or the masto API
    """
    if not ua_str:
        return None

    curr_time = int(time.time())
    if known_crawlers.get(ua_str):
        known_crawlers[ua_str]['hits'] += 1
        known_crawlers[ua_str]['lastseen'] = curr_time
    else:
        known_crawlers[ua_str] = {
            "lastseen": curr_time,
            "hits": 1
        }

    if curr_time - last_known_crawler >= 30:
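        # only prune and save if at least 30 seconds have passed
        # since the last observation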
        # remove any old observations
        remove_crawlers = []
        for uagent, item in known_crawlers.items():
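            # forget user agents which have not been seen for 30 days or more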
            if curr_time - item['lastseen'] >= 60 * 60 * 24 * 30:
                remove_crawlers.append(uagent)
        for uagent in remove_crawlers:
            del known_crawlers[uagent]
        # save the list of crawlers
        save_json(known_crawlers,
                  base_dir + '/accounts/knownCrawlers.json')
    return curr_time


def blocked_user_agent(calling_domain: str, agent_str: str,
                       news_instance: bool, debug: bool,
                       user_agents_blocked: [],
                       blocked_cache_last_updated,
                       base_dir: str,
                       blocked_cache: [],
                       blocked_cache_update_secs: int,
                       crawlers_allowed: []):
    """Should a GET or POST be blocked based upon its user agent?
    """
    if not agent_str:
        return False, blocked_cache_last_updated

    agent_str_lower = agent_str.lower()
    for ua_block in default_user_agent_blocks:
        if ua_block in agent_str_lower:
            print('Blocked User agent: ' + ua_block)
            return True, blocked_cache_last_updated

    agent_domain = None

    if agent_str:
        # is this a web crawler? If so then block it
        if 'bot/' in agent_str_lower or 'bot-' in agent_str_lower:
            # if this is a news instance then we want it
            # to be indexed by search engines
            if news_instance:
                return False, blocked_cache_last_updated
            # is this crawler allowed?
            for crawler in crawlers_allowed:
                if crawler.lower() in agent_str_lower:
                    return False, blocked_cache_last_updated
            print('Blocked Crawler: ' + agent_str)
            return True, blocked_cache_last_updated
        # get domain name from User-Agent
        agent_domain = user_agent_domain(agent_str, debug)
    else:
        # no User-Agent header is present
        return True, blocked_cache_last_updated

    # is the User-Agent type blocked? eg. "Mastodon"
    if user_agents_blocked:
        blocked_ua = False
        for agent_name in user_agents_blocked:
            if agent_name in agent_str:
                blocked_ua = True
                break
        if blocked_ua:
            return True, blocked_cache_last_updated

    if not agent_domain:
        return False, blocked_cache_last_updated

    # is the User-Agent domain blocked
    blocked_ua = False
    if not agent_domain.startswith(calling_domain):
        blocked_cache_last_updated = \
            update_blocked_cache(base_dir, blocked_cache,
                                 blocked_cache_last_updated,
                                 blocked_cache_update_secs)

        blocked_ua = \
            is_blocked_domain(base_dir, agent_domain, blocked_cache)
        # if self.server.debug:
        if blocked_ua:
            print('Blocked User agent: ' + agent_domain)
    return blocked_ua, blocked_cache_last_updated

daemon.py (172 lines changed)
@@ -378,6 +378,8 @@ from fitnessFunctions import sorted_watch_points
from fitnessFunctions import html_watch_points_graph
from siteactive import referer_is_active
from webapp_likers import html_likers_of_post
from crawlers import update_known_crawlers
from crawlers import blocked_user_agent
import os
@@ -418,36 +420,6 @@ def save_domain_qrcode(base_dir: str, http_prefix: str,
class PubServer(BaseHTTPRequestHandler):
    protocol_version = 'HTTP/1.1'

    def _update_known_crawlers(self, ua_str: str) -> None:
        """Updates a dictionary of known crawlers accessing nodeinfo
        or the masto API
        """
        if not ua_str:
            return

        curr_time = int(time.time())
        if self.server.known_crawlers.get(ua_str):
            self.server.known_crawlers[ua_str]['hits'] += 1
            self.server.known_crawlers[ua_str]['lastseen'] = curr_time
        else:
            self.server.known_crawlers[ua_str] = {
                "lastseen": curr_time,
                "hits": 1
            }

        if curr_time - self.server.last_known_crawler >= 30:
            # remove any old observations
            remove_crawlers = []
            for uagent, item in self.server.known_crawlers.items():
                if curr_time - item['lastseen'] >= 60 * 60 * 24 * 30:
                    remove_crawlers.append(uagent)
            for uagent in remove_crawlers:
                del self.server.known_crawlers[uagent]
            # save the list of crawlers
            save_json(self.server.known_crawlers,
                      self.server.base_dir + '/accounts/knownCrawlers.json')
        self.server.last_known_crawler = curr_time

    def _get_instance_url(self, calling_domain: str) -> str:
        """Returns the URL for this instance
        """
@@ -589,65 +561,6 @@ class PubServer(BaseHTTPRequestHandler):
        else:
            print('ERROR: unable to create vote')

    def _blocked_user_agent(self, calling_domain: str, agent_str: str) -> bool:
        """Should a GET or POST be blocked based upon its user agent?
        """
        if not agent_str:
            return False

        agent_str_lower = agent_str.lower()
        default_agent_blocks = [
            'fedilist'
        ]
        for ua_block in default_agent_blocks:
            if ua_block in agent_str_lower:
                print('Blocked User agent: ' + ua_block)
                return True

        agent_domain = None

        if agent_str:
            # is this a web crawler? If so the block it
            if 'bot/' in agent_str_lower or 'bot-' in agent_str_lower:
                if self.server.news_instance:
                    return False
                print('Blocked Crawler: ' + agent_str)
                return True
            # get domain name from User-Agent
            agent_domain = user_agent_domain(agent_str, self.server.debug)
        else:
            # no User-Agent header is present
            return True

        # is the User-Agent type blocked? eg. "Mastodon"
        if self.server.user_agents_blocked:
            blocked_ua = False
            for agent_name in self.server.user_agents_blocked:
                if agent_name in agent_str:
                    blocked_ua = True
                    break
            if blocked_ua:
                return True

        if not agent_domain:
            return False

        # is the User-Agent domain blocked
        blocked_ua = False
        if not agent_domain.startswith(calling_domain):
            self.server.blocked_cache_last_updated = \
                update_blocked_cache(self.server.base_dir,
                                     self.server.blocked_cache,
                                     self.server.blocked_cache_last_updated,
                                     self.server.blocked_cache_update_secs)

            blocked_ua = is_blocked_domain(self.server.base_dir, agent_domain,
                                           self.server.blocked_cache)
            # if self.server.debug:
            if blocked_ua:
                print('Blocked User agent: ' + agent_domain)
        return blocked_ua

    def _request_csv(self) -> bool:
        """Should a csv response be given?
        """
@@ -1115,7 +1028,8 @@ class PubServer(BaseHTTPRequestHandler):
                      show_node_info_accounts: bool,
                      referer_domain: str,
                      debug: bool,
                      calling_site_timeout: int) -> bool:
                      calling_site_timeout: int,
                      known_crawlers: {}) -> bool:
        """This is a vestigil mastodon API for the purpose
        of returning an empty result to sites like
        https://mastopeek.app-dist.eu
@@ -1171,7 +1085,12 @@ class PubServer(BaseHTTPRequestHandler):
        print('mastodon api v1: authorized ' + str(authorized))
        print('mastodon api v1: nickname ' + str(nickname))
        print('mastodon api v1: referer ' + referer_domain)
        self._update_known_crawlers(ua_str)
        crawl_time = \
            update_known_crawlers(ua_str, base_dir,
                                  self.server.known_crawlers,
                                  self.server.last_known_crawler)
        if crawl_time is not None:
            self.server.last_known_crawler = crawl_time

        broch_mode = broch_mode_is_active(base_dir)
        send_json, send_json_str = \
@@ -1229,14 +1148,16 @@ class PubServer(BaseHTTPRequestHandler):
                   project_version: str,
                   custom_emoji: [],
                   show_node_info_accounts: bool,
                   referer_domain: str, debug: bool) -> bool:
                   referer_domain: str, debug: bool,
                   known_crawlers: {}) -> bool:
        return self._masto_api_v1(path, calling_domain, ua_str, authorized,
                                  http_prefix, base_dir, nickname, domain,
                                  domain_full, onion_domain, i2p_domain,
                                  translate, registration, system_language,
                                  project_version, custom_emoji,
                                  show_node_info_accounts,
                                  referer_domain, debug, 5)
                                  referer_domain, debug, 5,
                                  known_crawlers)

    def _show_vcard(self, base_dir: str, path: str, calling_domain: str,
                    referer_domain: str, domain: str, debug: bool) -> bool:
@@ -1349,7 +1270,13 @@ class PubServer(BaseHTTPRequestHandler):
                return True
        if self.server.debug:
            print('DEBUG: nodeinfo ' + self.path)
        self._update_known_crawlers(ua_str)
        crawl_time = \
            update_known_crawlers(ua_str,
                                  self.server.base_dir,
                                  self.server.known_crawlers,
                                  self.server.last_known_crawler)
        if crawl_time is not None:
            self.server.last_known_crawler = crawl_time

        # If we are in broch mode then don't show potentially
        # sensitive metadata.
@@ -6762,6 +6689,29 @@ class PubServer(BaseHTTPRequestHandler):
                            set_config_param(base_dir, 'userAgentsBlocked',
                                             user_agents_blocked_str)

                        # save allowed web crawlers
                        crawlers_allowed = []
                        if fields.get('crawlersAllowedStr'):
                            crawlers_allowed_str = \
                                fields['crawlersAllowedStr']
                            crawlers_allowed_list = \
                                crawlers_allowed_str.split('\n')
                            for uagent in crawlers_allowed_list:
                                if uagent in crawlers_allowed:
                                    continue
                                crawlers_allowed.append(uagent.strip())
                        if str(self.server.crawlers_allowed) != \
                           str(crawlers_allowed):
                            self.server.crawlers_allowed = \
                                crawlers_allowed
                            crawlers_allowed_str = ''
                            for uagent in crawlers_allowed:
                                if crawlers_allowed_str:
                                    crawlers_allowed_str += ','
                                crawlers_allowed_str += uagent
                            set_config_param(base_dir, 'crawlersAllowed',
                                             crawlers_allowed_str)

                        # save peertube instances list
                        peertube_instances_file = \
                            base_dir + '/accounts/peertube.txt'
@@ -13806,6 +13756,7 @@ class PubServer(BaseHTTPRequestHandler):
                                    self.server.text_mode_banner,
                                    city,
                                    self.server.user_agents_blocked,
                                    self.server.crawlers_allowed,
                                    access_keys,
                                    default_reply_interval_hrs,
                                    self.server.cw_lists,
@@ -14048,7 +13999,17 @@ class PubServer(BaseHTTPRequestHandler):
        ua_str = self._get_user_agent()

        if not self._permitted_crawler_path(self.path):
            if self._blocked_user_agent(calling_domain, ua_str):
            block, self.server.blocked_cache_last_updated = \
                blocked_user_agent(calling_domain, ua_str,
                                   self.server.news_instance,
                                   self.server.debug,
                                   self.server.user_agents_blocked,
                                   self.server.blocked_cache_last_updated,
                                   self.server.base_dir,
                                   self.server.blocked_cache,
                                   self.server.blocked_cache_update_secs,
                                   self.server.crawlers_allowed)
            if block:
                self._400()
                return
@@ -14430,7 +14391,8 @@ class PubServer(BaseHTTPRequestHandler):
                           self.server.custom_emoji,
                           self.server.show_node_info_accounts,
                           referer_domain,
                           self.server.debug):
                           self.server.debug,
                           self.server.known_crawlers):
            return

        fitness_performance(getreq_start_time, self.server.fitness,
@@ -18579,7 +18541,17 @@ class PubServer(BaseHTTPRequestHandler):

        ua_str = self._get_user_agent()

        if self._blocked_user_agent(calling_domain, ua_str):
        block, self.server.blocked_cache_last_updated = \
            blocked_user_agent(calling_domain, ua_str,
                               self.server.news_instance,
                               self.server.debug,
                               self.server.user_agents_blocked,
                               self.server.blocked_cache_last_updated,
                               self.server.base_dir,
                               self.server.blocked_cache,
                               self.server.blocked_cache_update_secs,
                               self.server.crawlers_allowed)
        if block:
            self._400()
            self.server.postreq_busy = False
            return
@@ -19511,7 +19483,8 @@ def load_tokens(base_dir: str, tokens_dict: {}, tokens_lookup: {}) -> None:
        break


def run_daemon(dyslexic_font: bool,
def run_daemon(crawlers_allowed: [],
               dyslexic_font: bool,
               content_license_url: str,
               lists_enabled: str,
               default_reply_interval_hrs: int,
@@ -19690,6 +19663,9 @@ def run_daemon(dyslexic_font: bool,
    # list of blocked user agent types within the User-Agent header
    httpd.user_agents_blocked = user_agents_blocked

    # list of crawler bots permitted within the User-Agent header
    httpd.crawlers_allowed = crawlers_allowed

    httpd.unit_test = unit_test
    httpd.allow_local_network_access = allow_local_network_access
    if unit_test:

epicyon.py (23 lines changed)
@@ -141,6 +141,10 @@ parser.add_argument('--lists_enabled', type=str,
parser.add_argument('--userAgentBlocks', type=str,
                    default=None,
                    help='List of blocked user agents, separated by commas')
parser.add_argument('--crawlersAllowed', type=str,
                    default=None,
                    help='List of permitted web crawler user agents, ' +
                    'separated by commas')
parser.add_argument('--libretranslate', dest='libretranslateUrl', type=str,
                    default=None,
                    help='URL for LibreTranslate service')
@@ -3301,8 +3305,20 @@ else:
        get_config_param(base_dir, 'userAgentsBlocked')
if user_agents_blocked_str:
    agent_blocks_list = user_agents_blocked_str.split(',')
    for agentBlockStr in agent_blocks_list:
        user_agents_blocked.append(agentBlockStr.strip())
    for user_agents_blocked_str2 in agent_blocks_list:
        user_agents_blocked.append(user_agents_blocked_str2.strip())

crawlers_allowed = []
if args.crawlersAllowed:
    crawlers_allowed_str = args.crawlersAllowed
    set_config_param(base_dir, 'crawlersAllowed', crawlers_allowed_str)
else:
    crawlers_allowed_str = \
        get_config_param(base_dir, 'crawlersAllowed')
if crawlers_allowed_str:
    crawlers_allowed_list = crawlers_allowed_str.split(',')
    for crawlers_allowed_str2 in crawlers_allowed_list:
        crawlers_allowed.append(crawlers_allowed_str2.strip())

lists_enabled = ''
if args.lists_enabled:
@@ -3365,7 +3381,8 @@ if args.defaultCurrency:
        print('Default currency set to ' + args.defaultCurrency)

if __name__ == "__main__":
    run_daemon(args.dyslexic_font,
    run_daemon(crawlers_allowed,
               args.dyslexic_font,
               content_license_url,
               lists_enabled,
               args.default_reply_interval_hrs,

tests.py (16 lines changed)
@@ -822,8 +822,10 @@ def create_server_alice(path: str, domain: str, port: int,
    lists_enabled = ''
    content_license_url = 'https://creativecommons.org/licenses/by/4.0'
    dyslexic_font = False
    crawlers_allowed = []
    print('Server running: Alice')
    run_daemon(dyslexic_font,
    run_daemon(crawlers_allowed,
               dyslexic_font,
               content_license_url,
               lists_enabled, default_reply_interval_hrs,
               low_bandwidth, max_like_count,
@@ -975,8 +977,10 @@ def create_server_bob(path: str, domain: str, port: int,
    lists_enabled = ''
    content_license_url = 'https://creativecommons.org/licenses/by/4.0'
    dyslexic_font = False
    crawlers_allowed = []
    print('Server running: Bob')
    run_daemon(dyslexic_font,
    run_daemon(crawlers_allowed,
               dyslexic_font,
               content_license_url,
               lists_enabled, default_reply_interval_hrs,
               low_bandwidth, max_like_count,
@@ -1051,8 +1055,10 @@ def create_server_eve(path: str, domain: str, port: int, federation_list: [],
    lists_enabled = ''
    content_license_url = 'https://creativecommons.org/licenses/by/4.0'
    dyslexic_font = False
    crawlers_allowed = []
    print('Server running: Eve')
    run_daemon(dyslexic_font,
    run_daemon(crawlers_allowed,
               dyslexic_font,
               content_license_url,
               lists_enabled, default_reply_interval_hrs,
               low_bandwidth, max_like_count,
@@ -1129,8 +1135,10 @@ def create_server_group(path: str, domain: str, port: int,
    lists_enabled = ''
    content_license_url = 'https://creativecommons.org/licenses/by/4.0'
    dyslexic_font = False
    crawlers_allowed = []
    print('Server running: Group')
    run_daemon(dyslexic_font,
    run_daemon(crawlers_allowed,
               dyslexic_font,
               content_license_url,
               lists_enabled, default_reply_interval_hrs,
               low_bandwidth, max_like_count,
@@ -515,5 +515,6 @@
    "Show who liked this post": "أظهر من أحب هذا المنشور",
    "Show who repeated this post": "أظهر من كرر هذا المنصب",
    "Repeated by": "يتكرر بواسطة",
    "Register": "يسجل"
    "Register": "يسجل",
    "Web Crawlers Allowed": "برامج زحف الويب المسموح بها"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Mostra a qui li agrada aquesta publicació",
    "Show who repeated this post": "Mostra qui ha repetit aquesta publicació",
    "Repeated by": "Repetit per",
    "Register": "Registra't"
    "Register": "Registra't",
    "Web Crawlers Allowed": "Es permeten rastrejadors web"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Dangoswch pwy oedd yn hoffi'r post hwn",
    "Show who repeated this post": "Dangoswch pwy ailadroddodd y post hwn",
    "Repeated by": "Ailadrodd gan",
    "Register": "Cofrestrwch"
    "Register": "Cofrestrwch",
    "Web Crawlers Allowed": "Caniatáu Ymlusgwyr Gwe"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Zeigen, wem dieser Beitrag gefallen hat",
    "Show who repeated this post": "Zeigen Sie, wer diesen Beitrag wiederholt hat",
    "Repeated by": "Wiederholt von",
    "Register": "Registrieren"
    "Register": "Registrieren",
    "Web Crawlers Allowed": "Webcrawler erlaubt"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Show who liked this post",
    "Show who repeated this post": "Show who repeated this post",
    "Repeated by": "Repeated by",
    "Register": "Register"
    "Register": "Register",
    "Web Crawlers Allowed": "Web Crawlers Allowed"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Mostrar a quién le gustó esta publicación",
    "Show who repeated this post": "Mostrar quién repitió esta publicación",
    "Repeated by": "Repetido por",
    "Register": "Registrarse"
    "Register": "Registrarse",
    "Web Crawlers Allowed": "Rastreadores web permitidos"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Montrer qui a aimé ce post",
    "Show who repeated this post": "Montrer qui a répété ce post",
    "Repeated by": "Répété par",
    "Register": "S'inscrire"
    "Register": "S'inscrire",
    "Web Crawlers Allowed": "Robots d'exploration Web autorisés"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Taispeáin cé a thaitin an postáil seo",
    "Show who repeated this post": "Taispeáin cé a rinne an postáil seo arís",
    "Repeated by": "Arís agus arís eile ag",
    "Register": "Clár"
    "Register": "Clár",
    "Web Crawlers Allowed": "Crawlers Gréasáin Ceadaithe"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "दिखाएँ कि इस पोस्ट को किसने पसंद किया",
    "Show who repeated this post": "दिखाएं कि इस पोस्ट को किसने दोहराया",
    "Repeated by": "द्वारा दोहराया गया",
    "Register": "रजिस्टर करें"
    "Register": "रजिस्टर करें",
    "Web Crawlers Allowed": "वेब क्रॉलर की अनुमति है"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Mostra a chi è piaciuto questo post",
    "Show who repeated this post": "Mostra chi ha ripetuto questo post",
    "Repeated by": "Ripetuto da",
    "Register": "Registrati"
    "Register": "Registrati",
    "Web Crawlers Allowed": "Web crawler consentiti"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "この投稿を高く評価した人を表示する",
    "Show who repeated this post": "この投稿を繰り返した人を表示する",
    "Repeated by": "によって繰り返される",
    "Register": "登録"
    "Register": "登録",
    "Web Crawlers Allowed": "許可されるWebクローラー"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "이 포스트를 좋아한 사람 표시",
    "Show who repeated this post": "이 포스트를 반복한 사람 표시",
    "Repeated by": "반복한 사람",
    "Register": "등록"
    "Register": "등록",
    "Web Crawlers Allowed": "웹 크롤러 허용"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Nîşan bide kê ev post eciband",
    "Show who repeated this post": "Nîşan bide kê ev post dubare kiriye",
    "Repeated by": "Ji hêla dubare kirin",
    "Register": "Fêhrist"
    "Register": "Fêhrist",
    "Web Crawlers Allowed": "Crawlers Web Destûrdar in"
}
@@ -511,5 +511,6 @@
    "Show who liked this post": "Show who liked this post",
    "Show who repeated this post": "Show who repeated this post",
    "Repeated by": "Repeated by",
    "Register": "Register"
    "Register": "Register",
    "Web Crawlers Allowed": "Web Crawlers Allowed"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Pokaż, kto polubił ten post",
    "Show who repeated this post": "Pokaż, kto powtórzył ten post",
    "Repeated by": "Powtórzone przez",
    "Register": "Zarejestrować"
    "Register": "Zarejestrować",
    "Web Crawlers Allowed": "Dozwolone roboty sieciowe"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Mostrar quem gostou deste post",
    "Show who repeated this post": "Mostrar quem repetiu esta postagem",
    "Repeated by": "Repetido por",
    "Register": "Registro"
    "Register": "Registro",
    "Web Crawlers Allowed": "Rastreadores da Web permitidos"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Показать, кому понравился этот пост",
    "Show who repeated this post": "Показать, кто повторил этот пост",
    "Repeated by": "Повторено",
    "Register": "регистр"
    "Register": "регистр",
    "Web Crawlers Allowed": "Веб-сканеры разрешены"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Onyesha ni nani aliyependa chapisho hili",
    "Show who repeated this post": "Onyesha ni nani aliyerudia chapisho hili",
    "Repeated by": "Imerudiwa na",
    "Register": "Sajili"
    "Register": "Sajili",
    "Web Crawlers Allowed": "Watambazaji Wavuti Zinaruhusiwa"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "Покажіть, кому сподобався цей пост",
    "Show who repeated this post": "Покажіть, хто повторив цей пост",
    "Repeated by": "Повторюється за",
    "Register": "Реєстрація"
    "Register": "Реєстрація",
    "Web Crawlers Allowed": "Веб-сканери дозволені"
}
@@ -515,5 +515,6 @@
    "Show who liked this post": "显示谁喜欢这篇文章",
    "Show who repeated this post": "显示谁重复了这篇文章",
    "Repeated by": "重复",
    "Register": "登记"
    "Register": "登记",
    "Web Crawlers Allowed": "允许网络爬虫"
}
@@ -1631,6 +1631,7 @@ def _html_edit_profile_shared_items(base_dir: str, nickname: str, domain: str,

def _html_edit_profile_filtering(base_dir: str, nickname: str, domain: str,
                                 user_agents_blocked: str,
                                 crawlers_allowed: str,
                                 translate: {}, reply_interval_hours: int,
                                 cw_lists: {}, lists_enabled: str) -> str:
    """Filtering and blocking section of edit profile screen
@@ -1807,6 +1808,16 @@ def _html_edit_profile_filtering(base_dir: str, nickname: str, domain: str,
                           'userAgentsBlockedStr', user_agents_blocked_str,
                           200, '', False)

        crawlers_allowed_str = ''
        for uagent in crawlers_allowed:
            if crawlers_allowed_str:
                crawlers_allowed_str += '\n'
            crawlers_allowed_str += uagent
        edit_profile_form += \
            edit_text_area(translate['Web Crawlers Allowed'],
                           'crawlersAllowedStr', crawlers_allowed_str,
                           200, '', False)

        cw_lists_str = ''
        for name, _ in cw_lists.items():
            variablename = get_cw_list_variable(name)
@@ -2137,7 +2148,8 @@ def html_edit_profile(css_cache: {}, translate: {}, base_dir: str, path: str,
                      default_timeline: str, theme: str,
                      peertube_instances: [],
                      text_mode_banner: str, city: str,
                      user_agents_blocked: str,
                      user_agents_blocked: [],
                      crawlers_allowed: [],
                      access_keys: {},
                      default_reply_interval_hrs: int,
                      cw_lists: {}, lists_enabled: str) -> str:
@@ -2354,8 +2366,8 @@ def html_edit_profile(css_cache: {}, translate: {}, base_dir: str, path: str,
                                 default_reply_interval_hrs)
    edit_profile_form += \
        _html_edit_profile_filtering(base_dir, nickname, domain,
                                     user_agents_blocked, translate,
                                     reply_interval_hours,
                                     user_agents_blocked, crawlers_allowed,
                                     translate, reply_interval_hours,
                                     cw_lists, lists_enabled)

    # git projects section