mirror of https://gitlab.com/bashrc2/epicyon
				
				
				
			Improve checking of site active status
This uses a defluffed version of webchk
merge-requests/30/head
							parent
							
								
									0826326653
								
							
						
					
					
						commit
						b16fb0d24c
					
				
							
								
								
									
										2
									
								
								posts.py
								
								
								
								
							
							
						
						
									
										2
									
								
								posts.py
								
								
								
								
							|  | @ -30,6 +30,7 @@ from session import postJsonString | ||||||
| from session import postImage | from session import postImage | ||||||
| from webfinger import webfingerHandle | from webfinger import webfingerHandle | ||||||
| from httpsig import createSignedHeader | from httpsig import createSignedHeader | ||||||
|  | from siteactive import siteIsActive | ||||||
| from utils import fileLastModified | from utils import fileLastModified | ||||||
| from utils import isPublicPost | from utils import isPublicPost | ||||||
| from utils import hasUsersPath | from utils import hasUsersPath | ||||||
|  | @ -38,7 +39,6 @@ from utils import getFullDomain | ||||||
| from utils import getFollowersList | from utils import getFollowersList | ||||||
| from utils import isEvil | from utils import isEvil | ||||||
| from utils import removeIdEnding | from utils import removeIdEnding | ||||||
| from utils import siteIsActive |  | ||||||
| from utils import getCachedPostFilename | from utils import getCachedPostFilename | ||||||
| from utils import getStatusNumber | from utils import getStatusNumber | ||||||
| from utils import createPersonDir | from utils import createPersonDir | ||||||
|  |  | ||||||
|  | @ -0,0 +1,121 @@ | ||||||
|  | __filename__ = "siteactive.py" | ||||||
|  | __author__ = "Bob Mottram" | ||||||
|  | __credits__ = ["webchk"] | ||||||
|  | __license__ = "AGPL3+" | ||||||
|  | __version__ = "1.2.0" | ||||||
|  | __maintainer__ = "Bob Mottram" | ||||||
|  | __email__ = "bob@freedombone.net" | ||||||
|  | __status__ = "Production" | ||||||
|  | 
 | ||||||
|  | import http.client | ||||||
|  | from urllib.parse import urlparse | ||||||
|  | import ssl | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class Result:
    """Holds the result of a URL check.

    Attributes (all initialised by the constructor):
        url:          the URL which was checked
        status:       HTTP status code, or 0 if no response was obtained
        desc:         reason phrase or other description of the outcome
        headers:      dict of response headers, None until fill_headers()
                      has been called
        latency:      defaults to 0 and is only used by __repr__ here
        content:      defaults to an empty string
        redirect:     a Result for the URL which this one redirected to,
                      or None
        sitemap_urls: a list of Result objects, or None

    NOTE(review): nothing visible in this module assigns latency, content,
    redirect or sitemap_urls after construction. They appear to be retained
    from the webchk code which this class was derived from - confirm
    before relying on them.
    """
    def __init__(self, url: str):
        self.url = url
        self.status = 0
        self.desc = ''
        self.headers = None
        self.latency = 0
        self.content = ''
        self.redirect = None
        self.sitemap_urls = None

    def __repr__(self) -> str:
        # a status of zero means that no HTTP response was recorded,
        # so there is only the description to show
        if self.status == 0:
            return '{} ... {}'.format(self.url, self.desc)
        return '{} ... {} {} ({})'.format(
            self.url, self.status, self.desc, self.latency
        )

    def fill_headers(self, headers) -> None:
        """Takes a list of (name, value) tuples and converts it to a
        dictionary, stored in the headers attribute.
        If a name occurs more than once then the last value wins.
        """
        self.headers = {pair[0]: pair[1] for pair in headers}
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _siteActiveParseUrl(url): | ||||||
|  |     """Returns an object with properties representing | ||||||
|  | 
 | ||||||
|  |     scheme:   URL scheme specifier | ||||||
|  |     netloc:   Network location part | ||||||
|  |     path:     Hierarchical path | ||||||
|  |     params:   Parameters for last path element | ||||||
|  |     query:    Query component | ||||||
|  |     fragment: Fragment identifier | ||||||
|  |     username: User name | ||||||
|  |     password: Password | ||||||
|  |     hostname: Host name (lower case) | ||||||
|  |     port:     Port number as integer, if present | ||||||
|  |     """ | ||||||
|  |     loc = urlparse(url) | ||||||
|  | 
 | ||||||
|  |     # if the scheme (http, https ...) is not available urlparse wont work | ||||||
|  |     if loc.scheme == "": | ||||||
|  |         url = "http://" + url | ||||||
|  |         loc = urlparse(url) | ||||||
|  |     return loc | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _siteACtiveHttpConnect(loc, timeout: int): | ||||||
|  |     """Connects to the host and returns an HTTP or HTTPS connections.""" | ||||||
|  |     if loc.scheme == "https": | ||||||
|  |         ssl_context = ssl.SSLContext() | ||||||
|  |         return http.client.HTTPSConnection( | ||||||
|  |             loc.netloc, context=ssl_context, timeout=timeout) | ||||||
|  |     return http.client.HTTPConnection(loc.netloc, timeout=timeout) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def _siteActiveHttpRequest(loc, timeout: int):
    """Performs a HTTP HEAD request and returns the response
    in a Result object.

    loc:     parsed url, as returned by urlparse
    timeout: socket timeout in seconds

    The returned Result has its status, desc and headers set from
    the response. Any exception raised while connecting or reading the
    response is passed on to the caller, after the connection has
    been closed.
    """
    # A url without a path (eg. https://archive.org) has an empty
    # path, which would produce a malformed request line
    target = loc.path or '/'
    if loc.query:
        target += '?' + loc.query

    conn = _siteACtiveHttpConnect(loc, timeout)
    try:
        conn.request('HEAD', target)
        resp = conn.getresponse()

        result = Result(loc.geturl())
        result.status = resp.status
        result.desc = resp.reason
        result.fill_headers(resp.getheaders())
    finally:
        # close the connection even if the request failed
        conn.close()
    return result
|  | 
 | ||||||
|  | 
 | ||||||
def siteIsActive(url: str, timeout=10) -> bool:
    """Returns true if the current url is resolvable.
    This can be used to check that an instance is online before
    trying to send posts to it.

    url:     the url to check, which must begin with http
    timeout: seconds to wait for the site to respond

    Any HTTP response, including a 4xx or 5xx status, counts as active
    because the site was reachable. False is returned if the url does
    not begin with http, or if the request fails for any reason, such as
    a DNS failure, a refused connection or a timeout.
    Onion and i2p addresses are not checked and are always reported
    as active.
    """
    if not url.startswith('http'):
        return False
    if '.onion/' in url or '.i2p/' in url or \
       url.endswith(('.onion', '.i2p')):
        # skip this check for onion and i2p
        return True

    loc = _siteActiveParseUrl(url)

    try:
        _siteActiveHttpRequest(loc, timeout)
    except Exception:
        # Best effort: any failure means that the site is unreachable.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        return False
    return True
							
								
								
									
										6
									
								
								tests.py
								
								
								
								
							
							
						
						
									
										6
									
								
								tests.py
								
								
								
								
							|  | @ -38,7 +38,7 @@ from utils import getFullDomain | ||||||
| from utils import validNickname | from utils import validNickname | ||||||
| from utils import firstParagraphFromString | from utils import firstParagraphFromString | ||||||
| from utils import removeIdEnding | from utils import removeIdEnding | ||||||
| from utils import siteIsActive | from siteactive import siteIsActive | ||||||
| from utils import updateRecentPostsCache | from utils import updateRecentPostsCache | ||||||
| from utils import followPerson | from utils import followPerson | ||||||
| from utils import getNicknameFromActor | from utils import getNicknameFromActor | ||||||
|  | @ -2067,6 +2067,7 @@ def testJsonld(): | ||||||
| 
 | 
 | ||||||
| def testSiteIsActive(): | def testSiteIsActive(): | ||||||
|     print('testSiteIsActive') |     print('testSiteIsActive') | ||||||
|  |     assert(siteIsActive('https://archive.org')) | ||||||
|     assert(siteIsActive('https://mastodon.social')) |     assert(siteIsActive('https://mastodon.social')) | ||||||
|     assert(not siteIsActive('https://notarealwebsite.a.b.c')) |     assert(not siteIsActive('https://notarealwebsite.a.b.c')) | ||||||
| 
 | 
 | ||||||
|  | @ -2818,7 +2819,8 @@ def testFunctions(): | ||||||
|         'createServerBob', |         'createServerBob', | ||||||
|         'createServerEve', |         'createServerEve', | ||||||
|         'E2EEremoveDevice', |         'E2EEremoveDevice', | ||||||
|         'setOrganizationScheme' |         'setOrganizationScheme', | ||||||
|  |         'fill_headers' | ||||||
|     ] |     ] | ||||||
|     excludeImports = [ |     excludeImports = [ | ||||||
|         'link', |         'link', | ||||||
|  |  | ||||||
							
								
								
									
										25
									
								
								utils.py
								
								
								
								
							
							
						
						
									
										25
									
								
								utils.py
								
								
								
								
							|  | @ -11,9 +11,6 @@ import time | ||||||
| import shutil | import shutil | ||||||
| import datetime | import datetime | ||||||
| import json | import json | ||||||
| from socket import error as SocketError |  | ||||||
| import errno |  | ||||||
| import urllib.request |  | ||||||
| import idna | import idna | ||||||
| from pprint import pprint | from pprint import pprint | ||||||
| from calendar import monthrange | from calendar import monthrange | ||||||
|  | @ -1841,28 +1838,6 @@ def updateAnnounceCollection(recentPostsCache: {}, | ||||||
|         saveJson(postJsonObject, postFilename) |         saveJson(postJsonObject, postFilename) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def siteIsActive(url: str) -> bool: |  | ||||||
|     """Returns true if the current url is resolvable. |  | ||||||
|     This can be used to check that an instance is online before |  | ||||||
|     trying to send posts to it. |  | ||||||
|     """ |  | ||||||
|     if not url.startswith('http'): |  | ||||||
|         return False |  | ||||||
|     if '.onion/' in url or '.i2p/' in url or \ |  | ||||||
|        url.endswith('.onion') or \ |  | ||||||
|        url.endswith('.i2p'): |  | ||||||
|         # skip this check for onion and i2p |  | ||||||
|         return True |  | ||||||
|     try: |  | ||||||
|         req = urllib.request.Request(url) |  | ||||||
|         urllib.request.urlopen(req, timeout=10)  # nosec |  | ||||||
|         return True |  | ||||||
|     except SocketError as e: |  | ||||||
|         if e.errno == errno.ECONNRESET: |  | ||||||
|             print('WARN: connection was reset during siteIsActive') |  | ||||||
|     return False |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def weekDayOfMonthStart(monthNumber: int, year: int) -> int: | def weekDayOfMonthStart(monthNumber: int, year: int) -> int: | ||||||
|     """Gets the day number of the first day of the month |     """Gets the day number of the first day of the month | ||||||
|     1=sun, 7=sat |     1=sun, 7=sat | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue