mirror of https://gitlab.com/bashrc2/epicyon
Improve checking of site active status
This uses a defluffed version of webchk
merge-requests/17/head
parent
0826326653
commit
b16fb0d24c
2
posts.py
2
posts.py
|
@ -30,6 +30,7 @@ from session import postJsonString
|
|||
from session import postImage
|
||||
from webfinger import webfingerHandle
|
||||
from httpsig import createSignedHeader
|
||||
from siteactive import siteIsActive
|
||||
from utils import fileLastModified
|
||||
from utils import isPublicPost
|
||||
from utils import hasUsersPath
|
||||
|
@ -38,7 +39,6 @@ from utils import getFullDomain
|
|||
from utils import getFollowersList
|
||||
from utils import isEvil
|
||||
from utils import removeIdEnding
|
||||
from utils import siteIsActive
|
||||
from utils import getCachedPostFilename
|
||||
from utils import getStatusNumber
|
||||
from utils import createPersonDir
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
__filename__ = "siteactive.py"
|
||||
__author__ = "Bob Mottram"
|
||||
__credits__ = ["webchk"]
|
||||
__license__ = "AGPL3+"
|
||||
__version__ = "1.2.0"
|
||||
__maintainer__ = "Bob Mottram"
|
||||
__email__ = "bob@freedombone.net"
|
||||
__status__ = "Production"
|
||||
|
||||
import http.client
|
||||
from urllib.parse import urlparse
|
||||
import ssl
|
||||
|
||||
|
||||
class Result:
    """Outcome of checking a single URL.

    Attributes created by the constructor:
      url: the URL that was checked
      status: HTTP status code, 0 until a response has been recorded
      desc: reason phrase or description, empty by default
      headers: response headers as a dictionary, None until filled
      latency: value shown in brackets by repr(), 0 by default
      content: empty string by default
      redirect: a Result object that the URL was redirected to,
        None by default
      sitemap_urls: a list of Result objects when the URL is a
        sitemap file which has been parsed, None by default
    """
    def __init__(self, url):
        self.url = url
        self.status = 0
        self.desc = ''
        self.latency = 0
        self.content = ''
        self.headers = None
        self.redirect = None
        self.sitemap_urls = None

    def __repr__(self):
        # a zero status means no response was recorded, so only the
        # url and description are shown
        if self.status != 0:
            return (f'{self.url} ... {self.status} '
                    f'{self.desc} ({self.latency})')
        return f'{self.url} ... {self.desc}'

    def fill_headers(self, headers):
        """Takes a list of (name, value) tuples and stores it
        as a dictionary in the headers attribute.
        """
        headerDict = {}
        for entry in headers:
            headerDict[entry[0]] = entry[1]
        self.headers = headerDict
|
||||
|
||||
|
||||
def _siteActiveParseUrl(url):
    """Parses a url, assuming http when no scheme is given.

    Returns the urlparse result, an object with properties:

    scheme: URL scheme specifier
    netloc: Network location part
    path: Hierarchical path
    params: Parameters for last path element
    query: Query component
    fragment: Fragment identifier
    username: User name
    password: Password
    hostname: Host name (lower case)
    port: Port number as integer, if present
    """
    parsed = urlparse(url)
    if parsed.scheme != "":
        return parsed

    # without a scheme (http, https ...) urlparse does not identify
    # the network location, so prepend http:// and parse again
    return urlparse("http://" + url)
|
||||
|
||||
|
||||
def _siteACtiveHttpConnect(loc, timeout: int):
    """Creates an HTTP or HTTPS connection object for a parsed url.

    loc: parsed url providing the scheme and netloc attributes
    timeout: timeout in seconds passed to the connection
    Returns a http.client.HTTPSConnection when the scheme is https,
    otherwise a http.client.HTTPConnection. No network traffic happens
    here; the socket is opened when a request is made.
    """
    if loc.scheme != "https":
        return http.client.HTTPConnection(loc.netloc, timeout=timeout)

    # Calling ssl.SSLContext() without a protocol is deprecated, so the
    # client protocol is named explicitly. PROTOCOL_TLS_CLIENT enables
    # hostname and certificate checks by default, and they are turned
    # off here to keep the behaviour of the previous bare SSLContext().
    # NOTE(review): certificates are not verified, so this only shows
    # that something answers TLS at the address - confirm that this is
    # what is wanted for the site active check.
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE
    return http.client.HTTPSConnection(
        loc.netloc, context=ssl_context, timeout=timeout)
|
||||
|
||||
|
||||
def _siteActiveHttpRequest(loc, timeout: int):
    """Performs a HTTP HEAD request and returns the response
    in a Result object.

    loc: parsed url, whose path is requested and whose full url is
         stored on the Result
    timeout: timeout passed on when creating the connection
    Returns a Result with status, desc and headers taken from
    the response.
    Any exception raised while connecting or reading the response
    propagates to the caller, after the connection has been closed.
    """
    conn = _siteACtiveHttpConnect(loc, timeout)
    try:
        # HEAD is enough to find out whether the site answers
        conn.request('HEAD', loc.path)
        resp = conn.getresponse()

        result = Result(loc.geturl())
        result.status = resp.status
        result.desc = resp.reason
        result.fill_headers(resp.getheaders())
    finally:
        # always release the socket, including when the request fails
        conn.close()
    return result
|
||||
|
||||
|
||||
def siteIsActive(url: str, timeout=10) -> bool:
    """Returns true if the current url is resolvable.
    This can be used to check that an instance is online before
    trying to send posts to it.

    url: address to check, which must begin with http
    timeout: timeout passed to the HTTP request
    Returns False for a url not beginning with http, or when the
    request fails with an exception. Returns True for onion and i2p
    addresses without checking them, and whenever a response is
    received, whatever its status code.
    """
    if not url.startswith('http'):
        return False
    if '.onion/' in url or '.i2p/' in url or \
       url.endswith(('.onion', '.i2p')):
        # skip this check for onion and i2p
        return True

    try:
        # parsing happens inside the try so that a malformed url is
        # reported as inactive rather than raising
        loc = _siteActiveParseUrl(url)
        _siteActiveHttpRequest(loc, timeout)
    except Exception:
        # no response could be obtained. Exception rather than
        # BaseException, so that KeyboardInterrupt and SystemExit
        # are not swallowed
        return False

    # Any response means that the site is up. This includes 4xx
    # statuses, which previously returned a (truthy) Result object
    # rather than the documented bool
    return True
|
6
tests.py
6
tests.py
|
@ -38,7 +38,7 @@ from utils import getFullDomain
|
|||
from utils import validNickname
|
||||
from utils import firstParagraphFromString
|
||||
from utils import removeIdEnding
|
||||
from utils import siteIsActive
|
||||
from siteactive import siteIsActive
|
||||
from utils import updateRecentPostsCache
|
||||
from utils import followPerson
|
||||
from utils import getNicknameFromActor
|
||||
|
@ -2067,6 +2067,7 @@ def testJsonld():
|
|||
|
||||
def testSiteIsActive():
    """Checks siteIsActive against two sites expected to be online
    and one hostname expected not to resolve.
    This makes real network requests.
    """
    print('testSiteIsActive')
    onlineSites = ('https://archive.org', 'https://mastodon.social')
    for siteUrl in onlineSites:
        assert siteIsActive(siteUrl)
    assert not siteIsActive('https://notarealwebsite.a.b.c')
|
||||
|
||||
|
@ -2818,7 +2819,8 @@ def testFunctions():
|
|||
'createServerBob',
|
||||
'createServerEve',
|
||||
'E2EEremoveDevice',
|
||||
'setOrganizationScheme'
|
||||
'setOrganizationScheme',
|
||||
'fill_headers'
|
||||
]
|
||||
excludeImports = [
|
||||
'link',
|
||||
|
|
25
utils.py
25
utils.py
|
@ -11,9 +11,6 @@ import time
|
|||
import shutil
|
||||
import datetime
|
||||
import json
|
||||
from socket import error as SocketError
|
||||
import errno
|
||||
import urllib.request
|
||||
import idna
|
||||
from pprint import pprint
|
||||
from calendar import monthrange
|
||||
|
@ -1841,28 +1838,6 @@ def updateAnnounceCollection(recentPostsCache: {},
|
|||
saveJson(postJsonObject, postFilename)
|
||||
|
||||
|
||||
def siteIsActive(url: str) -> bool:
    """Returns true if the current url is resolvable.
    This can be used to check that an instance is online before
    trying to send posts to it.

    url: address to check, which must begin with http
    Returns True for onion and i2p addresses without checking them,
    and when the url can be opened within 10 seconds. Returns False
    for a url not beginning with http, a malformed url, or a
    socket level failure.
    """
    if not url.startswith('http'):
        return False
    if '.onion/' in url or '.i2p/' in url or \
       url.endswith(('.onion', '.i2p')):
        # skip this check for onion and i2p
        return True
    try:
        req = urllib.request.Request(url)
        # the with statement closes the response, so that the socket
        # is not left open after the check
        with urllib.request.urlopen(req, timeout=10):  # nosec
            return True
    except ValueError:
        # urllib could not make sense of the url, so treat the site
        # as inactive rather than raising
        pass
    except SocketError as e:
        if e.errno == errno.ECONNRESET:
            print('WARN: connection was reset during siteIsActive')
    return False
|
||||
|
||||
|
||||
def weekDayOfMonthStart(monthNumber: int, year: int) -> int:
|
||||
"""Gets the day number of the first day of the month
|
||||
1=sun, 7=sat
|
||||
|
|
Loading…
Reference in New Issue