diff --git a/posts.py b/posts.py
index d44257bd2..9e94ba54e 100644
--- a/posts.py
+++ b/posts.py
@@ -30,6 +30,7 @@ from session import postJsonString
 from session import postImage
 from webfinger import webfingerHandle
 from httpsig import createSignedHeader
+from siteactive import siteIsActive
 from utils import fileLastModified
 from utils import isPublicPost
 from utils import hasUsersPath
@@ -38,7 +39,6 @@ from utils import getFullDomain
 from utils import getFollowersList
 from utils import isEvil
 from utils import removeIdEnding
-from utils import siteIsActive
 from utils import getCachedPostFilename
 from utils import getStatusNumber
 from utils import createPersonDir
diff --git a/siteactive.py b/siteactive.py
new file mode 100644
index 000000000..ca530bf49
--- /dev/null
+++ b/siteactive.py
@@ -0,0 +1,119 @@
+__filename__ = "siteactive.py"
+__author__ = "Bob Mottram"
+__credits__ = ["webchk"]
+__license__ = "AGPL3+"
+__version__ = "1.2.0"
+__maintainer__ = "Bob Mottram"
+__email__ = "bob@freedombone.net"
+__status__ = "Production"
+
+import http.client
+from urllib.parse import urlparse
+import ssl
+
+
+class Result:
+    """Holds result of an URL check.
+
+    Within this module only url, status, desc and headers are filled in
+    (see _siteActiveHttpRequest). The latency, content, redirect and
+    sitemap_urls attributes keep the initial values set in __init__.
+    """
+    def __init__(self, url):
+        self.url = url
+        self.status = 0
+        self.desc = ''
+        self.headers = None
+        self.latency = 0
+        self.content = ''
+        self.redirect = None
+        self.sitemap_urls = None
+
+    def __repr__(self):
+        if self.status == 0:
+            return '{} ... {}'.format(self.url, self.desc)
+        return '{} ... {} {} ({})'.format(
+            self.url, self.status, self.desc, self.latency
+        )
+
+    def fill_headers(self, headers):
+        """Takes a list of tuples and converts it to a dictionary."""
+        self.headers = {h[0]: h[1] for h in headers}
+
+
+def _siteActiveParseUrl(url):
+    """Returns an object with properties representing
+
+    scheme: URL scheme specifier
+    netloc: Network location part
+    path: Hierarchical path
+    params: Parameters for last path element
+    query: Query component
+    fragment: Fragment identifier
+    username: User name
+    password: Password
+    hostname: Host name (lower case)
+    port: Port number as integer, if present
+    """
+    loc = urlparse(url)
+
+    # if the scheme (http, https ...) is not available urlparse won't work
+    if loc.scheme == "":
+        url = "http://" + url
+        loc = urlparse(url)
+    return loc
+
+
+def _siteACtiveHttpConnect(loc, timeout: int):
+    """Connects to the host and returns an HTTP or HTTPS connection."""
+    if loc.scheme == "https":
+        # NOTE: a bare SSLContext does not verify certificates. That is
+        # enough for a liveness probe, but must not be used to send data
+        ssl_context = ssl.SSLContext()
+        return http.client.HTTPSConnection(
+            loc.netloc, context=ssl_context, timeout=timeout)
+    return http.client.HTTPConnection(loc.netloc, timeout=timeout)
+
+
+def _siteActiveHttpRequest(loc, timeout: int):
+    """Performs a HTTP HEAD request and returns the response in a
+    Result object. The connection is closed even if the request fails.
+    """
+    conn = _siteACtiveHttpConnect(loc, timeout)
+    try:
+        conn.request('HEAD', loc.path)
+        resp = conn.getresponse()
+
+        result = Result(loc.geturl())
+        result.status = resp.status
+        result.desc = resp.reason
+        result.fill_headers(resp.getheaders())
+    finally:
+        conn.close()
+    return result
+
+
+def siteIsActive(url: str, timeout=10) -> bool:
+    """Returns true if the current url is resolvable.
+    This can be used to check that an instance is online before
+    trying to send posts to it.
+    Any HTTP response counts as active, including 4xx and 5xx statuses,
+    because the server answered. Failure to parse the url, connect or
+    get a response counts as inactive.
+    """
+    if not url.startswith('http'):
+        return False
+    if '.onion/' in url or '.i2p/' in url or \
+       url.endswith('.onion') or \
+       url.endswith('.i2p'):
+        # skip this check for onion and i2p
+        return True
+
+    try:
+        loc = _siteActiveParseUrl(url)
+        _siteActiveHttpRequest(loc, timeout)
+    except Exception:
+        # best effort: no response means inactive. Exception rather than
+        # BaseException so that KeyboardInterrupt still propagates
+        return False
+    return True
diff --git a/tests.py b/tests.py
index 308e1fb0c..7186012a1 100644
--- a/tests.py
+++ b/tests.py
@@ -38,7 +38,7 @@ from utils import getFullDomain
 from utils import validNickname
 from utils import firstParagraphFromString
 from utils import removeIdEnding
-from utils import siteIsActive
+from siteactive import siteIsActive
 from utils import updateRecentPostsCache
 from utils import followPerson
 from utils import getNicknameFromActor
@@ -2067,6 +2067,7 @@ def testJsonld():
 
 def testSiteIsActive():
     print('testSiteIsActive')
+    assert(siteIsActive('https://archive.org'))
     assert(siteIsActive('https://mastodon.social'))
     assert(not siteIsActive('https://notarealwebsite.a.b.c'))
 
@@ -2818,7 +2819,8 @@ def testFunctions():
         'createServerBob',
         'createServerEve',
         'E2EEremoveDevice',
-        'setOrganizationScheme'
+        'setOrganizationScheme',
+        'fill_headers'
     ]
     excludeImports = [
         'link',
diff --git a/utils.py b/utils.py
index 0f3d811cf..6e9b82e2b 100644
--- a/utils.py
+++ b/utils.py
@@ -11,9 +11,6 @@ import time
 import shutil
 import datetime
 import json
-from socket import error as SocketError
-import errno
-import urllib.request
 import idna
 from pprint import pprint
 from calendar import monthrange
@@ -1841,28 +1838,6 @@ def updateAnnounceCollection(recentPostsCache: {},
     saveJson(postJsonObject, postFilename)
 
 
-def siteIsActive(url: str) -> bool:
-    """Returns true if the current url is resolvable.
-    This can be used to check that an instance is online before
-    trying to send posts to it.
-    """
-    if not url.startswith('http'):
-        return False
-    if '.onion/' in url or '.i2p/' in url or \
-       url.endswith('.onion') or \
-       url.endswith('.i2p'):
-        # skip this check for onion and i2p
-        return True
-    try:
-        req = urllib.request.Request(url)
-        urllib.request.urlopen(req, timeout=10)  # nosec
-        return True
-    except SocketError as e:
-        if e.errno == errno.ECONNRESET:
-            print('WARN: connection was reset during siteIsActive')
-    return False
-
-
 def weekDayOfMonthStart(monthNumber: int, year: int) -> int:
     """Gets the day number of the first day of the month
     1=sun, 7=sat