From be75d250e00935de48340c6c6cb44bd1a827ac4b Mon Sep 17 00:00:00 2001
From: Bob Mottram
Date: Thu, 23 Dec 2021 20:59:36 +0000
Subject: [PATCH] Command to retrieve html for post

---
 epicyon.py | 35 +++++++++++++++++++++++++++++-
 session.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/epicyon.py b/epicyon.py
index 391650119..2923d4247 100644
--- a/epicyon.py
+++ b/epicyon.py
@@ -43,6 +43,7 @@ from posts import getUserUrl
 from posts import checkDomains
 from session import createSession
 from session import getJson
+from session import downloadHtml
 from newswire import getRSS
 from filters import addFilter
 from filters import removeFilter
@@ -290,6 +291,8 @@ parser.add_argument('--postsraw', dest='postsraw', type=str,
                     help='Show raw json of posts for the given handle')
 parser.add_argument('--json', dest='json', type=str, default=None,
                     help='Show the json for a given activitypub url')
+parser.add_argument('--htmlpost', dest='htmlpost', type=str, default=None,
+                    help='Show the html for a given activitypub url')
 parser.add_argument('--rss', dest='rss', type=str, default=None,
                     help='Show an rss feed for a given url')
 parser.add_argument('-f', '--federate', nargs='+', dest='federationList',
@@ -949,7 +952,37 @@ if args.json:
             print('Did not obtain instance actor key for ' + domain)
     testJson = getJson(signingPrivateKeyPem, session, args.json, asHeader,
                        None, debug, __version__, httpPrefix, domain)
-    pprint(testJson)
+    if testJson:
+        pprint(testJson)
+    sys.exit()
+
+if args.htmlpost:
+    session = createSession(None)
+    profileStr = 'https://www.w3.org/ns/activitystreams'
+    asHeader = {
+        'Accept': 'text/html; profile="' + profileStr + '"'
+    }
+    if not args.domain:
+        args.domain = getConfigParam(baseDir, 'domain')
+    domain = ''
+    if args.domain:
+        domain = args.domain
+    signingPrivateKeyPem = getInstanceActorKey(baseDir, domain)
+    if debug:
+        print('baseDir: ' + str(baseDir))
+        if signingPrivateKeyPem:
+            print('Obtained instance actor signing key')
+        else:
+            print('Did not obtain instance actor key for ' + domain)
+    testHtml = downloadHtml(signingPrivateKeyPem, session, args.htmlpost,
+                            asHeader, None, debug, __version__,
+                            httpPrefix, domain)
+    if testHtml:
+        # downloadHtml can return the raw response bytes; decode so that
+        # the html is printed as text rather than as a b'...' repr
+        if isinstance(testHtml, bytes):
+            testHtml = testHtml.decode('utf-8', 'replace')
+        print(testHtml)
     sys.exit()
 
 # create cache for actors
diff --git a/session.py b/session.py
index 8464bd0ab..ca841cffb 100644
--- a/session.py
+++ b/session.py
@@ -88,7 +88,8 @@ def urlExists(session, url: str, timeoutSec: int = 3,
 
 def _getJsonRequest(session, url: str, domainFull: str, sessionHeaders: {},
                     sessionParams: {}, timeoutSec: int,
-                    signingPrivateKeyPem: str, quiet: bool, debug: bool) -> {}:
+                    signingPrivateKeyPem: str, quiet: bool, debug: bool,
+                    returnJson: bool) -> {}:
     """http GET for json
     """
     try:
@@ -108,7 +109,9 @@ def _getJsonRequest(session, url: str, domainFull: str, sessionHeaders: {},
                       ' failed with error code ' +
                       str(result.status_code) +
                       ' headers: ' + str(sessionHeaders))
-        return result.json()
+        if returnJson:
+            return result.json()
+        return result.content
     except requests.exceptions.RequestException as e:
         sessionHeaders2 = sessionHeaders.copy()
         if sessionHeaders2.get('Authorization'):
@@ -199,8 +202,12 @@ def _getJsonSigned(session, url: str, domainFull: str, sessionHeaders: {},
     if debug:
         print('Signed GET sessionHeaders ' + str(sessionHeaders))
 
+    returnJson = True
+    if 'json' not in contentType:
+        returnJson = False
     return _getJsonRequest(session, url, domainFull, sessionHeaders,
-                           sessionParams, timeoutSec, None, quiet, debug)
+                           sessionParams, timeoutSec, None, quiet,
+                           debug, returnJson)
 
 
 def getJson(signingPrivateKeyPem: str,
@@ -239,7 +246,52 @@ def getJson(signingPrivateKeyPem: str,
     else:
         return _getJsonRequest(session, url, domain, sessionHeaders,
                                sessionParams, timeoutSec,
-                               None, quiet, debug)
+                               None, quiet, debug, True)
+
+
+def downloadHtml(signingPrivateKeyPem: str,
+                 session, url: str, headers: {}, params: {}, debug: bool,
+                 version: str = '1.2.0', httpPrefix: str = 'https',
+                 domain: str = 'testdomain',
+                 timeoutSec: int = 20, quiet: bool = False) -> {}:
+    """http GET for html, signed if a private key is supplied
+    Returns None if the url is not a string or there is no session
+    """
+    if not isinstance(url, str):
+        if debug and not quiet:
+            print('url: ' + str(url))
+            print('ERROR: downloadHtml failed, url should be a string')
+        return None
+    sessionParams = {}
+    sessionHeaders = {}
+    if headers:
+        # copy so that the caller's headers are not modified
+        sessionHeaders = dict(headers)
+    if params:
+        sessionParams = params
+    sessionHeaders['Accept'] = 'text/html'
+    sessionHeaders['User-Agent'] = 'Epicyon/' + version
+    if domain:
+        sessionHeaders['User-Agent'] += \
+            '; +' + httpPrefix + '://' + domain + '/'
+    if not session:
+        if not quiet:
+            print('WARN: downloadHtml failed, ' +
+                  'no session specified for downloadHtml')
+        return None
+
+    if debug:
+        HTTPConnection.debuglevel = 1
+
+    if signingPrivateKeyPem:
+        return _getJsonSigned(session, url, domain,
+                              sessionHeaders, sessionParams,
+                              timeoutSec, signingPrivateKeyPem,
+                              quiet, debug)
+    else:
+        return _getJsonRequest(session, url, domain, sessionHeaders,
+                               sessionParams, timeoutSec,
+                               None, quiet, debug, False)
 
 
 def postJson(httpPrefix: str, domainFull: str,