Command to retrieve html for post

merge-requests/22/merge
Bob Mottram 2021-12-23 20:59:36 +00:00
parent 44daf317fc
commit be75d250e0
2 changed files with 82 additions and 5 deletions

View File

@ -43,6 +43,7 @@ from posts import getUserUrl
from posts import checkDomains from posts import checkDomains
from session import createSession from session import createSession
from session import getJson from session import getJson
from session import downloadHtml
from newswire import getRSS from newswire import getRSS
from filters import addFilter from filters import addFilter
from filters import removeFilter from filters import removeFilter
@ -290,6 +291,8 @@ parser.add_argument('--postsraw', dest='postsraw', type=str,
help='Show raw json of posts for the given handle') help='Show raw json of posts for the given handle')
parser.add_argument('--json', dest='json', type=str, default=None, parser.add_argument('--json', dest='json', type=str, default=None,
help='Show the json for a given activitypub url') help='Show the json for a given activitypub url')
parser.add_argument('--htmlpost', dest='htmlpost', type=str, default=None,
help='Show the html for a given activitypub url')
parser.add_argument('--rss', dest='rss', type=str, default=None, parser.add_argument('--rss', dest='rss', type=str, default=None,
help='Show an rss feed for a given url') help='Show an rss feed for a given url')
parser.add_argument('-f', '--federate', nargs='+', dest='federationList', parser.add_argument('-f', '--federate', nargs='+', dest='federationList',
@ -949,7 +952,33 @@ if args.json:
print('Did not obtain instance actor key for ' + domain) print('Did not obtain instance actor key for ' + domain)
testJson = getJson(signingPrivateKeyPem, session, args.json, asHeader, testJson = getJson(signingPrivateKeyPem, session, args.json, asHeader,
None, debug, __version__, httpPrefix, domain) None, debug, __version__, httpPrefix, domain)
pprint(testJson) if testJson:
pprint(testJson)
sys.exit()
if args.htmlpost:
    # --htmlpost: fetch the html for the given url, print it, then exit
    session = createSession(None)
    profileStr = 'https://www.w3.org/ns/activitystreams'
    asHeader = {
        'Accept': 'text/html; profile="' + profileStr + '"'
    }
    # fall back to the configured domain if none was given as an option
    if not args.domain:
        args.domain = getConfigParam(baseDir, 'domain')
    domain = ''
    if args.domain:
        domain = args.domain
    # if an instance actor key is available then the GET will be signed
    signingPrivateKeyPem = getInstanceActorKey(baseDir, domain)
    if debug:
        print('baseDir: ' + str(baseDir))
        if signingPrivateKeyPem:
            print('Obtained instance actor signing key')
        else:
            print('Did not obtain instance actor key for ' + domain)
    testHtml = downloadHtml(signingPrivateKeyPem, session, args.htmlpost,
                            asHeader, None, debug, __version__,
                            httpPrefix, domain)
    if testHtml:
        # the download may hand back the raw response body as bytes,
        # so decode it to show the markup rather than a b'...' repr
        if isinstance(testHtml, (bytes, bytearray)):
            testHtml = testHtml.decode('utf-8', errors='replace')
        print(testHtml)
sys.exit() sys.exit()
# create cache for actors # create cache for actors

View File

@ -88,7 +88,8 @@ def urlExists(session, url: str, timeoutSec: int = 3,
def _getJsonRequest(session, url: str, domainFull: str, sessionHeaders: {}, def _getJsonRequest(session, url: str, domainFull: str, sessionHeaders: {},
sessionParams: {}, timeoutSec: int, sessionParams: {}, timeoutSec: int,
signingPrivateKeyPem: str, quiet: bool, debug: bool) -> {}: signingPrivateKeyPem: str, quiet: bool, debug: bool,
returnJson: bool) -> {}:
"""http GET for json """http GET for json
""" """
try: try:
@ -108,7 +109,9 @@ def _getJsonRequest(session, url: str, domainFull: str, sessionHeaders: {},
' failed with error code ' + ' failed with error code ' +
str(result.status_code) + str(result.status_code) +
' headers: ' + str(sessionHeaders)) ' headers: ' + str(sessionHeaders))
return result.json() if returnJson:
return result.json()
return result.content
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
sessionHeaders2 = sessionHeaders.copy() sessionHeaders2 = sessionHeaders.copy()
if sessionHeaders2.get('Authorization'): if sessionHeaders2.get('Authorization'):
@ -199,8 +202,12 @@ def _getJsonSigned(session, url: str, domainFull: str, sessionHeaders: {},
if debug: if debug:
print('Signed GET sessionHeaders ' + str(sessionHeaders)) print('Signed GET sessionHeaders ' + str(sessionHeaders))
returnJson = True
if 'json' not in contentType:
returnJson = False
return _getJsonRequest(session, url, domainFull, sessionHeaders, return _getJsonRequest(session, url, domainFull, sessionHeaders,
sessionParams, timeoutSec, None, quiet, debug) sessionParams, timeoutSec, None, quiet,
debug, returnJson)
def getJson(signingPrivateKeyPem: str, def getJson(signingPrivateKeyPem: str,
@ -239,7 +246,48 @@ def getJson(signingPrivateKeyPem: str,
else: else:
return _getJsonRequest(session, url, domain, sessionHeaders, return _getJsonRequest(session, url, domain, sessionHeaders,
sessionParams, timeoutSec, sessionParams, timeoutSec,
None, quiet, debug) None, quiet, debug, True)
def downloadHtml(signingPrivateKeyPem: str,
                 session, url: str, headers: {}, params: {}, debug: bool,
                 version: str = '1.2.0', httpPrefix: str = 'https',
                 domain: str = 'testdomain',
                 timeoutSec: int = 20, quiet: bool = False) -> {}:
    """http GET for html
    Returns None if url is not a string or if no session is given.
    Otherwise returns whatever the underlying GET helper returns. The
    unsigned path asks that helper for the raw response content rather
    than parsed json. The signed path is assumed to do the same for
    non-json content types (TODO confirm against _getJsonSigned).
    The Accept header is always set to text/html, replacing any Accept
    value supplied within headers. If signingPrivateKeyPem is given
    then the GET is signed.
    """
    if not isinstance(url, str):
        if debug and not quiet:
            print('url: ' + str(url))
            print('ERROR: downloadHtml failed, url should be a string')
        return None
    if not session:
        if not quiet:
            print('WARN: downloadHtml failed, ' +
                  'no session specified for downloadHtml')
        return None

    # work on a copy, so that the caller's headers are not altered by
    # the Accept and User-Agent values set below
    sessionHeaders = dict(headers) if headers else {}
    sessionParams = params if params else {}

    sessionHeaders['Accept'] = 'text/html'
    userAgent = 'Epicyon/' + version
    if domain:
        userAgent += '; +' + httpPrefix + '://' + domain + '/'
    sessionHeaders['User-Agent'] = userAgent

    if debug:
        # NOTE: this is a class attribute, so it switches on http
        # debug output for every connection in the process
        HTTPConnection.debuglevel = 1

    if signingPrivateKeyPem:
        return _getJsonSigned(session, url, domain,
                              sessionHeaders, sessionParams,
                              timeoutSec, signingPrivateKeyPem,
                              quiet, debug)
    return _getJsonRequest(session, url, domain, sessionHeaders,
                           sessionParams, timeoutSec,
                           None, quiet, debug, False)
def postJson(httpPrefix: str, domainFull: str, def postJson(httpPrefix: str, domainFull: str,