Command to retrieve html for post

main
Bob Mottram 2021-12-23 20:59:36 +00:00
parent 44daf317fc
commit be75d250e0
2 changed files with 82 additions and 5 deletions

View File

@ -43,6 +43,7 @@ from posts import getUserUrl
from posts import checkDomains
from session import createSession
from session import getJson
from session import downloadHtml
from newswire import getRSS
from filters import addFilter
from filters import removeFilter
@ -290,6 +291,8 @@ parser.add_argument('--postsraw', dest='postsraw', type=str,
help='Show raw json of posts for the given handle')
parser.add_argument('--json', dest='json', type=str, default=None,
help='Show the json for a given activitypub url')
parser.add_argument('--htmlpost', dest='htmlpost', type=str, default=None,
help='Show the html for a given activitypub url')
parser.add_argument('--rss', dest='rss', type=str, default=None,
help='Show an rss feed for a given url')
parser.add_argument('-f', '--federate', nargs='+', dest='federationList',
@ -949,7 +952,33 @@ if args.json:
print('Did not obtain instance actor key for ' + domain)
testJson = getJson(signingPrivateKeyPem, session, args.json, asHeader,
None, debug, __version__, httpPrefix, domain)
pprint(testJson)
if testJson:
pprint(testJson)
sys.exit()
if args.htmlpost:
    # Command line option: download the html for the given url,
    # show it on the console and then exit
    session = createSession(None)
    profileStr = 'https://www.w3.org/ns/activitystreams'
    asHeader = {'Accept': 'text/html; profile="' + profileStr + '"'}

    # fall back to the configured domain if none was given as an argument
    if not args.domain:
        args.domain = getConfigParam(baseDir, 'domain')
    domain = args.domain if args.domain else ''

    # the instance actor key is passed to downloadHtml, which uses it
    # to decide whether to make a signed request
    signingPrivateKeyPem = getInstanceActorKey(baseDir, domain)
    if debug:
        print('baseDir: ' + str(baseDir))
        keyStatus = 'Obtained instance actor signing key'
        if not signingPrivateKeyPem:
            keyStatus = 'Did not obtain instance actor key for ' + domain
        print(keyStatus)

    testHtml = downloadHtml(signingPrivateKeyPem, session, args.htmlpost,
                            asHeader, None, debug, __version__,
                            httpPrefix, domain)
    # nothing is shown if the download failed or returned nothing
    if testHtml:
        print(testHtml)
    sys.exit()
# create cache for actors

View File

@ -88,7 +88,8 @@ def urlExists(session, url: str, timeoutSec: int = 3,
def _getJsonRequest(session, url: str, domainFull: str, sessionHeaders: {},
sessionParams: {}, timeoutSec: int,
signingPrivateKeyPem: str, quiet: bool, debug: bool) -> {}:
signingPrivateKeyPem: str, quiet: bool, debug: bool,
returnJson: bool) -> {}:
"""http GET for json
"""
try:
@ -108,7 +109,9 @@ def _getJsonRequest(session, url: str, domainFull: str, sessionHeaders: {},
' failed with error code ' +
str(result.status_code) +
' headers: ' + str(sessionHeaders))
return result.json()
if returnJson:
return result.json()
return result.content
except requests.exceptions.RequestException as e:
sessionHeaders2 = sessionHeaders.copy()
if sessionHeaders2.get('Authorization'):
@ -199,8 +202,12 @@ def _getJsonSigned(session, url: str, domainFull: str, sessionHeaders: {},
if debug:
print('Signed GET sessionHeaders ' + str(sessionHeaders))
returnJson = True
if 'json' not in contentType:
returnJson = False
return _getJsonRequest(session, url, domainFull, sessionHeaders,
sessionParams, timeoutSec, None, quiet, debug)
sessionParams, timeoutSec, None, quiet,
debug, returnJson)
def getJson(signingPrivateKeyPem: str,
@ -239,7 +246,48 @@ def getJson(signingPrivateKeyPem: str,
else:
return _getJsonRequest(session, url, domain, sessionHeaders,
sessionParams, timeoutSec,
None, quiet, debug)
None, quiet, debug, True)
def downloadHtml(signingPrivateKeyPem: str,
                 session, url: str, headers: {}, params: {}, debug: bool,
                 version: str = '1.2.0', httpPrefix: str = 'https',
                 domain: str = 'testdomain',
                 timeoutSec: int = 20, quiet: bool = False) -> {}:
    """http GET for html

    The request always asks for 'text/html' via the Accept header and
    identifies itself with an 'Epicyon/<version>' User-Agent, to which
    '; +<httpPrefix>://<domain>/' is appended when a domain is given.

    If signingPrivateKeyPem is supplied then the request is made via
    _getJsonSigned, otherwise via _getJsonRequest with returnJson set
    to False so that the response is not parsed as json.

    The supplied headers dictionary is copied and is never modified.

    Returns None if url is not a string or if there is no session,
    otherwise returns whatever the request function returns.
    """
    if not isinstance(url, str):
        if debug and not quiet:
            print('url: ' + str(url))
            print('ERROR: downloadHtml failed, url should be a string')
        return None
    if not session:
        if not quiet:
            print('WARN: downloadHtml failed, ' +
                  'no session specified for downloadHtml')
        return None

    # work on a copy, so that the Accept and User-Agent values set below
    # do not leak back into the dictionary owned by the caller
    sessionHeaders = dict(headers) if headers else {}
    sessionParams = params if params else {}

    # any Accept value supplied by the caller is replaced
    sessionHeaders['Accept'] = 'text/html'
    sessionHeaders['User-Agent'] = 'Epicyon/' + version
    if domain:
        sessionHeaders['User-Agent'] += \
            '; +' + httpPrefix + '://' + domain + '/'

    if debug:
        # NOTE: this is a class attribute, so it affects every
        # HTTPConnection in the process, not only this request
        HTTPConnection.debuglevel = 1

    if signingPrivateKeyPem:
        return _getJsonSigned(session, url, domain,
                              sessionHeaders, sessionParams,
                              timeoutSec, signingPrivateKeyPem,
                              quiet, debug)
    return _getJsonRequest(session, url, domain, sessionHeaders,
                           sessionParams, timeoutSec,
                           None, quiet, debug, False)
def postJson(httpPrefix: str, domainFull: str,