# Helpers for receiving posts from a http form POST (content.py).
#
# Recovered from the patch "Refactoring receiving of posts"
# (Bob Mottram, Sun, 10 Nov 2019), which moved this logic out of the
# PubServer POST handling in daemon.py. After the patch daemon.py calls:
#
#   mediaBytes,postBytes=extractMediaInFormPOST(postBytes,boundary,'attachpic')
#   filename,attachmentMediaType= \
#       saveMediaInFormPOST(mediaBytes,self.server.baseDir, \
#                           nickname,self.server.domain, \
#                           self.server.debug,None)
#   fields=extractTextFieldsInPOST(postBytes,boundary)
#
# extractMediaInFormPOST is imported from content.py by daemon.py but its
# definition is not part of this section.


def saveMediaInFormPOST(mediaBytes, baseDir: str, nickname: str, domain: str,
                        debug: bool, filenameBase=None) -> (str, str):
    """Saves the given media bytes extracted from http form POST
    Returns the filename and attachment type

    mediaBytes is searched for a 'Content-Type: <mime type>' marker for
    each supported media type. Everything after that marker (and the line
    breaks which follow it) is written to <filenameBase>.<extension>.

    If filenameBase is not given then
    baseDir/accounts/nickname@domain/upload is used.

    Returns (filename, attachmentMediaType), where attachmentMediaType is
    the first half of the mime type ('image', 'video' or 'audio'), or
    (None, None) if there are no media bytes or no known Content-Type
    marker is found.

    NOTE(review): the parameter list was reconstructed from the names used
    by the body and from the call within daemon.py - confirm against the
    original content.py.
    """
    if not mediaBytes:
        if debug:
            print('DEBUG: No media found within POST')
        return None, None

    # file extension -> mime type, tried in this order
    extensionList = {
        'png': 'image/png',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'mp4': 'video/mp4',
        'ogv': 'video/ogv',
        'mp3': 'audio/mpeg',
        'ogg': 'audio/ogg'
    }

    if not filenameBase:
        filenameBase = \
            baseDir + '/accounts/' + nickname + '@' + domain + '/upload'

    # directly search the binary array for the beginning of the media
    filename = None
    attachmentMediaType = None
    searchStr = b''
    mediaLocation = -1
    for extension, contentType in extensionList.items():
        # bytes and str cannot be concatenated, so encode the mime type
        searchStr = b'Content-Type: ' + contentType.encode('utf-8')
        mediaLocation = mediaBytes.find(searchStr)
        if mediaLocation == -1:
            continue
        if extension == 'jpeg':
            extension = 'jpg'
        filename = filenameBase + '.' + extension
        attachmentMediaType = contentType.split('/')[0]
        break

    if not filename:
        return None, None

    # locate the beginning of the media, after any carriage returns
    # or line feeds which follow the Content-Type marker
    startPos = mediaLocation + len(searchStr)
    totalBytes = len(mediaBytes)
    for offset in range(1, 8):
        if startPos + offset >= totalBytes:
            # the marker has no payload after it, so there is nothing
            # to write (and nothing to index)
            startPos = totalBytes
            break
        if mediaBytes[startPos + offset] not in (10, 13):
            startPos += offset
            break

    # the context manager closes the file even if the write fails
    with open(filename, 'wb') as fd:
        fd.write(mediaBytes[startPos:])
    return filename, attachmentMediaType


def extractTextFieldsInPOST(postBytes, boundary) -> dict:
    """Returns a dictionary containing the text fields of a http form POST
    The boundary argument comes from the http header

    Each section of the POST is expected to look like:

        Content-Disposition: form-data; name="<key>"
        <blank line>
        <value, possibly spanning several lines>

    Sections whose Content-Disposition line carries further parameters
    after the name (for example ; filename="...") are not text fields
    and are skipped. Lines within a value are joined with newlines.
    """
    # imported here so that this function does not rely on a
    # module level import
    import email.parser

    if not postBytes or not boundary:
        return {}
    if isinstance(boundary, bytes):
        boundary = boundary.decode('utf-8')

    msg = email.parser.BytesParser().parsebytes(postBytes)
    payload = msg.get_payload(decode=True)
    if payload is None:
        # the parser returns no decoded payload for a multipart message
        return {}
    messageFields = payload.decode('utf-8').split(boundary)

    fields = {}
    # examine each section of the POST, separated by the boundary
    for section in messageFields:
        if section == '--':
            continue
        if ' name="' not in section:
            continue
        postStr = section.split(' name="', 1)[1]
        if '"' not in postStr:
            continue
        postKey, postValueStr = postStr.split('"', 1)
        if '\r\n' not in postValueStr:
            continue
        postLines = postValueStr.split('\r\n')
        # Only the remainder of the Content-Disposition line is checked
        # for extra parameters. Checking the whole section would discard
        # any text field whose value contains a semicolon.
        if ';' in postLines[0]:
            continue
        # postLines[0] is the rest of the header line, postLines[1] the
        # blank separator and the final entry precedes the next boundary
        fields[postKey] = '\n'.join(postLines[2:-1])
    return fields