Refactoring receiving of posts

merge-requests/2/head
Bob Mottram 2019-11-10 11:37:24 +00:00
parent b207223f3a
commit 95a11cccbb
2 changed files with 134 additions and 87 deletions

View File

@ -9,6 +9,7 @@ __status__ = "Production"
import os
import time
import commentjson
import email.parser
from shutil import copyfile
def replaceEmojiFromTags(content: str,tag: [],messageType: str) -> str:
@ -390,3 +391,124 @@ def getMentionsFromHtml(htmlText: str,matchStr="<span class=\"h-card\"><a href=\
if actorStr not in mentions:
mentions.append(actorStr)
return mentions
def extractMediaInFormPOST(postBytes,boundary,name: str):
    """Extracts the binary encoding for image/video/audio within a http form POST

    postBytes is the raw body of the POST as bytes.
    boundary is the multipart boundary (str or bytes).
    name is the form field name of the attachment, for example "attachpic".

    Returns the media bytes and the remaining bytes.
    The media bytes begin at the Content-Disposition line of the named
    field and stop just before the next occurrence of the boundary.
    The remaining bytes are everything before the field joined to
    everything from that boundary onwards.
    If the field is not present then the media bytes are None and
    postBytes is returned unchanged.
    """
    # postBytes is binary, so the search patterns must be bytes too.
    # Python 3 has no unicode() builtin, so encode the strings instead
    imageStartBoundary= \
        b'Content-Disposition: form-data; name="'+name.encode('utf-8')+b'";'
    imageStartLocation=postBytes.find(imageStartBoundary)
    if imageStartLocation==-1:
        return None,postBytes

    # bytes after the start boundary appears
    mediaBytes=postBytes[imageStartLocation:]

    # look for the next boundary
    # the same boundary is used as a str when splitting text fields,
    # so accept either representation here
    imageEndBoundary=boundary
    if isinstance(imageEndBoundary,str):
        imageEndBoundary=imageEndBoundary.encode('utf-8')
    imageEndLocation=mediaBytes.find(imageEndBoundary)
    if imageEndLocation==-1:
        # no ending boundary, so the media runs to the end of the POST
        return mediaBytes,postBytes[:imageStartLocation]

    # remaining bytes after the end of the image
    remainder=mediaBytes[imageEndLocation:]

    # remove bytes after the end boundary
    mediaBytes=mediaBytes[:imageEndLocation]

    # return the media and the before+after bytes
    return mediaBytes,postBytes[:imageStartLocation]+remainder
def saveMediaInFormPOST(mediaBytes,baseDir: str, \
                        nickname: str,domain: str,debug: bool, \
                        filenameBase=None) -> (str,str):
    """Saves the given media bytes extracted from http form POST

    mediaBytes is one form part, including its Content-Type header line.
    filenameBase is the path of the saved file without its extension.
    If it is not given then
    baseDir/accounts/nickname@domain/upload is used.

    Returns the filename and attachment type, where the attachment type
    is the part of the content type before the slash
    (image, video or audio).
    Returns None,None if there are no media bytes or if none of the
    known content types appear within them.
    """
    if not mediaBytes:
        if debug:
            print('DEBUG: No media found within POST')
        return None,None

    # the default does not change between extensions,
    # so work it out once
    if not filenameBase:
        filenameBase= \
            baseDir+'/accounts/'+ \
            nickname+'@'+domain+'/upload'

    mediaLocation=-1
    searchStr=b''
    filename=None
    attachmentMediaType=None
    # directly search the binary array for the beginning
    # of an image
    extensionList= {
        'png': 'image/png',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'mp4': 'video/mp4',
        'ogv': 'video/ogv',
        'mp3': 'audio/mpeg',
        'ogg': 'audio/ogg'
    }
    for extension,contentType in extensionList.items():
        # mediaBytes is binary, so the search string must be bytes
        # (Python 3 has no unicode() builtin)
        searchStr=b'Content-Type: '+contentType.encode('utf-8')
        mediaLocation=mediaBytes.find(searchStr)
        if mediaLocation>-1:
            # jpeg files are saved with the shorter extension
            if extension=='jpeg':
                extension='jpg'
            filename=filenameBase+'.'+extension
            attachmentMediaType=contentType.split('/')[0]
            break

    if not filename:
        return None,None

    # locate the beginning of the image, after any
    # carriage returns
    # The byte directly after the content type is never tested.
    # Presumably it is the carriage return ending the header line.
    startPos=mediaLocation+len(searchStr)
    for offset in range(1,8):
        if startPos+offset>=len(mediaBytes):
            # truncated part: don't index beyond the end of the data
            break
        if mediaBytes[startPos+offset] not in (10,13):
            startPos+=offset
            break

    # the context manager closes the file even if the write fails
    with open(filename, 'wb') as fd:
        fd.write(mediaBytes[startPos:])
    return filename,attachmentMediaType
def extractTextFieldsInPOST(postBytes,boundary) -> {}:
    """Returns a dictionary containing the text fields of a http form POST
    The boundary argument comes from the http header

    postBytes is the raw body of the POST and boundary may be str or bytes.
    Each key is a form field name and each value is the text of that
    field, with its lines joined by newline characters.
    Sections whose Content-Disposition line carries further parameters
    after the name (such as a filename) are skipped.
    """
    # the payload is split as text, so the boundary must be a str
    if isinstance(boundary,bytes):
        boundary=boundary.decode('utf-8')

    msg = email.parser.BytesParser().parsebytes(postBytes)
    messageFields=msg.get_payload(decode=True).decode('utf-8').split(boundary)
    fields={}
    # examine each section of the POST, separated by the boundary
    for f in messageFields:
        if f=='--':
            continue
        if ' name="' not in f:
            continue
        postStr=f.split(' name="',1)[1]
        if '"' not in postStr:
            continue
        postKey,postValueStr=postStr.split('"',1)
        if '\r\n' not in postValueStr:
            continue
        postLines=postValueStr.split('\r\n')
        # postLines[0] is the rest of the Content-Disposition line.
        # Only look there for extra parameters, otherwise any field
        # whose text contains a semicolon would be silently dropped
        if ';' in postLines[0]:
            continue
        # line 1 is expected to be the blank separator line and the
        # final entry is the text leading up to the next boundary,
        # so the field text is everything in between
        fields[postKey]='\n'.join(postLines[2:-1])
    return fields

View File

@ -137,6 +137,9 @@ from manualapprove import manualApproveFollowRequest
from announce import createAnnounce
from announce import outboxAnnounce
from content import addHtmlTags
from content import extractMediaInFormPOST
from content import saveMediaInFormPOST
from content import extractTextFieldsInPOST
from media import removeMetaData
from cache import storePersonInCache
from cache import getPersonFromCache
@ -172,8 +175,8 @@ def readFollowList(filename: str) -> None:
return followlist
class PubServer(BaseHTTPRequestHandler):
protocol_version = 'HTTP/1.1'
protocol_version = 'HTTP/1.1'
def _requestHTTP(self) -> bool:
"""Should a http response be given?
"""
@ -2649,92 +2652,14 @@ class PubServer(BaseHTTPRequestHandler):
# in Python 3.8/3.10
# Instead we use the multipart mime parser from the email module
postBytes=self.rfile.read(length)
msg = email.parser.BytesParser().parsebytes(postBytes)
# why don't we just use msg.is_multipart(), rather than splitting?
# TL;DR it doesn't work for this use case because we're not using
# email style encoding message/rfc822
imageBoundary=b'Content-Disposition: form-data; name="attachpic";'
imageLocation=postBytes.find(imageBoundary)
if imageLocation>-1:
# get the first part of the data containing text fields
# If we try to use decode=True on the full data, including images,
# then it will fail
msg = email.parser.BytesParser().parsebytes(postBytes[:imageLocation])
messageFields=msg.get_payload(decode=True).decode('utf-8').split(boundary)
fields={}
filename=None
attachmentMediaType=None
# get the text fields
for f in messageFields:
if f=='--':
continue
if ' name="' in f:
postStr=f.split(' name="',1)[1]
if '"' in postStr:
postKey=postStr.split('"',1)[0]
postValueStr=postStr.split('"',1)[1]
if ';' not in postValueStr:
if '\r\n' in postValueStr:
postLines=postValueStr.split('\r\n')
postValue=''
if len(postLines)>2:
for line in range(2,len(postLines)-1):
if line>2:
postValue+='\n'
postValue+=postLines[line]
fields[postKey]=postValue
# now extract any attached image or other media
if imageLocation>-1:
imageLocation2=-1
filename=None
searchStr=''
# directly search the binary array for the beginning
# of an image
extensionList=['png','jpeg','gif','mp4','webm','ogv','mp3','ogg']
for extension in extensionList:
searchStr=b'Content-Type: image/png'
if extension=='jpeg':
searchStr=b'Content-Type: image/jpeg'
elif extension=='gif':
searchStr=b'Content-Type: image/gif'
elif extension=='mp4':
searchStr=b'Content-Type: video/mp4'
elif extension=='ogv':
searchStr=b'Content-Type: video/ogv'
elif extension=='mp3':
searchStr=b'Content-Type: audio/mpeg'
elif extension=='ogg':
searchStr=b'Content-Type: audio/ogg'
imageLocation2=postBytes.find(searchStr)
filenameBase= \
self.server.baseDir+'/accounts/'+ \
nickname+'@'+self.server.domain+'/upload'
if imageLocation2>-1:
if extension=='jpeg':
extension='jpg'
elif extension=='mpeg':
extension='mp3'
filename=filenameBase+'.'+extension
attachmentMediaType= \
searchStr.decode().split('/')[0].replace('Content-Type: ','')
break
if filename and imageLocation2>-1:
# locate the beginning of the image, after any
# carriage returns
startPos=imageLocation2+len(searchStr)
for offset in range(1,8):
if postBytes[startPos+offset]!=10:
if postBytes[startPos+offset]!=13:
startPos+=offset
break
mediaBytes,postBytes=extractMediaInFormPOST(postBytes,boundary,'attachpic')
filename,attachmentMediaType= \
saveMediaInFormPOST(mediaBytes,self.server.baseDir, \
nickname,self.server.domain, \
self.server.debug,None)
fields=extractTextFieldsInPOST(postBytes,boundary)
fd = open(filename, 'wb')
fd.write(postBytes[startPos:])
fd.close()
else:
filename=None
# send the post
# process the received text fields from the POST
if not fields.get('message') and not fields.get('imageDescription'):
return -1
if fields.get('submitPost'):