Refactoring receiving of posts

2019-11-10 11:37:24 +00:00 · 2019-11-10 11:37:24 +00:00 · 95a11cccbb
parent b207223f3a
commit 95a11cccbb
2 changed files with 134 additions and 87 deletions
--- a/content.py
+++ b/content.py
@ -9,6 +9,7 @@ __status__ = "Production"
 import os
 import time
 import commentjson
 import email.parser
 from shutil import copyfile
 def replaceEmojiFromTags(content: str,tag: [],messageType: str) -> str:
@ -390,3 +391,124 @@ def getMentionsFromHtml(htmlText: str,matchStr="<span class=\"h-card\"><a href=\
            if actorStr not in mentions:
                mentions.append(actorStr)
    return mentions
 def extractMediaInFormPOST(postBytes,boundary,name: str):
    """Extracts the binary encoding for image/video/audio within a http form POST

    postBytes is the raw body of the POST as bytes.
    boundary is the multipart boundary taken from the http header. It may
    be given either as a str or as bytes.
    name is the name of the form field which holds the media.

    Returns the media bytes and the remaining bytes. The media bytes begin
    at the Content-Disposition line of the named field and stop just before
    the next occurrence of the boundary. If the named field is not present
    then the media bytes are None and the POST bytes are returned unchanged.
    """
    # Python 3 has no unicode() builtin, so the field name is encoded
    # in order to search within the bytes of the POST
    imageStartBoundary= \
        b'Content-Disposition: form-data; name="'+name.encode('utf-8')+b'";'
    imageStartLocation=postBytes.find(imageStartBoundary)
    if imageStartLocation==-1:
        return None,postBytes

    # bytes.find() only accepts bytes, but the boundary from the
    # http header is typically a str
    imageEndBoundary=boundary
    if isinstance(imageEndBoundary,str):
        imageEndBoundary=imageEndBoundary.encode('utf-8')

    # bytes before and after the point where the start boundary appears
    bytesBeforeMedia=postBytes[:imageStartLocation]
    mediaBytes=postBytes[imageStartLocation:]

    # look for the next boundary
    imageEndLocation=mediaBytes.find(imageEndBoundary)
    if imageEndLocation==-1:
        # no ending boundary, so the media runs to the end of the POST
        return mediaBytes,bytesBeforeMedia

    # remaining bytes after the end of the image
    remainder=mediaBytes[imageEndLocation:]
    # remove bytes after the end boundary
    mediaBytes=mediaBytes[:imageEndLocation]
    # return the media and the before+after bytes
    return mediaBytes,bytesBeforeMedia+remainder
 def saveMediaInFormPOST(mediaBytes,baseDir: str, \
                         nickname: str,domain: str,debug: bool, \
                         filenameBase=None) -> (str,str):
    """Saves the given media bytes extracted from http form POST

    mediaBytes is one section of a multipart POST, including its
    Content-Type line. The media type is detected by searching for a
    known Content-Type and the bytes following that line are written
    to a file.
    If filenameBase is not given then the file is saved as
    baseDir/accounts/nickname@domain/upload.<extension>

    Returns the filename and attachment type (image, video or audio),
    or None,None if there are no media bytes or no known Content-Type
    was found.
    """
    if not mediaBytes:
        if debug:
            print('DEBUG: No media found within POST')
        return None,None

    # the default location does not depend on the media type,
    # so it only needs to be calculated once
    if not filenameBase:
        filenameBase= \
            baseDir+'/accounts/'+ \
            nickname+'@'+domain+'/upload'

    # directly search the binary array for the beginning
    # of an image, video or audio section
    extensionList= {
        'png': 'image/png',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'mp4': 'video/mp4',
        'ogv': 'video/ogv',
        'mp3': 'audio/mpeg',
        'ogg': 'audio/ogg'
    }
    filename=None
    attachmentMediaType=None
    mediaLocation=-1
    searchStr=b''
    for extension,contentType in extensionList.items():
        # Python 3 has no unicode() builtin, so encode to get bytes
        searchStr=b'Content-Type: '+contentType.encode('utf-8')
        mediaLocation=mediaBytes.find(searchStr)
        if mediaLocation==-1:
            continue
        if extension=='jpeg':
            extension='jpg'
        filename=filenameBase+'.'+extension
        # the part before the slash: image, video or audio
        attachmentMediaType=contentType.split('/')[0]
        break
    if not filename:
        return None,None

    # locate the beginning of the image, after any
    # carriage returns. The scan is limited to the available
    # bytes so that a truncated section cannot cause an IndexError
    startPos=mediaLocation+len(searchStr)
    for offset in range(1,8):
        pos=startPos+offset
        if pos>=len(mediaBytes):
            break
        if mediaBytes[pos] not in (10,13):
            startPos=pos
            break

    # the context manager closes the file even if the write fails
    with open(filename,'wb') as fd:
        fd.write(mediaBytes[startPos:])
    return filename,attachmentMediaType
 def extractTextFieldsInPOST(postBytes,boundary) -> {}:
    """Returns a dictionary containing the text fields of a http form POST
    The boundary argument comes from the http header

    postBytes is the body of the POST, which is decoded as utf-8 and
    so should not contain binary media. The decoded text is split on
    boundary, so boundary is expected to be a str.
    Sections whose Content-Disposition line carries extra parameters
    after the field name (such as a filename) are skipped.
    Multi-line values are returned with lines separated by a newline.
    """
    msg=email.parser.BytesParser().parsebytes(postBytes)
    messageFields=msg.get_payload(decode=True).decode('utf-8').split(boundary)
    fields={}
    # examine each section of the POST, separated by the boundary
    for f in messageFields:
        if f=='--':
            continue
        if ' name="' not in f:
            continue
        postStr=f.split(' name="',1)[1]
        if '"' not in postStr:
            continue
        postKey,postValueStr=postStr.split('"',1)
        if '\r\n' not in postValueStr:
            continue
        postLines=postValueStr.split('\r\n')
        # Only the rest of the Content-Disposition line is checked for
        # extra parameters. Checking the whole section would silently
        # drop any text field whose value contains a semicolon
        if ';' in postLines[0]:
            continue
        # postLines[0] is the rest of the header line, postLines[1] is
        # the blank separator line and the final entry is the text
        # which precedes the next boundary
        fields[postKey]='\n'.join(postLines[2:-1])
    return fields
--- a/daemon.py
+++ b/daemon.py
@ -137,6 +137,9 @@ from manualapprove import manualApproveFollowRequest
 from announce import createAnnounce
 from announce import outboxAnnounce
 from content import addHtmlTags
 from content import extractMediaInFormPOST
 from content import saveMediaInFormPOST
 from content import extractTextFieldsInPOST
 from media import removeMetaData
 from cache import storePersonInCache
 from cache import getPersonFromCache
@ -172,8 +175,8 @@ def readFollowList(filename: str) -> None:
    return followlist
 class PubServer(BaseHTTPRequestHandler):
-    protocol_version = 'HTTP/1.1'
+    protocol_version = 'HTTP/1.1'        
-
+    
    def _requestHTTP(self) -> bool:
        """Should a http response be given?
        """
@ -2649,92 +2652,14 @@ class PubServer(BaseHTTPRequestHandler):
            # in Python 3.8/3.10
            # Instead we use the multipart mime parser from the email module
            postBytes=self.rfile.read(length)
-            msg = email.parser.BytesParser().parsebytes(postBytes)
+            mediaBytes,postBytes=extractMediaInFormPOST(postBytes,boundary,'attachpic')
-            # why don't we just use msg.is_multipart(), rather than splitting?
+            filename,attachmentMediaType= \
-            # TL;DR it doesn't work for this use case because we're not using
+                saveMediaInFormPOST(mediaBytes,self.server.baseDir, \
-            # email style encoding message/rfc822
+                                    nickname,self.server.domain, \
-            imageBoundary=b'Content-Disposition: form-data; name="attachpic";'
+                                    self.server.debug,None)
-            imageLocation=postBytes.find(imageBoundary)
+            fields=extractTextFieldsInPOST(postBytes,boundary)
            if imageLocation>-1:
                # get the first part of the data containing text fields
                # If we try to use decode=True on the full data, including images,
                # then it will fail
                msg = email.parser.BytesParser().parsebytes(postBytes[:imageLocation])
            messageFields=msg.get_payload(decode=True).decode('utf-8').split(boundary)
            fields={}
            filename=None
            attachmentMediaType=None
            # get the text fields
            for f in messageFields:
                if f=='--':
                    continue
                if ' name="' in f:
                    postStr=f.split(' name="',1)[1]
                    if '"' in postStr:
                        postKey=postStr.split('"',1)[0]
                        postValueStr=postStr.split('"',1)[1]
                        if ';' not in postValueStr:
                            if '\r\n' in postValueStr:
                                postLines=postValueStr.split('\r\n')                                    
                                postValue=''
                                if len(postLines)>2:
                                    for line in range(2,len(postLines)-1):
                                        if line>2:
                                            postValue+='\n'
                                        postValue+=postLines[line]
                                fields[postKey]=postValue
            # now extract any attached image or other media
            if imageLocation>-1:
                imageLocation2=-1
                filename=None
                searchStr=''
                # directly search the binary array for the beginning
                # of an image
                extensionList=['png','jpeg','gif','mp4','webm','ogv','mp3','ogg']
                for extension in extensionList:
                    searchStr=b'Content-Type: image/png'
                    if extension=='jpeg':
                        searchStr=b'Content-Type: image/jpeg'
                    elif extension=='gif':
                        searchStr=b'Content-Type: image/gif'
                    elif extension=='mp4':
                        searchStr=b'Content-Type: video/mp4'
                    elif extension=='ogv':
                        searchStr=b'Content-Type: video/ogv'
                    elif extension=='mp3':
                        searchStr=b'Content-Type: audio/mpeg'
                    elif extension=='ogg':
                        searchStr=b'Content-Type: audio/ogg'
                    imageLocation2=postBytes.find(searchStr)
                    filenameBase= \
                        self.server.baseDir+'/accounts/'+ \
                        nickname+'@'+self.server.domain+'/upload'
                    if imageLocation2>-1:
                        if extension=='jpeg':
                            extension='jpg'
                        elif extension=='mpeg':
                            extension='mp3'
                        filename=filenameBase+'.'+extension
                        attachmentMediaType= \
                            searchStr.decode().split('/')[0].replace('Content-Type: ','')
                        break
                if filename and imageLocation2>-1:
                    # locate the beginning of the image, after any
                    # carriage returns
                    startPos=imageLocation2+len(searchStr)
                    for offset in range(1,8):
                        if postBytes[startPos+offset]!=10:
                            if postBytes[startPos+offset]!=13:
                                startPos+=offset
                                break
-                    fd = open(filename, 'wb')
+            # process the received text fields from the POST
                    fd.write(postBytes[startPos:])
                    fd.close()
                else:
                    filename=None
            # send the post
            if not fields.get('message') and not fields.get('imageDescription'):
                return -1
            if fields.get('submitPost'):