From 95a11cccbb1800e59a4ad604db878c6a484e4120 Mon Sep 17 00:00:00 2001
From: Bob Mottram <bob@freedombone.net>
Date: Sun, 10 Nov 2019 11:37:24 +0000
Subject: [PATCH] Refactoring receiving of posts

---
 content.py | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 daemon.py  |  99 ++++++-------------------------------------
 2 files changed, 134 insertions(+), 87 deletions(-)
diff --git a/content.py b/content.py
index 13a6972b..1f5976a8 100644
--- a/content.py
+++ b/content.py
@@ -9,6 +9,7 @@ __status__ = "Production"
 import os
 import time
 import commentjson
+import email.parser
 from shutil import copyfile
 
 def replaceEmojiFromTags(content: str,tag: [],messageType: str) -> str:
@@ -390,3 +391,124 @@ def getMentionsFromHtml(htmlText: str,matchStr="<span class=\"h-card\"><a href=\
             if actorStr not in mentions:
                 mentions.append(actorStr)
     return mentions
+
+def extractMediaInFormPOST(postBytes,boundary,name: str):
+    """Extracts the binary encoding for image/video/audio within a http form POST
+    postBytes is the raw POST body, boundary is the multipart boundary
+    (bytes or str) and name is the form field which holds the media
+    Returns the media bytes and the remaining bytes, or None and the
+    unaltered postBytes if there is no field with the given name
+    """
+    imageStartBoundary=b'Content-Disposition: form-data; name="'+name.encode('utf-8')+b'";'
+    imageStartLocation=postBytes.find(imageStartBoundary)
+    if imageStartLocation==-1:
+        return None,postBytes
+
+    # bytes after the start boundary appears
+    mediaBytes=postBytes[imageStartLocation:]
+
+    # look for the next boundary, which is encoded if it was given as a str
+    imageEndBoundary=boundary
+    if isinstance(imageEndBoundary,str):
+        imageEndBoundary=imageEndBoundary.encode('utf-8')
+    imageEndLocation=mediaBytes.find(imageEndBoundary)
+    if imageEndLocation==-1:
+        # no ending boundary
+        return mediaBytes,postBytes[:imageStartLocation]
+
+    # split at the end boundary, returning the media and the before+after bytes
+    remainder=mediaBytes[imageEndLocation:]
+    return mediaBytes[:imageEndLocation],postBytes[:imageStartLocation]+remainder
+
+def saveMediaInFormPOST(mediaBytes,baseDir: str, \
+                        nickname: str,domain: str,debug: bool, \
+                        filenameBase=None) -> (str,str):
+    """Saves the given media bytes extracted from http form POST
+    mediaBytes is a form section: headers followed by the media itself
+    filenameBase is the filename to save to, without any extension.
+    If it is not given then baseDir/accounts/nickname@domain/upload is used
+    Returns the filename and attachment type (image, video or audio),
+    or None,None if there is no media or no supported Content-Type
+    """
+    if not mediaBytes:
+        if debug:
+            print('DEBUG: No media found within POST')
+        return None,None
+
+    if not filenameBase:
+        filenameBase= \
+            baseDir+'/accounts/'+nickname+'@'+domain+'/upload'
+
+    filename=None
+
+    # directly search the binary array for the Content-Type
+    # header of each supported media format
+    extensionList= {
+        'png': 'image/png',
+        'jpeg': 'image/jpeg',
+        'gif': 'image/gif',
+        'mp4': 'video/mp4',
+        'ogv': 'video/ogv',
+        'mp3': 'audio/mpeg',
+        'ogg': 'audio/ogg'
+    }
+    for extension,contentType in extensionList.items():
+        # mediaBytes is bytes, so the search string must be too
+        searchStr=b'Content-Type: '+contentType.encode('utf-8')
+        mediaLocation=mediaBytes.find(searchStr)
+        if mediaLocation>-1:
+            if extension=='jpeg':
+                extension='jpg'
+            filename=filenameBase+'.'+extension
+            attachmentMediaType=contentType.split('/')[0]
+            break
+
+    if not filename:
+        return None,None
+
+    # locate the beginning of the media, after any carriage
+    # returns (13) or line feeds (10) following the header
+    startPos=mediaLocation+len(searchStr)
+    for offset in range(1,8):
+        if startPos+offset>=len(mediaBytes):
+            # nothing follows the header in a truncated POST
+            break
+        if mediaBytes[startPos+offset] not in (10,13):
+            startPos+=offset
+            break
+
+    with open(filename, 'wb') as fd:
+        fd.write(mediaBytes[startPos:])
+    return filename,attachmentMediaType
+
+def extractTextFieldsInPOST(postBytes,boundary) -> {}:
+    """Returns a dictionary containing the text fields of a http form POST
+    The boundary argument comes from the http header
+    Keys are the form field names and values are the field text, with
+    any line breaks within a value given as newlines
+    """
+    # the form is not parsed as email style multipart. Instead the
+    # decoded payload is split into sections by hand
+    parsedPost = email.parser.BytesParser().parsebytes(postBytes)
+    postText=parsedPost.get_payload(decode=True).decode('utf-8')
+    fields={}
+    nameMarker=' name="'
+    # examine each section of the POST, separated by the boundary
+    for section in postText.split(boundary):
+        if section=='--' or nameMarker not in section:
+            continue
+        afterName=section.split(nameMarker,1)[1]
+        # the field name ends at its closing quote
+        postKey,closingQuote,postValueStr=afterName.partition('"')
+        if not closingQuote:
+            continue
+        # skip sections having further header parameters after the
+        # name, and sections which contain no line breaks
+        if ';' in postValueStr or '\r\n' not in postValueStr:
+            continue
+        # the value is every line apart from the first two, which
+        # follow the field name, and the last, which precedes the boundary
+        postLines=postValueStr.split('\r\n')
+        postValue='\n'.join(postLines[2:-1])
+        fields[postKey]=postValue
+    return fields
diff --git a/daemon.py b/daemon.py
index 795aba59..dfb4e5a2 100644
--- a/daemon.py
+++ b/daemon.py
@@ -137,6 +137,9 @@ from manualapprove import manualApproveFollowRequest
 from announce import createAnnounce
 from announce import outboxAnnounce
 from content import addHtmlTags
+from content import extractMediaInFormPOST
+from content import saveMediaInFormPOST
+from content import extractTextFieldsInPOST
 from media import removeMetaData
 from cache import storePersonInCache
 from cache import getPersonFromCache
@@ -172,8 +175,8 @@ def readFollowList(filename: str) -> None:
     return followlist
 
 class PubServer(BaseHTTPRequestHandler):
-    protocol_version = 'HTTP/1.1'
-
+    protocol_version = 'HTTP/1.1'        
+    
     def _requestHTTP(self) -> bool:
         """Should a http response be given?
         """
@@ -2649,92 +2652,14 @@ class PubServer(BaseHTTPRequestHandler):
             # in Python 3.8/3.10
             # Instead we use the multipart mime parser from the email module
             postBytes=self.rfile.read(length)
-            msg = email.parser.BytesParser().parsebytes(postBytes)
-            # why don't we just use msg.is_multipart(), rather than splitting?
-            # TL;DR it doesn't work for this use case because we're not using
-            # email style encoding message/rfc822
-            imageBoundary=b'Content-Disposition: form-data; name="attachpic";'
-            imageLocation=postBytes.find(imageBoundary)
-            if imageLocation>-1:
-                # get the first part of the data containing text fields
-                # If we try to use decode=True on the full data, including images,
-                # then it will fail
-                msg = email.parser.BytesParser().parsebytes(postBytes[:imageLocation])
-            messageFields=msg.get_payload(decode=True).decode('utf-8').split(boundary)
-            fields={}
-            filename=None
-            attachmentMediaType=None
-            # get the text fields
-            for f in messageFields:
-                if f=='--':
-                    continue
-                if ' name="' in f:
-                    postStr=f.split(' name="',1)[1]
-                    if '"' in postStr:
-                        postKey=postStr.split('"',1)[0]
-                        postValueStr=postStr.split('"',1)[1]
-                        if ';' not in postValueStr:
-                            if '\r\n' in postValueStr:
-                                postLines=postValueStr.split('\r\n')                                    
-                                postValue=''
-                                if len(postLines)>2:
-                                    for line in range(2,len(postLines)-1):
-                                        if line>2:
-                                            postValue+='\n'
-                                        postValue+=postLines[line]
-                                fields[postKey]=postValue
-            # now extract any attached image or other media
-            if imageLocation>-1:
-                imageLocation2=-1
-                filename=None
-                searchStr=''
-                # directly search the binary array for the beginning
-                # of an image
-                extensionList=['png','jpeg','gif','mp4','webm','ogv','mp3','ogg']
-                for extension in extensionList:
-                    searchStr=b'Content-Type: image/png'
-                    if extension=='jpeg':
-                        searchStr=b'Content-Type: image/jpeg'
-                    elif extension=='gif':
-                        searchStr=b'Content-Type: image/gif'
-                    elif extension=='mp4':
-                        searchStr=b'Content-Type: video/mp4'
-                    elif extension=='ogv':
-                        searchStr=b'Content-Type: video/ogv'
-                    elif extension=='mp3':
-                        searchStr=b'Content-Type: audio/mpeg'
-                    elif extension=='ogg':
-                        searchStr=b'Content-Type: audio/ogg'
-                    imageLocation2=postBytes.find(searchStr)
-                    filenameBase= \
-                        self.server.baseDir+'/accounts/'+ \
-                        nickname+'@'+self.server.domain+'/upload'
-                    if imageLocation2>-1:
-                        if extension=='jpeg':
-                            extension='jpg'
-                        elif extension=='mpeg':
-                            extension='mp3'
-                        filename=filenameBase+'.'+extension
-                        attachmentMediaType= \
-                            searchStr.decode().split('/')[0].replace('Content-Type: ','')
-                        break
-                if filename and imageLocation2>-1:
-                    # locate the beginning of the image, after any
-                    # carriage returns
-                    startPos=imageLocation2+len(searchStr)
-                    for offset in range(1,8):
-                        if postBytes[startPos+offset]!=10:
-                            if postBytes[startPos+offset]!=13:
-                                startPos+=offset
-                                break
+            mediaBytes,postBytes=extractMediaInFormPOST(postBytes,boundary,'attachpic')
+            filename,attachmentMediaType= \
+                saveMediaInFormPOST(mediaBytes,self.server.baseDir, \
+                                    nickname,self.server.domain, \
+                                    self.server.debug,None)
+            fields=extractTextFieldsInPOST(postBytes,boundary)
 
-                    fd = open(filename, 'wb')
-                    fd.write(postBytes[startPos:])
-                    fd.close()
-                else:
-                    filename=None
-
-            # send the post
+            # process the received text fields from the POST
             if not fields.get('message') and not fields.get('imageDescription'):
                 return -1
             if fields.get('submitPost'):