Optimize http-parser: track the current data mark in struct http_parser

Signed-off-by: Jianhui Zhao <jianhuizhao329@gmail.com>
2017-11-26 23:52:41 +08:00 · 2017-11-26 23:52:41 +08:00 · 34d052d4c0
parent 650f545c20
commit 34d052d4c0
4 changed files with 24 additions and 72 deletions
--- a/src/http_parser.c
+++ b/src/http_parser.c
@ -103,11 +103,11 @@ do {                                                                 \
 do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
-  if (FOR##_mark) {                                                  \
+  if (parser->mark) {                                                  \
    if (LIKELY(settings->on_##FOR)) {                                \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(0 !=                                              \
-                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
+                   settings->on_##FOR(parser, parser->mark, (LEN)))) { \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
@ -117,25 +117,17 @@ do {                                                                 \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
-    FOR##_mark = NULL;                                               \
+    parser->mark = NULL;                                               \
  }                                                                  \
 } while (0)

 /* Run the data callback FOR and consume the current byte */
 #define CALLBACK_DATA(FOR)                                           \
-    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
+    CALLBACK_DATA_(FOR, p - parser->mark, p - data + 1)

 /* Run the data callback FOR and don't consume the current byte */
 #define CALLBACK_DATA_NOADVANCE(FOR)                                 \
-    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
-
-/* Set the mark FOR; non-destructive if mark is already set */
-#define MARK(FOR)                                                    \
-do {                                                                 \
-  if (!FOR##_mark) {                                                 \
-    FOR##_mark = p;                                                  \
-  }                                                                  \
-} while (0)
+    CALLBACK_DATA_(FOR, p - parser->mark, p - data)

 /* Don't allow the total size of the HTTP headers (including the status
 * line) to exceed HTTP_MAX_HEADER_SIZE.  This check is here to protect
@ -636,11 +628,6 @@ size_t http_parser_execute (http_parser *parser,
  char c, ch;
  int8_t unhex_val;
  const char *p = data;
-  const char *header_field_mark = 0;
-  const char *header_value_mark = 0;
-  const char *url_mark = 0;
-  const char *body_mark = 0;
-  const char *status_mark = 0;
  enum state p_state = (enum state) parser->state;
  const unsigned int lenient = parser->lenient_http_headers;

@ -670,7 +657,7 @@ size_t http_parser_execute (http_parser *parser,
    }
  }

-
+#if 0
  if (CURRENT_STATE() == s_header_field)
    header_field_mark = data;
  if (CURRENT_STATE() == s_header_value)
@ -695,6 +682,7 @@ size_t http_parser_execute (http_parser *parser,
  default:
    break;
  }
+#endif

  for (p=data; p != data + len; p++) {
    ch = *p;
@ -884,7 +872,7 @@ reexecute:

      case s_res_status_start:
      {
-        MARK(status);
+        parser->mark = p;
        UPDATE_STATE(s_res_status);
        parser->index = 0;

@ -1013,7 +1001,7 @@ reexecute:
      {
        if (ch == ' ') break;

-        MARK(url);
+        parser->mark = p;
        if (parser->method == HTTP_CONNECT) {
          UPDATE_STATE(s_req_server_start);
        }
@ -1196,8 +1184,7 @@ reexecute:
          goto error;
        }

-        MARK(header_field);
-
+        parser->mark = p;
        parser->index = 0;
        UPDATE_STATE(s_header_field);

@ -1371,8 +1358,7 @@ reexecute:

      case s_header_value_start:
      {
-        MARK(header_value);
-
+        parser->mark = p;
        UPDATE_STATE(s_header_value);
        parser->index = 0;

@ -1695,7 +1681,7 @@ reexecute:
          }

          /* header value was empty */
-          MARK(header_value);
+          parser->mark = p;
          UPDATE_STATE(s_header_field_start);
          CALLBACK_DATA_NOADVANCE(header_value);
          REEXECUTE();
@ -1830,7 +1816,7 @@ reexecute:
         * Further, if content_length ends up at 0, we want to see the last
         * byte again for our message complete callback.
         */
-        MARK(body);
+        parser->mark = p;
        parser->content_length -= to_read;
        p += to_read - 1;
 		
@ -1846,18 +1832,19 @@ reexecute:
           * complete-on-length. It's not clear that this distinction is
           * important for applications, but let's keep it for now.
           */
-          CALLBACK_DATA_(body, p - body_mark + 1, p - data);
+          CALLBACK_DATA_(body, p - parser->mark + 1, p - data);
          REEXECUTE();
        }

+		CALLBACK_DATA_(body, p - parser->mark + 1, p - data);
        break;
      }

      /* read until EOF */
      case s_body_identity_eof:
-        MARK(body);
+        parser->mark = p;
        p = data + len - 1;
-
+		CALLBACK_DATA_(body, p - parser->mark + 1, p - data);
        break;

      case s_message_done:
@ -1962,7 +1949,7 @@ reexecute:
        /* See the explanation in s_body_identity for why the content
         * length and data pointers are managed this way.
         */
-        MARK(body);
+        parser->mark = p;
        parser->content_length -= to_read;
        p += to_read - 1;

@ -1970,6 +1957,7 @@ reexecute:
          UPDATE_STATE(s_chunk_data_almost_done);
        }
 		
+		CALLBACK_DATA_(body, p - parser->mark + 1, p - data);
        break;
      }

@ -1996,28 +1984,6 @@ reexecute:
    }
  }

-  /* Run callbacks for any marks that we have leftover after we ran our of
-   * bytes. There should be at most one of these set, so it's OK to invoke
-   * them in series (unset marks will not result in callbacks).
-   *
-   * We use the NOADVANCE() variety of callbacks here because 'p' has already
-   * overflowed 'data' and this allows us to correct for the off-by-one that
-   * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
-   * value that's in-bounds).
-   */
-
-  assert(((header_field_mark ? 1 : 0) +
-          (header_value_mark ? 1 : 0) +
-          (url_mark ? 1 : 0)  +
-          (body_mark ? 1 : 0) +
-          (status_mark ? 1 : 0)) <= 1);
-
-  CALLBACK_DATA_NOADVANCE(header_field);
-  CALLBACK_DATA_NOADVANCE(header_value);
-  CALLBACK_DATA_NOADVANCE(url);
-  CALLBACK_DATA_NOADVANCE(body);
-  CALLBACK_DATA_NOADVANCE(status);
-
  RETURN(len);

 error:
--- a/src/include/internal.h
+++ b/src/include/internal.h
@ -13,8 +13,7 @@

 #define UH_CON_CLOSE                (1 << 0)
 #define UH_CON_SSL_HANDSHAKE_DONE   (1 << 1)    /* SSL hanshake has completed */
-#define UH_CON_PARSERING            (1 << 2)    /* Whether executed http_parser_execute() */
-#define UH_CON_REUSE                (1 << 3)
+#define UH_CON_REUSE                (1 << 2)

 #define likely(x)   (__builtin_expect(!!(x), 1))
 #define unlikely(x) (__builtin_expect(!!(x), 0))
--- a/src/include/uhttp/http_parser.h
+++ b/src/include/uhttp/http_parser.h
@ -297,6 +297,7 @@ struct http_parser {
  unsigned int index : 7;        /* index into current matcher */
  unsigned int lenient_http_headers : 1;
  
+  const char *mark;				/* Marks the start position of the current matcher */
  uint32_t nread;          /* # bytes read in various scenarios */
  uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */

--- a/src/uhttp.c
+++ b/src/uhttp.c
@ -232,20 +232,6 @@ handshake_done:

    uh_log_debug("read:[%.*s]", len, base);

-    if (!(con->flags & UH_CON_PARSERING)) {
-        if (!memmem(buf->base, buf->len, "\r\n\r\n", 4)) {
-            if (buf->len > UH_HEAD_SIZE_LIMIT) {
-                uh_log_err("HTTP head size too big");
-                uh_send_error(con, HTTP_STATUS_BAD_REQUEST, NULL);
-            }
-            return;
-        }
-        
-        base = buf->base;
-        len = buf->len;
-        con->flags |= UH_CON_PARSERING;
-    }
-
    parsered = http_parser_execute(&con->parser, &parser_settings, base, len);

    if (unlikely(con->flags & UH_CON_CLOSE))