Optimizing http-parser

Signed-off-by: Jianhui Zhao <jianhuizhao329@gmail.com>
main
Jianhui Zhao 2017-11-26 23:52:41 +08:00
parent 650f545c20
commit 34d052d4c0
4 changed files with 24 additions and 72 deletions

View File

@ -103,11 +103,11 @@ do { \
do { \ do { \
assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
\ \
if (FOR##_mark) { \ if (parser->mark) { \
if (LIKELY(settings->on_##FOR)) { \ if (LIKELY(settings->on_##FOR)) { \
parser->state = CURRENT_STATE(); \ parser->state = CURRENT_STATE(); \
if (UNLIKELY(0 != \ if (UNLIKELY(0 != \
settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ settings->on_##FOR(parser, parser->mark, (LEN)))) { \
SET_ERRNO(HPE_CB_##FOR); \ SET_ERRNO(HPE_CB_##FOR); \
} \ } \
UPDATE_STATE(parser->state); \ UPDATE_STATE(parser->state); \
@ -117,25 +117,17 @@ do { \
return (ER); \ return (ER); \
} \ } \
} \ } \
FOR##_mark = NULL; \ parser->mark = NULL; \
} \ } \
} while (0) } while (0)
/* Run the data callback FOR and consume the current byte */ /* Run the data callback FOR and consume the current byte */
#define CALLBACK_DATA(FOR) \ #define CALLBACK_DATA(FOR) \
CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) CALLBACK_DATA_(FOR, p - parser->mark, p - data + 1)
/* Run the data callback FOR and don't consume the current byte */ /* Run the data callback FOR and don't consume the current byte */
#define CALLBACK_DATA_NOADVANCE(FOR) \ #define CALLBACK_DATA_NOADVANCE(FOR) \
CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) CALLBACK_DATA_(FOR, p - parser->mark, p - data)
/* Set the mark FOR; non-destructive if mark is already set */
#define MARK(FOR) \
do { \
if (!FOR##_mark) { \
FOR##_mark = p; \
} \
} while (0)
/* Don't allow the total size of the HTTP headers (including the status /* Don't allow the total size of the HTTP headers (including the status
* line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
@ -636,11 +628,6 @@ size_t http_parser_execute (http_parser *parser,
char c, ch; char c, ch;
int8_t unhex_val; int8_t unhex_val;
const char *p = data; const char *p = data;
const char *header_field_mark = 0;
const char *header_value_mark = 0;
const char *url_mark = 0;
const char *body_mark = 0;
const char *status_mark = 0;
enum state p_state = (enum state) parser->state; enum state p_state = (enum state) parser->state;
const unsigned int lenient = parser->lenient_http_headers; const unsigned int lenient = parser->lenient_http_headers;
@ -670,7 +657,7 @@ size_t http_parser_execute (http_parser *parser,
} }
} }
#if 0
if (CURRENT_STATE() == s_header_field) if (CURRENT_STATE() == s_header_field)
header_field_mark = data; header_field_mark = data;
if (CURRENT_STATE() == s_header_value) if (CURRENT_STATE() == s_header_value)
@ -695,6 +682,7 @@ size_t http_parser_execute (http_parser *parser,
default: default:
break; break;
} }
#endif
for (p=data; p != data + len; p++) { for (p=data; p != data + len; p++) {
ch = *p; ch = *p;
@ -884,7 +872,7 @@ reexecute:
case s_res_status_start: case s_res_status_start:
{ {
MARK(status); parser->mark = p;
UPDATE_STATE(s_res_status); UPDATE_STATE(s_res_status);
parser->index = 0; parser->index = 0;
@ -1013,7 +1001,7 @@ reexecute:
{ {
if (ch == ' ') break; if (ch == ' ') break;
MARK(url); parser->mark = p;
if (parser->method == HTTP_CONNECT) { if (parser->method == HTTP_CONNECT) {
UPDATE_STATE(s_req_server_start); UPDATE_STATE(s_req_server_start);
} }
@ -1196,8 +1184,7 @@ reexecute:
goto error; goto error;
} }
MARK(header_field); parser->mark = p;
parser->index = 0; parser->index = 0;
UPDATE_STATE(s_header_field); UPDATE_STATE(s_header_field);
@ -1371,8 +1358,7 @@ reexecute:
case s_header_value_start: case s_header_value_start:
{ {
MARK(header_value); parser->mark = p;
UPDATE_STATE(s_header_value); UPDATE_STATE(s_header_value);
parser->index = 0; parser->index = 0;
@ -1695,7 +1681,7 @@ reexecute:
} }
/* header value was empty */ /* header value was empty */
MARK(header_value); parser->mark = p;
UPDATE_STATE(s_header_field_start); UPDATE_STATE(s_header_field_start);
CALLBACK_DATA_NOADVANCE(header_value); CALLBACK_DATA_NOADVANCE(header_value);
REEXECUTE(); REEXECUTE();
@ -1830,10 +1816,10 @@ reexecute:
* Further, if content_length ends up at 0, we want to see the last * Further, if content_length ends up at 0, we want to see the last
* byte again for our message complete callback. * byte again for our message complete callback.
*/ */
MARK(body); parser->mark = p;
parser->content_length -= to_read; parser->content_length -= to_read;
p += to_read - 1; p += to_read - 1;
if (parser->content_length == 0) { if (parser->content_length == 0) {
UPDATE_STATE(s_message_done); UPDATE_STATE(s_message_done);
@ -1846,18 +1832,19 @@ reexecute:
* complete-on-length. It's not clear that this distinction is * complete-on-length. It's not clear that this distinction is
* important for applications, but let's keep it for now. * important for applications, but let's keep it for now.
*/ */
CALLBACK_DATA_(body, p - body_mark + 1, p - data); CALLBACK_DATA_(body, p - parser->mark + 1, p - data);
REEXECUTE(); REEXECUTE();
} }
CALLBACK_DATA_(body, p - parser->mark + 1, p - data);
break; break;
} }
/* read until EOF */ /* read until EOF */
case s_body_identity_eof: case s_body_identity_eof:
MARK(body); parser->mark = p;
p = data + len - 1; p = data + len - 1;
CALLBACK_DATA_(body, p - parser->mark + 1, p - data);
break; break;
case s_message_done: case s_message_done:
@ -1962,14 +1949,15 @@ reexecute:
/* See the explanation in s_body_identity for why the content /* See the explanation in s_body_identity for why the content
* length and data pointers are managed this way. * length and data pointers are managed this way.
*/ */
MARK(body); parser->mark = p;
parser->content_length -= to_read; parser->content_length -= to_read;
p += to_read - 1; p += to_read - 1;
if (parser->content_length == 0) { if (parser->content_length == 0) {
UPDATE_STATE(s_chunk_data_almost_done); UPDATE_STATE(s_chunk_data_almost_done);
} }
CALLBACK_DATA_(body, p - parser->mark + 1, p - data);
break; break;
} }
@ -1996,28 +1984,6 @@ reexecute:
} }
} }
/* Run callbacks for any marks that we have leftover after we ran out of
* bytes. There should be at most one of these set, so it's OK to invoke
* them in series (unset marks will not result in callbacks).
*
* We use the NOADVANCE() variety of callbacks here because 'p' has already
* overflowed 'data' and this allows us to correct for the off-by-one that
* we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
* value that's in-bounds).
*/
assert(((header_field_mark ? 1 : 0) +
(header_value_mark ? 1 : 0) +
(url_mark ? 1 : 0) +
(body_mark ? 1 : 0) +
(status_mark ? 1 : 0)) <= 1);
CALLBACK_DATA_NOADVANCE(header_field);
CALLBACK_DATA_NOADVANCE(header_value);
CALLBACK_DATA_NOADVANCE(url);
CALLBACK_DATA_NOADVANCE(body);
CALLBACK_DATA_NOADVANCE(status);
RETURN(len); RETURN(len);
error: error:

View File

@ -13,8 +13,7 @@
#define UH_CON_CLOSE (1 << 0) #define UH_CON_CLOSE (1 << 0)
#define UH_CON_SSL_HANDSHAKE_DONE (1 << 1) /* SSL handshake has completed */ #define UH_CON_SSL_HANDSHAKE_DONE (1 << 1) /* SSL handshake has completed */
#define UH_CON_PARSERING (1 << 2) /* Whether executed http_parser_execute() */ #define UH_CON_REUSE (1 << 2)
#define UH_CON_REUSE (1 << 3)
#define likely(x) (__builtin_expect(!!(x), 1)) #define likely(x) (__builtin_expect(!!(x), 1))
#define unlikely(x) (__builtin_expect(!!(x), 0)) #define unlikely(x) (__builtin_expect(!!(x), 0))

View File

@ -296,7 +296,8 @@ struct http_parser {
unsigned int header_state : 7; /* enum header_state from http_parser.c */ unsigned int header_state : 7; /* enum header_state from http_parser.c */
unsigned int index : 7; /* index into current matcher */ unsigned int index : 7; /* index into current matcher */
unsigned int lenient_http_headers : 1; unsigned int lenient_http_headers : 1;
const char *mark; /* Marks the start position of the current matcher */
uint32_t nread; /* # bytes read in various scenarios */ uint32_t nread; /* # bytes read in various scenarios */
uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */

View File

@ -232,20 +232,6 @@ handshake_done:
uh_log_debug("read:[%.*s]", len, base); uh_log_debug("read:[%.*s]", len, base);
if (!(con->flags & UH_CON_PARSERING)) {
if (!memmem(buf->base, buf->len, "\r\n\r\n", 4)) {
if (buf->len > UH_HEAD_SIZE_LIMIT) {
uh_log_err("HTTP head size too big");
uh_send_error(con, HTTP_STATUS_BAD_REQUEST, NULL);
}
return;
}
base = buf->base;
len = buf->len;
con->flags |= UH_CON_PARSERING;
}
parsered = http_parser_execute(&con->parser, &parser_settings, base, len); parsered = http_parser_execute(&con->parser, &parser_settings, base, len);
if (unlikely(con->flags & UH_CON_CLOSE)) if (unlikely(con->flags & UH_CON_CLOSE))