Skip to content

Commit 09c35ac

Browse files
authored
thirdparty: update picohttpparser (#20843)
1 parent a21658b commit 09c35ac

File tree

2 files changed

+74
-31
lines changed

2 files changed

+74
-31
lines changed

thirdparty/picohttpparser/src/picohttpparser.c

Lines changed: 67 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,41 @@ static const char *is_complete(const char *buf, const char *buf_end, size_t last
241241
*valp_ += res_; \
242242
} while (0)
243243

244+
/* returned pointer is always within [buf, buf_end), or null */
245+
static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
246+
int *ret)
247+
{
248+
/* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
249+
* bits, which is the size of an SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
250+
static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */
251+
"\"\"" /* 0x22 */
252+
"()" /* 0x28,0x29 */
253+
",," /* 0x2c */
254+
"//" /* 0x2f */
255+
":@" /* 0x3a-0x40 */
256+
"[]" /* 0x5b-0x5d */
257+
"{\xff"; /* 0x7b-0xff */
258+
const char *buf_start = buf;
259+
int found;
260+
buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
261+
if (!found) {
262+
CHECK_EOF();
263+
}
264+
while (1) {
265+
if (*buf == next_char) {
266+
break;
267+
} else if (!token_char_map[(unsigned char)*buf]) {
268+
*ret = -1;
269+
return NULL;
270+
}
271+
++buf;
272+
CHECK_EOF();
273+
}
274+
*token = buf_start;
275+
*token_len = buf - buf_start;
276+
return buf;
277+
}
278+
244279
/* returned pointer is always within [buf, buf_end), or null */
245280
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
246281
{
@@ -280,31 +315,10 @@ static const char *parse_headers(const char *buf, const char *buf_end, struct ph
280315
if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
281316
/* parsing name, but do not discard SP before colon, see
282317
* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
283-
headers[*num_headers].name = buf;
284-
static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
285-
"\"\"" /* 0x22 */
286-
"()" /* 0x28,0x29 */
287-
",," /* 0x2c */
288-
"//" /* 0x2f */
289-
":@" /* 0x3a-0x40 */
290-
"[]" /* 0x5b-0x5d */
291-
"{\377"; /* 0x7b-0xff */
292-
int found;
293-
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
294-
if (!found) {
295-
CHECK_EOF();
296-
}
297-
while (1) {
298-
if (*buf == ':') {
299-
break;
300-
} else if (!token_char_map[(unsigned char)*buf]) {
301-
*ret = -1;
302-
return NULL;
303-
}
304-
++buf;
305-
CHECK_EOF();
318+
if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) {
319+
return NULL;
306320
}
307-
if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
321+
if (headers[*num_headers].name_len == 0) {
308322
*ret = -1;
309323
return NULL;
310324
}
@@ -352,13 +366,17 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha
352366
}
353367

354368
/* parse request line */
355-
ADVANCE_TOKEN(*method, *method_len);
369+
if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
370+
return NULL;
371+
}
356372
do {
357373
++buf;
374+
CHECK_EOF();
358375
} while (*buf == ' ');
359376
ADVANCE_TOKEN(*path, *path_len);
360377
do {
361378
++buf;
379+
CHECK_EOF();
362380
} while (*buf == ' ');
363381
if (*method_len == 0 || *path_len == 0) {
364382
*ret = -1;
@@ -422,6 +440,7 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
422440
}
423441
do {
424442
++buf;
443+
CHECK_EOF();
425444
} while (*buf == ' ');
426445
/* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
427446
if (buf_end - buf < 4) {
@@ -430,14 +449,15 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
430449
}
431450
PARSE_INT_3(status);
432451

433-
/* get message includig preceding space */
452+
/* get message including preceding space */
434453
if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
435454
return NULL;
436455
}
437456
if (*msg_len == 0) {
438457
/* ok */
439458
} else if (**msg == ' ') {
440-
/* remove preceding space */
459+
/* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
460+
* before running past the end of the given buffer. */
441461
do {
442462
++*msg;
443463
--*msg_len;
@@ -525,6 +545,8 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
525545
size_t dst = 0, src = 0, bufsz = *_bufsz;
526546
ssize_t ret = -2; /* incomplete */
527547

548+
decoder->_total_read += bufsz;
549+
528550
while (1) {
529551
switch (decoder->_state) {
530552
case CHUNKED_IN_CHUNK_SIZE:
@@ -537,6 +559,18 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
537559
ret = -1;
538560
goto Exit;
539561
}
562+
/* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
563+
switch (buf[src]) {
564+
case ' ':
565+
case '\011':
566+
case ';':
567+
case '\012':
568+
case '\015':
569+
break;
570+
default:
571+
ret = -1;
572+
goto Exit;
573+
}
540574
break;
541575
}
542576
if (decoder->_hex_count == sizeof(size_t) * 2) {
@@ -632,6 +666,12 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
632666
if (dst != src)
633667
memmove(buf + dst, buf + src, bufsz - src);
634668
*_bufsz = dst;
669+
/* if incomplete but the overhead of the chunked encoding is >=100KB and >75% (i.e. decoded payload is less than a quarter of the bytes read), signal an error */
670+
if (ret == -2) {
671+
decoder->_total_overhead += bufsz - dst;
672+
if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
673+
ret = -1;
674+
}
635675
return ret;
636676
}
637677

thirdparty/picohttpparser/src/picohttpparser.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#ifndef picohttpparser_h
2828
#define picohttpparser_h
2929

30+
#include <stdint.h>
3031
#include <sys/types.h>
3132

3233
#ifdef _MSC_VER
@@ -39,12 +40,12 @@ extern "C" {
3940

4041
/* contains name and value of a header (name == NULL if it is a continuing line
4142
* of a multiline header) */
42-
typedef struct phr_header {
43+
struct phr_header {
4344
const char *name;
4445
size_t name_len;
4546
const char *value;
4647
size_t value_len;
47-
}phr_header;
48+
};
4849

4950
/* returns number of bytes consumed if successful, -2 if request is partial,
5051
* -1 if failed */
@@ -64,6 +65,8 @@ struct phr_chunked_decoder {
6465
char consume_trailer; /* if trailing headers should be consumed */
6566
char _hex_count;
6667
char _state;
68+
uint64_t _total_read;
69+
uint64_t _total_overhead;
6770
};
6871

6972
/* the function rewrites the buffer given as (buf, bufsz) removing the chunked-
@@ -72,8 +75,8 @@ struct phr_chunked_decoder {
7275
* repeatedly call the function while it returns -2 (incomplete) every time
7376
* supplying newly arrived data. If the end of the chunked-encoded data is
7477
* found, the function returns a non-negative number indicating the number of
75-
* octets left undecoded at the tail of the supplied buffer. Returns -1 on
76-
* error.
78+
* octets left undecoded, which start at the offset stored in `*bufsz`.
79+
* Returns -1 on error.
7780
*/
7881
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);
7982

0 commit comments

Comments
 (0)