@@ -241,6 +241,41 @@ static const char *is_complete(const char *buf, const char *buf_end, size_t last
241241 *valp_ += res_; \
242242 } while (0)
243243
244+ /* returned pointer is always within [buf, buf_end), or null */
245+ static const char * parse_token (const char * buf , const char * buf_end , const char * * token , size_t * token_len , char next_char ,
246+ int * ret )
247+ {
248+ /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
249+ * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
250+ static const char ALIGNED (16 ) ranges [] = "\x00 " /* control chars and up to SP */
251+ "\"\"" /* 0x22 */
252+ "()" /* 0x28,0x29 */
253+ ",," /* 0x2c */
254+ "//" /* 0x2f */
255+ ":@" /* 0x3a-0x40 */
256+ "[]" /* 0x5b-0x5d */
257+ "{\xff" ; /* 0x7b-0xff */
258+ const char * buf_start = buf ;
259+ int found ;
260+ buf = findchar_fast (buf , buf_end , ranges , sizeof (ranges ) - 1 , & found );
261+ if (!found ) {
262+ CHECK_EOF ();
263+ }
264+ while (1 ) {
265+ if (* buf == next_char ) {
266+ break ;
267+ } else if (!token_char_map [(unsigned char )* buf ]) {
268+ * ret = -1 ;
269+ return NULL ;
270+ }
271+ ++ buf ;
272+ CHECK_EOF ();
273+ }
274+ * token = buf_start ;
275+ * token_len = buf - buf_start ;
276+ return buf ;
277+ }
278+
244279/* returned pointer is always within [buf, buf_end), or null */
245280static const char * parse_http_version (const char * buf , const char * buf_end , int * minor_version , int * ret )
246281{
@@ -280,31 +315,10 @@ static const char *parse_headers(const char *buf, const char *buf_end, struct ph
280315 if (!(* num_headers != 0 && (* buf == ' ' || * buf == '\t' ))) {
281316 /* parsing name, but do not discard SP before colon, see
282317 * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
283- headers [* num_headers ].name = buf ;
284- static const char ALIGNED (16 ) ranges1 [] = "\x00 " /* control chars and up to SP */
285- "\"\"" /* 0x22 */
286- "()" /* 0x28,0x29 */
287- ",," /* 0x2c */
288- "//" /* 0x2f */
289- ":@" /* 0x3a-0x40 */
290- "[]" /* 0x5b-0x5d */
291- "{\377" ; /* 0x7b-0xff */
292- int found ;
293- buf = findchar_fast (buf , buf_end , ranges1 , sizeof (ranges1 ) - 1 , & found );
294- if (!found ) {
295- CHECK_EOF ();
296- }
297- while (1 ) {
298- if (* buf == ':' ) {
299- break ;
300- } else if (!token_char_map [(unsigned char )* buf ]) {
301- * ret = -1 ;
302- return NULL ;
303- }
304- ++ buf ;
305- CHECK_EOF ();
318+ if ((buf = parse_token (buf , buf_end , & headers [* num_headers ].name , & headers [* num_headers ].name_len , ':' , ret )) == NULL ) {
319+ return NULL ;
306320 }
307- if (( headers [* num_headers ].name_len = buf - headers [ * num_headers ]. name ) == 0 ) {
321+ if (headers [* num_headers ].name_len == 0 ) {
308322 * ret = -1 ;
309323 return NULL ;
310324 }
@@ -352,13 +366,17 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha
352366 }
353367
354368 /* parse request line */
355- ADVANCE_TOKEN (* method , * method_len );
369+ if ((buf = parse_token (buf , buf_end , method , method_len , ' ' , ret )) == NULL ) {
370+ return NULL ;
371+ }
356372 do {
357373 ++ buf ;
374+ CHECK_EOF ();
358375 } while (* buf == ' ' );
359376 ADVANCE_TOKEN (* path , * path_len );
360377 do {
361378 ++ buf ;
379+ CHECK_EOF ();
362380 } while (* buf == ' ' );
363381 if (* method_len == 0 || * path_len == 0 ) {
364382 * ret = -1 ;
@@ -422,6 +440,7 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
422440 }
423441 do {
424442 ++ buf ;
443+ CHECK_EOF ();
425444 } while (* buf == ' ' );
426445 /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
427446 if (buf_end - buf < 4 ) {
@@ -430,14 +449,15 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
430449 }
431450 PARSE_INT_3 (status );
432451
433- /* get message includig preceding space */
452+ /* get message including preceding space */
434453 if ((buf = get_token_to_eol (buf , buf_end , msg , msg_len , ret )) == NULL ) {
435454 return NULL ;
436455 }
437456 if (* msg_len == 0 ) {
438457 /* ok */
439458 } else if (* * msg == ' ' ) {
440- /* remove preceding space */
459+ /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
460+ * before running past the end of the given buffer. */
441461 do {
442462 ++ * msg ;
443463 -- * msg_len ;
@@ -525,6 +545,8 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
525545 size_t dst = 0 , src = 0 , bufsz = * _bufsz ;
526546 ssize_t ret = -2 ; /* incomplete */
527547
548+ decoder -> _total_read += bufsz ;
549+
528550 while (1 ) {
529551 switch (decoder -> _state ) {
530552 case CHUNKED_IN_CHUNK_SIZE :
@@ -537,6 +559,18 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
537559 ret = -1 ;
538560 goto Exit ;
539561 }
562+ /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
563+ switch (buf [src ]) {
564+ case ' ' :
565+ case '\011' :
566+ case ';' :
567+ case '\012' :
568+ case '\015' :
569+ break ;
570+ default :
571+ ret = -1 ;
572+ goto Exit ;
573+ }
540574 break ;
541575 }
542576 if (decoder -> _hex_count == sizeof (size_t ) * 2 ) {
@@ -632,6 +666,12 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
632666 if (dst != src )
633667 memmove (buf + dst , buf + src , bufsz - src );
634668 * _bufsz = dst ;
669+ /* if incomplete but the overhead of the chunked encoding is >=100KB and >80%, signal an error */
670+ if (ret == -2 ) {
671+ decoder -> _total_overhead += bufsz - dst ;
672+ if (decoder -> _total_overhead >= 100 * 1024 && decoder -> _total_read - decoder -> _total_overhead < decoder -> _total_read / 4 )
673+ ret = -1 ;
674+ }
635675 return ret ;
636676}
637677
0 commit comments