@@ -241,6 +241,41 @@ static const char *is_complete(const char *buf, const char *buf_end, size_t last
241
241
*valp_ += res_; \
242
242
} while (0)
243
243
244
+ /* returned pointer is always within [buf, buf_end), or null */
245
+ static const char * parse_token (const char * buf , const char * buf_end , const char * * token , size_t * token_len , char next_char ,
246
+ int * ret )
247
+ {
248
+ /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
249
+ * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
250
+ static const char ALIGNED (16 ) ranges [] = "\x00 " /* control chars and up to SP */
251
+ "\"\"" /* 0x22 */
252
+ "()" /* 0x28,0x29 */
253
+ ",," /* 0x2c */
254
+ "//" /* 0x2f */
255
+ ":@" /* 0x3a-0x40 */
256
+ "[]" /* 0x5b-0x5d */
257
+ "{\xff" ; /* 0x7b-0xff */
258
+ const char * buf_start = buf ;
259
+ int found ;
260
+ buf = findchar_fast (buf , buf_end , ranges , sizeof (ranges ) - 1 , & found );
261
+ if (!found ) {
262
+ CHECK_EOF ();
263
+ }
264
+ while (1 ) {
265
+ if (* buf == next_char ) {
266
+ break ;
267
+ } else if (!token_char_map [(unsigned char )* buf ]) {
268
+ * ret = -1 ;
269
+ return NULL ;
270
+ }
271
+ ++ buf ;
272
+ CHECK_EOF ();
273
+ }
274
+ * token = buf_start ;
275
+ * token_len = buf - buf_start ;
276
+ return buf ;
277
+ }
278
+
244
279
/* returned pointer is always within [buf, buf_end), or null */
245
280
static const char * parse_http_version (const char * buf , const char * buf_end , int * minor_version , int * ret )
246
281
{
@@ -280,31 +315,10 @@ static const char *parse_headers(const char *buf, const char *buf_end, struct ph
280
315
if (!(* num_headers != 0 && (* buf == ' ' || * buf == '\t' ))) {
281
316
/* parsing name, but do not discard SP before colon, see
282
317
* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
283
- headers [* num_headers ].name = buf ;
284
- static const char ALIGNED (16 ) ranges1 [] = "\x00 " /* control chars and up to SP */
285
- "\"\"" /* 0x22 */
286
- "()" /* 0x28,0x29 */
287
- ",," /* 0x2c */
288
- "//" /* 0x2f */
289
- ":@" /* 0x3a-0x40 */
290
- "[]" /* 0x5b-0x5d */
291
- "{\377" ; /* 0x7b-0xff */
292
- int found ;
293
- buf = findchar_fast (buf , buf_end , ranges1 , sizeof (ranges1 ) - 1 , & found );
294
- if (!found ) {
295
- CHECK_EOF ();
296
- }
297
- while (1 ) {
298
- if (* buf == ':' ) {
299
- break ;
300
- } else if (!token_char_map [(unsigned char )* buf ]) {
301
- * ret = -1 ;
302
- return NULL ;
303
- }
304
- ++ buf ;
305
- CHECK_EOF ();
318
+ if ((buf = parse_token (buf , buf_end , & headers [* num_headers ].name , & headers [* num_headers ].name_len , ':' , ret )) == NULL ) {
319
+ return NULL ;
306
320
}
307
- if (( headers [* num_headers ].name_len = buf - headers [ * num_headers ]. name ) == 0 ) {
321
+ if (headers [* num_headers ].name_len == 0 ) {
308
322
* ret = -1 ;
309
323
return NULL ;
310
324
}
@@ -352,13 +366,17 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha
352
366
}
353
367
354
368
/* parse request line */
355
- ADVANCE_TOKEN (* method , * method_len );
369
+ if ((buf = parse_token (buf , buf_end , method , method_len , ' ' , ret )) == NULL ) {
370
+ return NULL ;
371
+ }
356
372
do {
357
373
++ buf ;
374
+ CHECK_EOF ();
358
375
} while (* buf == ' ' );
359
376
ADVANCE_TOKEN (* path , * path_len );
360
377
do {
361
378
++ buf ;
379
+ CHECK_EOF ();
362
380
} while (* buf == ' ' );
363
381
if (* method_len == 0 || * path_len == 0 ) {
364
382
* ret = -1 ;
@@ -422,6 +440,7 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
422
440
}
423
441
do {
424
442
++ buf ;
443
+ CHECK_EOF ();
425
444
} while (* buf == ' ' );
426
445
/* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
427
446
if (buf_end - buf < 4 ) {
@@ -430,14 +449,15 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
430
449
}
431
450
PARSE_INT_3 (status );
432
451
433
- /* get message includig preceding space */
452
+ /* get message including preceding space */
434
453
if ((buf = get_token_to_eol (buf , buf_end , msg , msg_len , ret )) == NULL ) {
435
454
return NULL ;
436
455
}
437
456
if (* msg_len == 0 ) {
438
457
/* ok */
439
458
} else if (* * msg == ' ' ) {
440
- /* remove preceding space */
459
+ /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
460
+ * before running past the end of the given buffer. */
441
461
do {
442
462
++ * msg ;
443
463
-- * msg_len ;
@@ -525,6 +545,8 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
525
545
size_t dst = 0 , src = 0 , bufsz = * _bufsz ;
526
546
ssize_t ret = -2 ; /* incomplete */
527
547
548
+ decoder -> _total_read += bufsz ;
549
+
528
550
while (1 ) {
529
551
switch (decoder -> _state ) {
530
552
case CHUNKED_IN_CHUNK_SIZE :
@@ -537,6 +559,18 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
537
559
ret = -1 ;
538
560
goto Exit ;
539
561
}
562
+ /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
563
+ switch (buf [src ]) {
564
+ case ' ' :
565
+ case '\011' :
566
+ case ';' :
567
+ case '\012' :
568
+ case '\015' :
569
+ break ;
570
+ default :
571
+ ret = -1 ;
572
+ goto Exit ;
573
+ }
540
574
break ;
541
575
}
542
576
if (decoder -> _hex_count == sizeof (size_t ) * 2 ) {
@@ -632,6 +666,12 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
632
666
if (dst != src )
633
667
memmove (buf + dst , buf + src , bufsz - src );
634
668
* _bufsz = dst ;
669
+ /* if incomplete but the overhead of the chunked encoding is >=100KB and >75% (i.e. payload is below a quarter of the bytes read), signal an error */
670
+ if (ret == -2 ) {
671
+ decoder -> _total_overhead += bufsz - dst ;
672
+ if (decoder -> _total_overhead >= 100 * 1024 && decoder -> _total_read - decoder -> _total_overhead < decoder -> _total_read / 4 )
673
+ ret = -1 ;
674
+ }
635
675
return ret ;
636
676
}
637
677
0 commit comments