Skip to content
Permalink
Browse files Browse the repository at this point in the history
Merge pull request from GHSA-c2jg-hw38-jrqq
10323: Inconsistent Interpretation of HTTP Requests ('HTTP Request Smuggling') in twisted.web
  • Loading branch information
adiroiban committed Apr 4, 2022
2 parents de8e5a6 + a513804 commit 592217e
Show file tree
Hide file tree
Showing 3 changed files with 278 additions and 13 deletions.
95 changes: 86 additions & 9 deletions src/twisted/web/http.py
Expand Up @@ -108,7 +108,7 @@
import time
import warnings
from io import BytesIO
from typing import AnyStr, Callable, Optional
from typing import AnyStr, Callable, Optional, Tuple
from urllib.parse import (
ParseResultBytes,
unquote_to_bytes as unquote,
Expand Down Expand Up @@ -410,10 +410,39 @@ def toChunk(data):
return (networkString(f"{len(data):x}"), b"\r\n", data, b"\r\n")


def fromChunk(data):
def _ishexdigits(b: bytes) -> bool:
"""
Is the string case-insensitively hexidecimal?
It must be composed of one or more characters in the ranges a-f, A-F
and 0-9.
"""
for c in b:
if c not in b"0123456789abcdefABCDEF":
return False
return b != b""


def _hexint(b: bytes) -> int:
"""
Decode a hexadecimal integer.
Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has
a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when
parsing network protocols.
"""
if not _ishexdigits(b):
raise ValueError(b)
return int(b, 16)


def fromChunk(data: bytes) -> Tuple[bytes, bytes]:
"""
Convert chunk to string.
Note that this function is not specification compliant: it doesn't handle
chunk extensions.
@type data: C{bytes}
@return: tuple of (result, remaining) - both C{bytes}.
Expand All @@ -422,7 +451,7 @@ def fromChunk(data):
byte string.
"""
prefix, rest = data.split(b"\r\n", 1)
length = int(prefix, 16)
length = _hexint(prefix)
if length < 0:
raise ValueError("Chunk length must be >= 0, not %d" % (length,))
if rest[length : length + 2] != b"\r\n":
Expand Down Expand Up @@ -1790,6 +1819,47 @@ def noMoreData(self):
maxChunkSizeLineLength = 1024


_chunkExtChars = (
b"\t !\"#$%&'()*+,-./0123456789:;<=>?@"
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
b"abcdefghijklmnopqrstuvwxyz{|}~"
b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
)
"""
Characters that are valid in a chunk extension.
See RFC 7230 section 4.1.1::
chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
chunk-ext-name = token
chunk-ext-val = token / quoted-string
And section 3.2.6::
token = 1*tchar
tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
/ "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
/ DIGIT / ALPHA
; any VCHAR, except delimiters
quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
obs-text = %x80-FF
We don't check if chunk extensions are well-formed beyond validating that they
don't contain characters outside this range.
"""


class _ChunkedTransferDecoder:
"""
Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230,
Expand Down Expand Up @@ -1883,14 +1953,19 @@ def _dataReceived_CHUNK_LENGTH(self) -> bool:
endOfLengthIndex = self._buffer.find(b";", 0, eolIndex)
if endOfLengthIndex == -1:
endOfLengthIndex = eolIndex
rawLength = self._buffer[0:endOfLengthIndex]
try:
length = int(self._buffer[0:endOfLengthIndex], 16)
length = _hexint(rawLength)
except ValueError:
raise _MalformedChunkedDataError("Chunk-size must be an integer.")

if length < 0:
raise _MalformedChunkedDataError("Chunk-size must not be negative.")
elif length == 0:
ext = self._buffer[endOfLengthIndex + 1 : eolIndex]
if ext and ext.translate(None, _chunkExtChars) != b"":
raise _MalformedChunkedDataError(
f"Invalid characters in chunk extensions: {ext!r}."
)

if length == 0:
self.state = "TRAILER"
else:
self.state = "BODY"
Expand Down Expand Up @@ -2246,7 +2321,7 @@ def lineReceived(self, line):
self.setRawMode()
elif line[0] in b" \t":
# Continuation of a multi line header.
self.__header = self.__header + b"\n" + line
self.__header += b" " + line.lstrip(b" \t")
# Regular header line.
# Processing of header line is delayed to allow accumulating multi
# line headers.
Expand Down Expand Up @@ -2274,6 +2349,8 @@ def fail():

# Can this header determine the length?
if header == b"content-length":
if not data.isdigit():
return fail()
try:
length = int(data)
except ValueError:
Expand Down Expand Up @@ -2327,7 +2404,7 @@ def headerReceived(self, line):
return False

header = header.lower()
data = data.strip()
data = data.strip(b" \t")

if not self._maybeChooseTransferDecoder(header, data):
return False
Expand Down
1 change: 1 addition & 0 deletions src/twisted/web/newsfragments/10323.bugfix
@@ -0,0 +1 @@
twisted.web.http had several several defects in HTTP request parsing that could permit HTTP request smuggling. It now disallows signed Content-Length headers, forbids illegal characters in chunked extensions, forbids 0x prefix to chunk lengths, and only strips spaces and horizontal tab characters from header values. These changes address CVE-2022-24801 and GHSA-c2jg-hw38-jrqq.

0 comments on commit 592217e

Please sign in to comment.