Limit size of parsed header lines and message bodies
vfaronov committed Jul 17, 2017
1 parent 9a6ea5d commit 8ca0e31
Showing 6 changed files with 102 additions and 30 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
@@ -7,6 +7,9 @@ Unreleased
 Changed
 -------
 - Notice `1277`_ (obsolete 'X-' prefix) is now reported only once per message.
+- HTTPolice no longer attempts to process very long header lines (currently
+  16 KiB; they fail with notice `1006`_/`1009`_) or very long message bodies
+  (currently 1 GiB; notice `1298`_).
 - The syntax of `chunk extensions`_ is no longer checked.
 
 Added
@@ -18,6 +21,8 @@ Added
 
 .. _Forwarded: https://tools.ietf.org/html/rfc7239
 .. _chunk extensions: https://tools.ietf.org/html/rfc7230#section-4.1.1
+.. _1009: http://pythonhosted.org/HTTPolice/notices.html#1009
+.. _1298: http://pythonhosted.org/HTTPolice/notices.html#1298
 .. _1296: http://pythonhosted.org/HTTPolice/notices.html#1296
 .. _1297: http://pythonhosted.org/HTTPolice/notices.html#1297
 
77 changes: 48 additions & 29 deletions httpolice/framing1.py
@@ -30,6 +30,9 @@
 STATUS_CODE = re.compile(u'^[0-9]{3}$')
 
 
+MAX_BODY_SIZE = 1024 * 1024 * 1024
+
+
 def parse_streams(inbound, outbound, scheme=None):
     """Parse one or two HTTP/1.x streams.
@@ -118,6 +121,25 @@ def _parse_request_heading(stream, scheme=None):
     return req
 
 
+def _process_content_length(msg, stream):
+    n = msg.headers.content_length.value
+    if n is Unavailable:
+        msg.body = Unavailable
+        stream.sane = False
+    else:
+        if n > MAX_BODY_SIZE:
+            msg.body = Unavailable
+            stream.sane = False
+            msg.complain(1298, place=msg.headers.content_length, size=n,
+                         max_size=MAX_BODY_SIZE)
+        else:
+            try:
+                msg.body = stream.read(n)
+            except ParseError as exc:
+                msg.body = Unavailable
+                msg.complain(1004, error=exc)
+
+
 def _parse_request_body(req, stream):
     # RFC 7230 section 3.3.3.
 
@@ -133,16 +155,7 @@ def _parse_request_body(req, stream):
         _decode_transfer_coding(req, codings.pop())
 
     elif req.headers.content_length:
-        n = req.headers.content_length.value
-        if n is Unavailable:
-            req.body = Unavailable
-            stream.sane = False
-        else:
-            try:
-                req.body = stream.read(n)
-            except ParseError as exc:
-                req.body = Unavailable
-                req.complain(1004, error=exc)
+        _process_content_length(req, stream)
 
     else:
         req.body = b''
@@ -220,16 +233,7 @@ def _parse_response_body(resp, stream):
         _decode_transfer_coding(resp, codings.pop())
 
     elif resp.headers.content_length.is_present:
-        n = resp.headers.content_length.value
-        if n is Unavailable:
-            resp.body = Unavailable
-            stream.sane = False
-        else:
-            try:
-                resp.body = stream.read(n)
-            except ParseError as exc:
-                resp.body = Unavailable
-                resp.complain(1004, error=exc)
+        _process_content_length(resp, stream)
 
     else:
         resp.body = stream.read()
@@ -283,7 +287,17 @@ def _decode_transfer_coding(msg, coding):
         msg.body = Unavailable
 
 
-def _parse_chunk(stream):
+class BodyTooLongError(Exception):
+
+    def __init__(self, size, max_size):
+        super(BodyTooLongError, self).__init__(u'body longer than %d bytes' %
+                                               max_size)
+        self.size = size
+        self.max_size = max_size
+
+
+def _parse_chunk(stream, data):
+    current_size = sum(len(c) for c in data)
     with stream.parsing(chunk):
         pos = stream.tell()
         (size_s, _, _) = stream.readline().partition(u';')
@@ -293,28 +307,33 @@ def _parse_chunk(stream):
         except ValueError:
             raise stream.error(pos)
         if size == 0:
-            return b''
+            return False
+        elif size + current_size > MAX_BODY_SIZE:
+            stream.sane = False
+            raise BodyTooLongError(size + current_size, MAX_BODY_SIZE)
         else:
-            data = stream.read(size)
+            data.append(stream.read(size))
             stream.readlineend()
-            return data
+            return True
 
 
 def _parse_chunked(msg, stream):
     data = []
+    place = u'chunked framing'
     try:
-        chunk_data = _parse_chunk(stream)
-        while chunk_data:
-            data.append(chunk_data)
-            chunk_data = _parse_chunk(stream)
+        while _parse_chunk(stream, data):
+            pass
         trailer = parse_header_fields(stream)
         with stream.parsing(chunked_body):
             stream.readlineend()
     except ParseError as e:
         msg.complain(1005, error=e)
         msg.body = Unavailable
+    except BodyTooLongError as e:
+        msg.complain(1298, place=place, size=e.size, max_size=e.max_size)
+        msg.body = Unavailable
     else:
-        stream.dump_complaints(msg.complain, place=u'chunked framing')
+        stream.dump_complaints(msg.complain, place=place)
         msg.body = b''.join(data)
         msg.trailer_entries = trailer
         if trailer:
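The chunked path caps the body by checking each chunk's declared size against the running total before reading it, so a hostile size line is rejected without ever turning into an enormous read. Below is a minimal standalone sketch of the same accounting, not HTTPolice's actual parser: it assumes simplified framing (no chunk extensions or trailers) and uses a plain io.BytesIO in place of the library's Stream class.

    import io

    MAX_BODY_SIZE = 1024 * 1024 * 1024   # the same 1 GiB cap as framing1.py


    class BodyTooLongError(Exception):
        pass


    def read_chunked(stream, max_size=MAX_BODY_SIZE):
        # Accumulate chunks, checking the *declared* size before reading,
        # so an absurd size line never triggers a gigantic allocation.
        data = []
        total = 0
        while True:
            size = int(stream.readline().split(b';')[0], 16)
            if size == 0:
                return b''.join(data)      # last-chunk marker
            total += size
            if total > max_size:
                raise BodyTooLongError(total)
            data.append(stream.read(size))
            stream.readline()              # consume the CRLF after chunk data

    body = io.BytesIO(b'5\r\nHello\r\n7\r\n world!\r\n0\r\n\r\n')
    assert read_chunked(body) == b'Hello world!'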
5 changes: 5 additions & 0 deletions httpolice/notices.xml
@@ -2180,4 +2180,9 @@ One non-obvious thing is how references work
 <explain>If this was intended to be <var ref="n_elements"/> parameters for a single proxy hop, then the pairs must be separated with semicolons, not commas.</explain>
 </comment>
 
+<debug id="1298">
+<title>Body is too long to be checked</title>
+<explain>This message’s <var ref="place"/> indicates that the body is at least <var ref="size"/> bytes long. HTTPolice does not attempt to process bodies longer than <var ref="max_size"/> bytes. The rest of the stream will not be processed either.</explain>
+</debug>
+
 </notices>
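The <var ref="..."/> placeholders in this notice are filled from the keyword arguments of the matching complain() calls; as a recap, both new call sites in framing1.py above supply all three:

    # Content-Length path: `place` is the offending header entry.
    msg.complain(1298, place=msg.headers.content_length, size=n,
                 max_size=MAX_BODY_SIZE)

    # Chunked path: `place` is the literal u'chunked framing'.
    msg.complain(1298, place=place, size=e.size, max_size=e.max_size)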
9 changes: 8 additions & 1 deletion httpolice/stream.py
Expand Up @@ -15,6 +15,8 @@ class Stream(object):
as similarly-named methods of file objects.
"""

max_line_length = 16 * 1024

def __init__(self, file_, name=None):
self.file = file_
self.name = name
@@ -62,10 +64,15 @@ def read(self, n=-1):
 
     def readline(self, decode=True):
         pos = self.tell()
-        r = self.file.readline()
+        r = self.file.readline(self.max_line_length)
        if self.peek() == b'':
             self.eof = True
         if not r.endswith(b'\n'):
+            if len(r) >= self.max_line_length:
+                raise self.error(
+                    pos,
+                    expected=u'no more than %d bytes before end of line' %
+                             self.max_line_length)
             raise self.error(pos, expected=u'data terminated by end of line')
 
         if len(r) >= 2 and r[-2:-1] == b'\r':
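This change leans on the optional size argument that binary file objects' readline() already accepts: at most that many bytes are returned, even if no newline has been seen yet. A line that comes back at exactly the limit without a trailing newline was therefore truncated, which is what the new error branch detects. A quick standalone illustration with io.BytesIO:

    import io

    MAX_LINE = 16 * 1024                  # mirrors Stream.max_line_length

    stream = io.BytesIO(b'x' * 20000 + b'\r\n')
    line = stream.readline(MAX_LINE)

    assert len(line) == MAX_LINE          # readline() stopped at the cap
    assert not line.endswith(b'\n')       # no terminator: line was truncated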
23 changes: 23 additions & 0 deletions test/combined_data/1298_1
@@ -0,0 +1,23 @@
+1298 1298 1007 1010
+
+======== BEGIN INBOUND STREAM ========
+PUT /articles/123/ HTTP/1.1
+Host: example.com
+User-Agent: demo
+Content-Type: text/plain
+Content-Length: 8429245833248012
+
+Hello world!
+
+======== BEGIN OUTBOUND STREAM ========
+HTTP/1.1 201 Created
+Date: Thu, 31 Dec 2015 18:26:56 GMT
+Content-Type: text/plain
+Transfer-Encoding: chunked
+
+3e
+A new article has been created with the following content:
+
+
+1df25abb9d7d0c
+Hello world!
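Both streams in this test case declare the same absurd size: the chunk-size line 1df25abb9d7d0c is hexadecimal for 8429245833248012, the very value in the request's Content-Length, and either one dwarfs the 1 GiB cap, so each stream earns its own notice 1298, matching the doubled entry in the expected list above:

    >>> int('1df25abb9d7d0c', 16)
    8429245833248012
    >>> int('1df25abb9d7d0c', 16) > 1024 ** 3
    True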
13 changes: 13 additions & 0 deletions test/test_streams_input.py
@@ -359,3 +359,16 @@ def test_rearrange():
     assert exchanges[8].request is None
     assert [complaint.id for complaint in exchanges[8].complaints] == [1279]
     assert exchanges[9].request.target == u'/08'
+
+
+def test_super_long_headers(tmpdir):
+    req_path = tmpdir.join('request.dat')
+    with req_path.open('wb') as req_file:
+        req_file.write(b'GET / HTTP/1.1\r\n'
+                       b'Host: example.com\r\n'
+                       b'User-Agent: test\r\n'
+                       b'Accept-Language: ' + (b'en, ' * 4096) + b'\r\n'
+                       b'\r\n')
+    exchanges = load(req_stream_input, [str(req_path)])
+    assert exchanges[0].request is None
+    assert [complaint.id for complaint in exchanges[0].complaints] == [1006]
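The padding alone already fills the cap: b'en, ' is 4 bytes, so the repeated value is exactly 16 KiB before the header name and CRLF are even counted, pushing the line past Stream.max_line_length and aborting the parse with notice 1006:

    >>> len(b'en, ' * 4096)
    16384
    >>> 17 + len(b'en, ' * 4096) + 2 > 16 * 1024   # 'Accept-Language: ' + value + CRLF
    True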
