Skip to content

Commit

Permalink
enforce_content_length for incrementally read responses
Browse files Browse the repository at this point in the history
  • Loading branch information
nateprewitt committed Aug 28, 2016
1 parent 7c87f02 commit 7c6c226
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 2 deletions.
65 changes: 65 additions & 0 deletions test/with_dummyserver/test_socketlevel.py
Expand Up @@ -1076,3 +1076,68 @@ def socket_handler(listener):
self.assertEqual([b'hello, world'], list(r.stream(None)))

done_event.set()

class TestBadContentLength(SocketDummyServerTestCase):
    """
    Socket-level checks for ``enforce_content_length`` on streamed reads.

    Every test starts a one-shot server whose response advertises
    ``Content-Length: 22`` while sending fewer body bytes than that.
    """

    def test_enforce_content_length_get(self):
        """GET: reading past a truncated body raises ProtocolError."""
        finished = Event()

        def socket_handler(listener):
            client, _ = listener.accept()

            # Consume the request up to the blank line that ends its headers.
            request = b''
            while not request.endswith(b'\r\n\r\n'):
                request += client.recv(65536)

            # Only 12 body bytes follow although the header promises 22.
            client.send(
                b'HTTP/1.1 200 OK\r\n'
                b'Content-Length: 22\r\n'
                b'Content-type: text/plain\r\n'
                b'\r\n'
                b'hello, world'
            )
            # Keep the socket open until the client side is done (1s at most).
            finished.wait(1)
            client.close()

        self._start_server(socket_handler)
        pool = HTTPConnectionPool(self.host, self.port, maxsize=1)

        # Stream a body that is shorter than its headers claim.
        response = pool.request('GET', url='/', preload_content=False,
                                enforce_content_length=True)
        chunks = response.stream(100)
        # The first chunk is the "good" data; the error is only expected
        # on the following read, once the generator runs dry.
        next(chunks)
        self.assertRaises(ProtocolError, next, chunks)

        finished.set()

    def test_enforce_content_length_no_body(self):
        """HEAD: a headers-only response streams no chunks at all."""
        finished = Event()

        def socket_handler(listener):
            client, _ = listener.accept()

            # Consume the request up to the blank line that ends its headers.
            request = b''
            while not request.endswith(b'\r\n\r\n'):
                request += client.recv(65536)

            # Headers only: a Content-Length is advertised, no body is sent.
            client.send(
                b'HTTP/1.1 200 OK\r\n'
                b'Content-Length: 22\r\n'
                b'Content-type: text/plain\r\n'
                b'\r\n'
            )
            # Keep the socket open until the client side is done (1s at most).
            finished.wait(1)
            client.close()

        self._start_server(socket_handler)
        pool = HTTPConnectionPool(self.host, self.port, maxsize=1)

        # Stream a response that carries a zero-length body.
        response = pool.request('HEAD', url='/', preload_content=False,
                                enforce_content_length=True)
        received = list(response.stream(1))
        self.assertEqual(len(received), 0)

        finished.set()
17 changes: 17 additions & 0 deletions urllib3/exceptions.py
@@ -1,4 +1,7 @@
from __future__ import absolute_import
from .packages.six.moves.http_client import (
IncompleteRead as httplib_IncompleteRead
)
# Base Exceptions


Expand Down Expand Up @@ -193,6 +196,20 @@ class ResponseNotChunked(ProtocolError, ValueError):
pass


class IncompleteRead(HTTPError, httplib_IncompleteRead):
    """
    Response length doesn't match expected Content-Length

    Subclass of http_client.IncompleteRead to allow int value
    for `partial` to avoid creating large objects on streamed
    reads.

    :param partial:
        Number of body bytes read so far (an int, not the data itself).
    :param expected:
        Number of bytes that were still expected when the read ended.
    """
    def __init__(self, partial, expected):
        # Hand the real values to the base class so that ``.partial`` and
        # ``.expected`` hold the byte counts. Passing a pre-formatted
        # message here would store that string as ``partial`` and leave
        # ``expected`` as None.
        httplib_IncompleteRead.__init__(self, partial, expected)

    def __repr__(self):
        # The inherited __repr__ calls len(self.partial), which assumes
        # ``partial`` is the data read; here it is already a byte count.
        return ('IncompleteRead(%i bytes read, '
                '%i more expected)' % (self.partial, self.expected))


class InvalidHeader(HTTPError):
"The header provided was somehow invalid."
pass
Expand Down
16 changes: 14 additions & 2 deletions urllib3/response.py
Expand Up @@ -9,7 +9,7 @@
from ._collections import HTTPHeaderDict
from .exceptions import (
ProtocolError, DecodeError, ReadTimeoutError,
ResponseNotChunked, InvalidHeader
ResponseNotChunked, IncompleteRead, InvalidHeader
)
from .packages.six import string_types as basestring, binary_type, PY3
from .packages.six.moves import http_client as httplib
Expand Down Expand Up @@ -97,6 +97,10 @@ class is also compatible with the Python standard library's :mod:`io`
:param retries:
The retries contains the last :class:`~urllib3.util.retry.Retry` that
was used during the request.
:param enforce_content_length:
Enforce content length checking. Body returned by server must match
value of Content-Length header, if present. Otherwise, raise error.
"""

CONTENT_DECODERS = ['gzip', 'deflate']
Expand All @@ -105,7 +109,7 @@ class is also compatible with the Python standard library's :mod:`io`
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
strict=0, preload_content=True, decode_content=True,
original_response=None, pool=None, connection=None,
retries=None, request_method=None):
retries=None, enforce_content_length=False, request_method=None):

if isinstance(headers, HTTPHeaderDict):
self.headers = headers
Expand All @@ -117,6 +121,7 @@ def __init__(self, body='', headers=None, status=0, version=0, reason=None,
self.strict = strict
self.decode_content = decode_content
self.retries = retries
self.enforce_content_length = enforce_content_length

self._decoder = None
self._body = None
Expand Down Expand Up @@ -383,6 +388,13 @@ def read(self, amt=None, decode_content=None, cache_content=False):
# no harm in redundantly calling close.
self._fp.close()
flush_decoder = True
if self.enforce_content_length and self.length_remaining not in (0, None):
# This is an edge case that httplib failed to cover due
# to concerns of backward compatibility. We're
# addressing it here to make sure IncompleteRead is
# raised during streaming, so all calls with incorrect
# Content-Length are caught.
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

if data:
self._fp_bytes_read += len(data)
Expand Down

0 comments on commit 7c6c226

Please sign in to comment.