From 7c6c226f969c528e29c6105364d761d0edf440a1 Mon Sep 17 00:00:00 2001
From: Nate Prewitt
Date: Sun, 28 Aug 2016 11:05:25 -0600
Subject: [PATCH] enforce_content_length for incrementally read responses

Add an opt-in `enforce_content_length` flag to HTTPResponse. When it is
set and an incremental read() hits end-of-stream while length_remaining
is neither 0 nor None, read() raises urllib3.exceptions.IncompleteRead.
The new socket-level test observes this as a ProtocolError from stream().

IncompleteRead keeps `partial` and `expected` as integer byte counts and
overrides __repr__, because the inherited http_client implementation
calls len(self.partial).
---
Note: the post-image blob id on the urllib3/exceptions.py index line was
not regenerated after the IncompleteRead __init__/__repr__ revision;
re-run git format-patch if exact blob ids are required.

 test/with_dummyserver/test_socketlevel.py | 65 +++++++++++++++++++++++
 urllib3/exceptions.py                     | 21 ++++++++
 urllib3/response.py                       | 16 +++++-
 3 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py
index d1b1b2f61c..3df468a15b 100644
--- a/test/with_dummyserver/test_socketlevel.py
+++ b/test/with_dummyserver/test_socketlevel.py
@@ -1076,3 +1076,68 @@ def socket_handler(listener):
         self.assertEqual([b'hello, world'], list(r.stream(None)))
 
         done_event.set()
+
+class TestBadContentLength(SocketDummyServerTestCase):
+    def test_enforce_content_length_get(self):
+        done_event = Event()
+
+        def socket_handler(listener):
+            sock = listener.accept()[0]
+
+            buf = b''
+            while not buf.endswith(b'\r\n\r\n'):
+                buf += sock.recv(65536)
+
+            sock.send(
+                b'HTTP/1.1 200 OK\r\n'
+                b'Content-Length: 22\r\n'
+                b'Content-type: text/plain\r\n'
+                b'\r\n'
+                b'hello, world'
+            )
+            done_event.wait(1)
+            sock.close()
+
+        self._start_server(socket_handler)
+        conn = HTTPConnectionPool(self.host, self.port, maxsize=1)
+
+        # Test stream read when content length less than headers claim
+        get_response = conn.request('GET', url='/', preload_content=False,
+                                    enforce_content_length=True)
+        data = get_response.stream(100)
+        # Read "good" data before we try to read again.
+        # This won't trigger till generator is exhausted.
+        next(data)
+        self.assertRaises(ProtocolError, next, data)
+
+        done_event.set()
+
+    def test_enforce_content_length_no_body(self):
+        done_event = Event()
+
+        def socket_handler(listener):
+            sock = listener.accept()[0]
+
+            buf = b''
+            while not buf.endswith(b'\r\n\r\n'):
+                buf += sock.recv(65536)
+
+            sock.send(
+                b'HTTP/1.1 200 OK\r\n'
+                b'Content-Length: 22\r\n'
+                b'Content-type: text/plain\r\n'
+                b'\r\n'
+            )
+            done_event.wait(1)
+            sock.close()
+
+        self._start_server(socket_handler)
+        conn = HTTPConnectionPool(self.host, self.port, maxsize=1)
+
+        # Test stream on 0 length body
+        head_response = conn.request('HEAD', url='/', preload_content=False,
+                                     enforce_content_length=True)
+        data = [chunk for chunk in head_response.stream(1)]
+        self.assertEqual(len(data), 0)
+
+        done_event.set()
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 9f5e334657..cdc2bc24f5 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -1,4 +1,7 @@
 from __future__ import absolute_import
+from .packages.six.moves.http_client import (
+    IncompleteRead as httplib_IncompleteRead
+)
 # Base Exceptions
 
 
@@ -193,6 +196,24 @@ class ResponseNotChunked(ProtocolError, ValueError):
     pass
 
 
+class IncompleteRead(HTTPError, httplib_IncompleteRead):
+    """
+    Response length doesn't match expected Content-Length
+
+    Subclass of http_client.IncompleteRead to allow int value
+    for `partial` to avoid creating large objects on streamed
+    reads.
+    """
+    def __init__(self, partial, expected):
+        httplib_IncompleteRead.__init__(self, partial, expected)
+
+    def __repr__(self):
+        # The inherited __repr__ calls len(self.partial), which would fail
+        # now that `partial` is an int byte count rather than the data.
+        return ('IncompleteRead(%i bytes read, '
+                '%i more expected)' % (self.partial, self.expected))
+
+
 class InvalidHeader(HTTPError):
     "The header provided was somehow invalid."
     pass
diff --git a/urllib3/response.py b/urllib3/response.py
index 7de269751c..be2accdad1 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -9,7 +9,7 @@
 from ._collections import HTTPHeaderDict
 from .exceptions import (
     ProtocolError, DecodeError, ReadTimeoutError,
-    ResponseNotChunked, InvalidHeader
+    ResponseNotChunked, IncompleteRead, InvalidHeader
 )
 from .packages.six import string_types as basestring, binary_type, PY3
 from .packages.six.moves import http_client as httplib
@@ -97,6 +97,10 @@ class is also compatible with the Python standard library's :mod:`io`
     :param retries:
         The retries contains the last :class:`~urllib3.util.retry.Retry` that
         was used during the request.
+
+    :param enforce_content_length:
+        Enforce content length checking. Body returned by server must match
+        value of Content-Length header, if present. Otherwise, raise error.
     """
 
     CONTENT_DECODERS = ['gzip', 'deflate']
@@ -105,7 +109,7 @@ class is also compatible with the Python standard library's :mod:`io`
     def __init__(self, body='', headers=None, status=0, version=0, reason=None,
                  strict=0, preload_content=True, decode_content=True,
                  original_response=None, pool=None, connection=None,
-                 retries=None, request_method=None):
+                 retries=None, enforce_content_length=False, request_method=None):
 
         if isinstance(headers, HTTPHeaderDict):
             self.headers = headers
@@ -117,6 +121,7 @@ def __init__(self, body='', headers=None, status=0, version=0, reason=None,
         self.strict = strict
         self.decode_content = decode_content
         self.retries = retries
+        self.enforce_content_length = enforce_content_length
 
         self._decoder = None
         self._body = None
@@ -383,6 +388,13 @@ def read(self, amt=None, decode_content=None, cache_content=False):
                     # no harm in redundantly calling close.
                     self._fp.close()
                     flush_decoder = True
+                    if self.enforce_content_length and self.length_remaining not in (0, None):
+                        # This is an edge case that httplib failed to cover due
+                        # to concerns of backward compatibility. We're
+                        # addressing it here to make sure IncompleteRead is
+                        # raised during streaming, so all calls with incorrect
+                        # Content-Length are caught.
+                        raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
 
         if data:
             self._fp_bytes_read += len(data)