diff --git a/.travis.yml b/.travis.yml index edcf5ed526..7fba1a3a9d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,6 +39,8 @@ matrix: include: - python: 2.7 env: TOXENV=py27 + - python: 2.7 + env: TOXENV=py27-nobrotli - python: 3.4 env: TOXENV=py34 - python: 3.5 @@ -49,6 +51,10 @@ matrix: env: TOXENV=py37 dist: xenial sudo: required + - python: 3.7 + env: TOXENV=py37-nobrotli + dist: xenial + sudo: required - python: 3.8-dev env: TOXENV=py38 dist: xenial diff --git a/CHANGES.rst b/CHANGES.rst index c8d6b9b3ab..adc55a68e2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -18,6 +18,10 @@ dev (master) * Require and validate certificates by default when using HTTPS (Pull #1507) +* Added support for Brotli content encoding. It is enabled automatically if + ``brotlipy`` package is installed which can be requested with + ``urllib3[brotli]`` extra. (Pull #1532) + * ... [Short description of non-trivial change.] (Issue #) diff --git a/appveyor.yml b/appveyor.yml index 1f8c7ee376..25dfbcc3b9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,6 +13,12 @@ environment: TOXENV: "py27" TOXPY27: "%PYTHON%\\python.exe" + - PYTHON: "C:\\Python27-x64" + PYTHON_VERSION: "2.7.x" + PYTHON_ARCH: "64" + TOXENV: "py27-nobrotli" + TOXPY27: "%PYTHON%\\python.exe" + - PYTHON: "C:\\Python34-x64" PYTHON_VERSION: "3.4.x" PYTHON_ARCH: "64" @@ -37,6 +43,12 @@ environment: TOXENV: "py37" TOXPY37: "%PYTHON%\\python.exe" + - PYTHON: "C:\\Python37-x64" + PYTHON_VERSION: "3.7.x" + PYTHON_ARCH: "64" + TOXENV: "py37-nobrotli" + TOXPY37: "%PYTHON%\\python.exe" + cache: - C:\Users\appveyor\AppData\Local\pip\Cache diff --git a/setup.py b/setup.py index 9b1ec713ca..750eba81f9 100755 --- a/setup.py +++ b/setup.py @@ -64,6 +64,9 @@ ], test_suite='test', extras_require={ + 'brotli': [ + 'brotlipy>=0.6.0', + ], 'secure': [ 'pyOpenSSL>=0.14', 'cryptography>=1.3.4', diff --git a/src/urllib3/response.py b/src/urllib3/response.py index 8ecd1a4e0d..f7c9df5dde 100644 --- a/src/urllib3/response.py +++ b/src/urllib3/response.py @@ -6,6 +6,11 @@ from socket import timeout as SocketTimeout from socket import error as SocketError +try: + import brotli +except ImportError: + brotli = None + from ._collections import HTTPHeaderDict from .exceptions import ( BodyNotHttplibCompatible, ProtocolError, DecodeError, ReadTimeoutError, @@ -90,6 +95,18 @@ def decompress(self, data): self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) +if brotli is not None: + class BrotliDecoder(object): + def __init__(self): + self._obj = brotli.Decompressor() + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + return self._obj.decompress(data) + + class MultiDecoder(object): """ From RFC7231: @@ -118,6 +135,9 @@ def _get_decoder(mode): if mode == 'gzip': return GzipDecoder() + if brotli is not None and mode == 'br': + return BrotliDecoder() + return DeflateDecoder() @@ -155,6 +175,8 @@ class is also compatible with the Python standard library's :mod:`io` """ CONTENT_DECODERS = ['gzip', 'deflate'] + if brotli is not None: + CONTENT_DECODERS += ['br'] REDIRECT_STATUSES = [301, 302, 303, 307, 308] def __init__(self, body='', headers=None, status=0, version=0, reason=None, @@ -317,20 +339,26 @@ def _init_decoder(self): if len(encodings): self._decoder = _get_decoder(content_encoding) + DECODER_ERROR_CLASSES = (IOError, zlib.error) + if brotli is not None: + DECODER_ERROR_CLASSES += (brotli.Error,) + def _decode(self, data, decode_content, flush_decoder): """ Decode the data passed in and potentially flush the decoder. """ + if not decode_content: + return data + try: - if decode_content and self._decoder: + if self._decoder: data = self._decoder.decompress(data) - except (IOError, zlib.error) as e: + except self.DECODER_ERROR_CLASSES as e: content_encoding = self.headers.get('content-encoding', '').lower() raise DecodeError( "Received response with content-encoding: %s, but " "failed to decode it." % content_encoding, e) - - if flush_decoder and decode_content: + if flush_decoder: data += self._flush_decoder() return data diff --git a/src/urllib3/util/request.py b/src/urllib3/util/request.py index 3ddfcd5594..280b8530c6 100644 --- a/src/urllib3/util/request.py +++ b/src/urllib3/util/request.py @@ -5,6 +5,13 @@ from ..exceptions import UnrewindableBodyError ACCEPT_ENCODING = 'gzip,deflate' +try: + import brotli as _unused_module_brotli # noqa: F401 +except ImportError: + pass +else: + ACCEPT_ENCODING += ',br' + _FAILEDTELL = object() diff --git a/test/__init__.py b/test/__init__.py index 0719b4b93d..b5ead0d47a 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -8,6 +8,10 @@ import os import pytest +try: + import brotli +except ImportError: + brotli = None from urllib3.exceptions import HTTPWarning from urllib3.packages import six @@ -74,6 +78,16 @@ def wrapper(*args, **kwargs): return wrapper +def onlyBrotlipy(): + return pytest.mark.skipif( + brotli is None, reason='only run if brotlipy is present') + + +def notBrotlipy(): + return pytest.mark.skipif( + brotli is not None, reason='only run if brotlipy is absent') + + def notSecureTransport(test): """Skips this test when SecureTransport is in use.""" diff --git a/test/test_response.py b/test/test_response.py index 13c01cda2f..b6b589a6ea 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -6,7 +6,7 @@ import pytest import mock -from urllib3.response import HTTPResponse +from urllib3.response import HTTPResponse, brotli from urllib3.exceptions import ( DecodeError, ResponseNotChunked, ProtocolError, InvalidHeader ) @@ -14,6 +14,8 @@ from urllib3.util.retry import Retry, RequestHistory from urllib3.util.response import is_fp_closed +from test import onlyBrotlipy + from base64 import b64decode # A known random (i.e, not-too-compressible) payload generated with: @@ -208,6 +210,35 @@ def test_chunked_decoding_gzip_swallow_garbage(self): assert r.data == b'foofoofoo' + @onlyBrotlipy() + def test_decode_brotli(self): + data = brotli.compress(b'foo') + + fp = BytesIO(data) + r = HTTPResponse(fp, headers={'content-encoding': 'br'}) + assert r.data == b'foo' + + @onlyBrotlipy() + def test_chunked_decoding_brotli(self): + data = brotli.compress(b'foobarbaz') + + fp = BytesIO(data) + r = HTTPResponse( + fp, headers={'content-encoding': 'br'}, preload_content=False) + + ret = b'' + for _ in range(100): + ret += r.read(1) + if r.closed: + break + assert ret == b'foobarbaz' + + @onlyBrotlipy() + def test_decode_brotli_error(self): + fp = BytesIO(b'foo') + with pytest.raises(DecodeError): + HTTPResponse(fp, headers={'content-encoding': 'br'}) + def test_multi_decoding_deflate_deflate(self): data = zlib.compress(zlib.compress(b'foo')) diff --git a/test/test_util.py b/test/test_util.py index 508cb10b45..ac527355a1 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -44,7 +44,7 @@ from . import clear_warnings -from test import onlyPy3, onlyPy2 +from test import onlyPy3, onlyPy2, onlyBrotlipy, notBrotlipy # This number represents a time in seconds, it doesn't mean anything in # isolation. Setting to a high-ish value to avoid conflicts with the smaller @@ -300,14 +300,30 @@ def test_parse_url_bytes_type_error_python_3(self): parse_url(b"https://www.google.com/") @pytest.mark.parametrize('kwargs, expected', [ - ({'accept_encoding': True}, - {'accept-encoding': 'gzip,deflate'}), + pytest.param( + {'accept_encoding': True}, + {'accept-encoding': 'gzip,deflate,br'}, + marks=onlyBrotlipy(), + ), + pytest.param( + {'accept_encoding': True}, + {'accept-encoding': 'gzip,deflate'}, + marks=notBrotlipy(), + ), ({'accept_encoding': 'foo,bar'}, {'accept-encoding': 'foo,bar'}), ({'accept_encoding': ['foo', 'bar']}, {'accept-encoding': 'foo,bar'}), - ({'accept_encoding': True, 'user_agent': 'banana'}, - {'accept-encoding': 'gzip,deflate', 'user-agent': 'banana'}), + pytest.param( + {'accept_encoding': True, 'user_agent': 'banana'}, + {'accept-encoding': 'gzip,deflate,br', 'user-agent': 'banana'}, + marks=onlyBrotlipy(), + ), + pytest.param( + {'accept_encoding': True, 'user_agent': 'banana'}, + {'accept-encoding': 'gzip,deflate', 'user-agent': 'banana'}, + marks=notBrotlipy(), + ), ({'user_agent': 'banana'}, {'user-agent': 'banana'}), ({'keep_alive': True}, diff --git a/tox.ini b/tox.ini index 6e92581562..8764a349e3 100644 --- a/tox.ini +++ b/tox.ini @@ -1,10 +1,10 @@ [tox] -envlist = flake8-py3, py27, py34, py35, py36, py37, py38, pypy +envlist = flake8-py3, py27, py34, py35, py36, py37, py38, pypy, py{27,37}-nobrotli [testenv] deps= -r{toxinidir}/dev-requirements.txt -extras= socks,secure -commands= +extras = socks,secure,brotli +commands = # Print out the python version and bitness pip --version python --version @@ -21,6 +21,12 @@ setenv = PYTHONWARNINGS=always::DeprecationWarning passenv = CFLAGS LDFLAGS TRAVIS APPVEYOR CRYPTOGRAPHY_OSX_NO_LINK_FLAGS TRAVIS_INFRA +[testenv:py27-nobrotli] +extras = socks,secure + +[testenv:py37-nobrotli] +extras = socks,secure + [testenv:gae] basepython = python2.7 deps=