From fb5415218618947680abf4a75511d71687a2e57f Mon Sep 17 00:00:00 2001 From: Tahia Khan Date: Tue, 4 Feb 2014 01:58:06 -0500 Subject: [PATCH 01/42] Adding Timeout examples on docs landing page --- docs/index.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/index.rst b/docs/index.rst index 61edafa6cc..8f53e349b8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -152,6 +152,34 @@ should use a :class:`~urllib3.poolmanager.PoolManager`. A :class:`~urllib3.connectionpool.ConnectionPool` is composed of a collection of :class:`httplib.HTTPConnection` objects. +Timeout +------- + +A timeout can be set to limit how long socket operations on individual connections may block. This +can be done with a float or integer, which applies the same timeout to both the connect and read +stages of the request, or an instance of :class:`~urllib3.util.Timeout` which will give you +more granular control over how much time is given to different stages of the request. +More specifically, the Timeout object can be used to set separate +timeouts for connections and reads on the request. + +:: + + >>> timeout1 = 7.0 # specifies 7.0 for both connect and read + >>> pool1 = HTTPConnectionPool('ajax.googleapis.com', timeout=timeout1) + >>> pool1.request(...) # Etc, etc + + >>> timeout2 = urllib3.util.Timeout(read=5) # specifies the system default timeout for connect and 5 for read + >>> pool2 = HTTPConnectionPool('ajax.googleapis.com', timeout=timeout2) + >>> pool2.request(...) # Etc, etc + + >>> timeout3 = urllib3.util.Timeout(connect=3, read=5) # specifies 3 for connect and 5 for read + >>> pool3 = HTTPConnectionPool('ajax.googleapis.com', timeout=timeout3) + >>> pool3.request(...) # Etc, etc + + >>> timeout4 = urllib3.util.Timeout(total=7.5) # specifies 7.5 for read and connect combined + >>> pool4 = HTTPConnectionPool('ajax.googleapis.com', timeout=timeout4) + >>> pool4.request(...) 
# Etc, etc + Foundation ---------- From 772a3e9a5434e30dee4da587bb72de0913041233 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Tue, 4 Mar 2014 11:23:28 -0800 Subject: [PATCH 02/42] Dev CHANGES --- CHANGES.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 01e558a3df..bf888c23db 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,12 @@ Changes dev (master) ++++++++++++ +* ... [Short description of non-trivial change.] (Issue #) + + +1.8 (2014-03-04) +++++++++++++++++ + * Improved url parsing in ``urllib3.util.parse_url`` (properly parse '@' in username, and blank ports like 'hostname:'). @@ -52,9 +58,6 @@ dev (master) * Fixed open socket leak with SSL-related failures. (Issue #344, #348) -* ... [Short description of non-trivial change.] (Issue #) - - 1.7.1 (2013-09-25) ++++++++++++++++++ From 78f520dbeaaa80e0852b1411a1f3d7beaf8f0486 Mon Sep 17 00:00:00 2001 From: schlamar Date: Thu, 6 Mar 2014 17:30:42 +0100 Subject: [PATCH 03/42] Publish default accept encoding as constant. --- urllib3/util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/urllib3/util.py b/urllib3/util.py index bd266317ff..3d2ef95a3e 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -35,6 +35,8 @@ from .packages import six from .exceptions import LocationParseError, SSLError, TimeoutStateError +ACCEPT_ENCODING = 'gzip,deflate' + _Default = object() # The default timeout to use for socket connections. 
This is the attribute used @@ -465,7 +467,7 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, elif isinstance(accept_encoding, list): accept_encoding = ','.join(accept_encoding) else: - accept_encoding = 'gzip,deflate' + accept_encoding = ACCEPT_ENCODING headers['accept-encoding'] = accept_encoding if user_agent: From d00556a6315c8f437c4d55f87ecb515d16f7338a Mon Sep 17 00:00:00 2001 From: Bui Date: Sat, 8 Mar 2014 21:16:48 +0000 Subject: [PATCH 04/42] finish implementing source_address --- urllib3/connection.py | 2 +- urllib3/connectionpool.py | 12 +++++++++--- urllib3/response.py | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 662bd2e454..e2d7d83444 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -82,7 +82,7 @@ def _new_conn(self): """ extra_args = [] if self.source_address: # Python 2.7+ - extra_args.append(self.source_address) + extra_args.append((self.source_address, 0)) conn = socket.create_connection( (self.host, self.port), diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 6d0dbb184c..8688972420 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -136,7 +136,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None): + headers=None, _proxy=None, _proxy_headers=None, + source_address=None): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) @@ -155,6 +156,8 @@ def __init__(self, host, port=None, strict=False, self.proxy = _proxy self.proxy_headers = _proxy_headers or {} + self.source_address = source_address + # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): self.pool.put(None) @@ -173,7 +176,8 @@ def _new_conn(self): conn = self.ConnectionCls(host=self.host, 
port=self.port, timeout=self.timeout.connect_timeout, - strict=self.strict) + strict=self.strict, + source_address=self.source_address) if self.proxy is not None: # Enable Nagle's algorithm for proxies, to avoid packet # fragmentation. @@ -594,7 +598,8 @@ def __init__(self, host, port=None, _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, - assert_hostname=None, assert_fingerprint=None): + assert_hostname=None, assert_fingerprint=None, + source_address=None): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, block, headers, _proxy, _proxy_headers) @@ -605,6 +610,7 @@ def __init__(self, host, port=None, self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + self.source_address = source_address def _prepare_conn(self, conn): """ diff --git a/urllib3/response.py b/urllib3/response.py index db441828aa..d793619880 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -79,7 +79,8 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, - original_response=None, pool=None, connection=None): + original_response=None, pool=None, connection=None, + source_address=None): self.headers = HTTPHeaderDict() if headers: From 04abe31a1ce9fd02499986833d47fd74e73a8816 Mon Sep 17 00:00:00 2001 From: Bui Date: Sat, 8 Mar 2014 21:56:05 +0000 Subject: [PATCH 05/42] don't force port 0 --- urllib3/connection.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index e2d7d83444..61cc192d42 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -82,7 +82,10 @@ def _new_conn(self): """ extra_args = [] if self.source_address: # Python 2.7+ - extra_args.append((self.source_address, 0)) + if type(self.source_address) is tuple: + 
extra_args.append(self.source_address) + else: + extra_args.append((self.source_address, 0)) conn = socket.create_connection( (self.host, self.port), From 2841e7d2abef0ac602f53b23bb7051264c82dd1b Mon Sep 17 00:00:00 2001 From: Bui Date: Sat, 8 Mar 2014 22:38:28 +0000 Subject: [PATCH 06/42] use **conn_kw --- urllib3/connection.py | 11 +++-------- urllib3/connectionpool.py | 16 ++++++++-------- urllib3/response.py | 2 +- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 61cc192d42..b8e15f2097 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -71,7 +71,6 @@ def __init__(self, *args, **kw): if sys.version_info < (2, 7): # Python 2.6 and earlier kw.pop('source_address', None) - self.source_address = None _HTTPConnection.__init__(self, *args, **kw) @@ -82,10 +81,7 @@ def _new_conn(self): """ extra_args = [] if self.source_address: # Python 2.7+ - if type(self.source_address) is tuple: - extra_args.append(self.source_address) - else: - extra_args.append((self.source_address, 0)) + extra_args.append(self.source_address) conn = socket.create_connection( (self.host, self.port), @@ -112,12 +108,11 @@ class HTTPSConnection(HTTPConnection): def __init__(self, host, port=None, key_file=None, cert_file=None, strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None): + **kwargs): HTTPConnection.__init__(self, host, port, strict=strict, - timeout=timeout, - source_address=source_address) + timeout=timeout, **kwargs) self.key_file = key_file self.cert_file = cert_file diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 8688972420..ffc01a825c 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -137,7 +137,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, headers=None, _proxy=None, _proxy_headers=None, - source_address=None): 
+ **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) @@ -156,8 +156,6 @@ def __init__(self, host, port=None, strict=False, self.proxy = _proxy self.proxy_headers = _proxy_headers or {} - self.source_address = source_address - # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): self.pool.put(None) @@ -166,6 +164,8 @@ def __init__(self, host, port=None, strict=False, self.num_connections = 0 self.num_requests = 0 + self.conn_kw = conn_kw + def _new_conn(self): """ Return a fresh :class:`HTTPConnection`. @@ -176,8 +176,7 @@ def _new_conn(self): conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, - strict=self.strict, - source_address=self.source_address) + strict=self.strict, **self.conn_kw) if self.proxy is not None: # Enable Nagle's algorithm for proxies, to avoid packet # fragmentation. @@ -599,7 +598,7 @@ def __init__(self, host, port=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None, - source_address=None): + **conn_kw): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, block, headers, _proxy, _proxy_headers) @@ -610,7 +609,8 @@ def __init__(self, host, port=None, self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - self.source_address = source_address + + self.conn_kw = conn_kw def _prepare_conn(self, conn): """ @@ -659,7 +659,7 @@ def _new_conn(self): actual_host = self.proxy.host actual_port = self.proxy.port - extra_params = {} + extra_params = self.conn_kw if not six.PY3: # Python 2 extra_params['strict'] = self.strict diff --git a/urllib3/response.py b/urllib3/response.py index d793619880..0e6a15452a 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -80,7 +80,7 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, version=0, 
reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None, - source_address=None): + **kwargs): self.headers = HTTPHeaderDict() if headers: From 8640a3131be24268e2a15eea9dc1ab496034cd51 Mon Sep 17 00:00:00 2001 From: Bui Date: Sat, 8 Mar 2014 22:40:39 +0000 Subject: [PATCH 07/42] oops, small mistake --- urllib3/connection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/urllib3/connection.py b/urllib3/connection.py index b8e15f2097..f5ab84c5cf 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -71,6 +71,7 @@ def __init__(self, *args, **kw): if sys.version_info < (2, 7): # Python 2.6 and earlier kw.pop('source_address', None) + self.source_address = None _HTTPConnection.__init__(self, *args, **kw) From f6217168638eaa29448b1bc0a4265ab7ca03e6b4 Mon Sep 17 00:00:00 2001 From: Bui Date: Sun, 9 Mar 2014 00:46:51 +0000 Subject: [PATCH 08/42] fix for VerifiedHTTPSConnection --- urllib3/connection.py | 9 ++++++--- urllib3/connectionpool.py | 8 ++++---- urllib3/response.py | 3 +-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index f5ab84c5cf..012befaddb 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -109,11 +109,11 @@ class HTTPSConnection(HTTPConnection): def __init__(self, host, port=None, key_file=None, cert_file=None, strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - **kwargs): + source_address=None): HTTPConnection.__init__(self, host, port, strict=strict, - timeout=timeout, **kwargs) + timeout=timeout) self.key_file = key_file self.cert_file = cert_file @@ -135,7 +135,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None): + assert_hostname=None, assert_fingerprint=None, **conn_kw): self.key_file = key_file self.cert_file = cert_file @@ -143,13 +143,16 @@ def set_cert(self, 
key_file=None, cert_file=None, self.ca_certs = ca_certs self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + self.conn_kw = conn_kw def connect(self): # Add certificate verification + try: sock = socket.create_connection( address=(self.host, self.port), timeout=self.timeout, + **self.conn_kw ) except SocketTimeout: raise ConnectTimeoutError( diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index ffc01a825c..fe47e7b598 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -601,7 +601,7 @@ def __init__(self, host, port=None, **conn_kw): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers) + block, headers, _proxy, _proxy_headers, **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -609,7 +609,6 @@ def __init__(self, host, port=None, self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - self.conn_kw = conn_kw def _prepare_conn(self, conn): @@ -624,7 +623,7 @@ def _prepare_conn(self, conn): cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) + assert_fingerprint=self.assert_fingerprint, **self.conn_kw) conn.ssl_version = self.ssl_version if self.proxy is not None: @@ -659,9 +658,10 @@ def _new_conn(self): actual_host = self.proxy.host actual_port = self.proxy.port - extra_params = self.conn_kw + extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict + extra_params.update(self.conn_kw) conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, diff --git a/urllib3/response.py b/urllib3/response.py index 0e6a15452a..db441828aa 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -79,8 +79,7 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, 
version=0, reason=None, strict=0, preload_content=True, decode_content=True, - original_response=None, pool=None, connection=None, - **kwargs): + original_response=None, pool=None, connection=None): self.headers = HTTPHeaderDict() if headers: From 28b2c278db0edb0aac8b98ef33630532b69ec304 Mon Sep 17 00:00:00 2001 From: Bui Date: Sun, 9 Mar 2014 05:00:39 +0000 Subject: [PATCH 09/42] fixing things again --- urllib3/connection.py | 8 ++++---- urllib3/connectionpool.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 012befaddb..bff6e8dc3e 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -109,11 +109,12 @@ class HTTPSConnection(HTTPConnection): def __init__(self, host, port=None, key_file=None, cert_file=None, strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None): + **conn_kw): HTTPConnection.__init__(self, host, port, strict=strict, - timeout=timeout) + timeout=timeout, + **conn_kw) self.key_file = key_file self.cert_file = cert_file @@ -135,7 +136,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None, **conn_kw): + assert_hostname=None, assert_fingerprint=None): self.key_file = key_file self.cert_file = cert_file @@ -143,7 +144,6 @@ def set_cert(self, key_file=None, cert_file=None, self.ca_certs = ca_certs self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - self.conn_kw = conn_kw def connect(self): # Add certificate verification diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index fe47e7b598..a88582bb44 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -275,6 +275,7 @@ def _make_request(self, conn, method, url, timeout=_Default, control over your timeouts. 
""" self.num_requests += 1 + conn.conn_kw = self.conn_kw timeout_obj = self._get_timeout(timeout) @@ -623,7 +624,7 @@ def _prepare_conn(self, conn): cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint, **self.conn_kw) + assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version if self.proxy is not None: From b8625059859a760541afa21b08c68e76e739792f Mon Sep 17 00:00:00 2001 From: Bui Date: Sun, 9 Mar 2014 05:05:36 +0000 Subject: [PATCH 10/42] that shouldn't be there --- urllib3/connectionpool.py | 1 - 1 file changed, 1 deletion(-) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index a88582bb44..4058d74ca3 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -275,7 +275,6 @@ def _make_request(self, conn, method, url, timeout=_Default, control over your timeouts. """ self.num_requests += 1 - conn.conn_kw = self.conn_kw timeout_obj = self._get_timeout(timeout) From aef559402b48b3ea1c6efcfa6b65f954c65a5c00 Mon Sep 17 00:00:00 2001 From: Bui Date: Sun, 9 Mar 2014 05:24:15 +0000 Subject: [PATCH 11/42] trying again --- urllib3/connectionpool.py | 1 + 1 file changed, 1 insertion(+) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 4058d74ca3..aa9daca67c 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -625,6 +625,7 @@ def _prepare_conn(self, conn): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version + conn.conn_kw = self.conn_kw if self.proxy is not None: # Python 2.7+ From 9793f69b150b1845e7f76aa04a42d8b59704693e Mon Sep 17 00:00:00 2001 From: Bui Date: Mon, 10 Mar 2014 00:35:21 +0000 Subject: [PATCH 12/42] I think I understand now --- urllib3/connection.py | 6 +++--- urllib3/connectionpool.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 
bff6e8dc3e..39bda3b1ac 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -109,12 +109,12 @@ class HTTPSConnection(HTTPConnection): def __init__(self, host, port=None, key_file=None, cert_file=None, strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - **conn_kw): + source_address=None): HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, - **conn_kw) + source_address=None) self.key_file = key_file self.cert_file = cert_file @@ -152,7 +152,7 @@ def connect(self): sock = socket.create_connection( address=(self.host, self.port), timeout=self.timeout, - **self.conn_kw + source_address=self.source_address ) except SocketTimeout: raise ConnectTimeoutError( diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index aa9daca67c..35f1ec4e34 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -625,7 +625,7 @@ def _prepare_conn(self, conn): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - conn.conn_kw = self.conn_kw + conn.source_address = self.conn_kw['source_address'] if self.proxy is not None: # Python 2.7+ From 4a67d4d41a45344aea52bdee866e665865a04f96 Mon Sep 17 00:00:00 2001 From: Bui Date: Mon, 10 Mar 2014 00:41:02 +0000 Subject: [PATCH 13/42] didn't try without the argument --- urllib3/connectionpool.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 35f1ec4e34..b62ef5bf1c 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -625,7 +625,10 @@ def _prepare_conn(self, conn): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - conn.source_address = self.conn_kw['source_address'] + if 'source_address' in self.conn_kw: + conn.source_address = self.conn_kw['source_address'] + else: + conn.source_address = None if self.proxy is not None: # Python 2.7+ From 
39c42b85d76cc003e5ddc17641c5af2b77cbd773 Mon Sep 17 00:00:00 2001 From: Bui Date: Mon, 10 Mar 2014 00:51:26 +0000 Subject: [PATCH 14/42] this might be the problem --- urllib3/connection.py | 3 ++- urllib3/connectionpool.py | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 39bda3b1ac..694a12a1fd 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -133,6 +133,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None + conn_kw = None def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -152,7 +153,7 @@ def connect(self): sock = socket.create_connection( address=(self.host, self.port), timeout=self.timeout, - source_address=self.source_address + **self.conn_kw ) except SocketTimeout: raise ConnectTimeoutError( diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index b62ef5bf1c..aa9daca67c 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -625,10 +625,7 @@ def _prepare_conn(self, conn): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - if 'source_address' in self.conn_kw: - conn.source_address = self.conn_kw['source_address'] - else: - conn.source_address = None + conn.conn_kw = self.conn_kw if self.proxy is not None: # Python 2.7+ From c5eb43fe924314ea2eeb2d37934f5eda15526f5e Mon Sep 17 00:00:00 2001 From: Bui Date: Mon, 10 Mar 2014 00:52:53 +0000 Subject: [PATCH 15/42] fix TypeError when there's no args --- urllib3/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 694a12a1fd..57e6f5fbc7 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -133,7 +133,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None - conn_kw = None + conn_kw = {} def set_cert(self, 
key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, From 04cb87bdbeff8ae5685bc3a5fd2afc70fb811aab Mon Sep 17 00:00:00 2001 From: Kevin Burke Date: Mon, 10 Mar 2014 17:44:25 -0700 Subject: [PATCH 16/42] Refactor urllib3/util.py into submodules --- test/test_util.py | 4 +- urllib3/util.py | 650 --------------------------------------- urllib3/util/__init__.py | 36 +++ urllib3/util/conn.py | 43 +++ urllib3/util/request.py | 68 ++++ urllib3/util/response.py | 13 + urllib3/util/ssl_.py | 133 ++++++++ urllib3/util/timeout.py | 234 ++++++++++++++ urllib3/util/url.py | 162 ++++++++++ 9 files changed, 691 insertions(+), 652 deletions(-) delete mode 100644 urllib3/util.py create mode 100644 urllib3/util/__init__.py create mode 100644 urllib3/util/conn.py create mode 100644 urllib3/util/request.py create mode 100644 urllib3/util/response.py create mode 100644 urllib3/util/ssl_.py create mode 100644 urllib3/util/timeout.py create mode 100644 urllib3/util/url.py diff --git a/test/test_util.py b/test/test_util.py index ebd3b5f7e6..0683199aa4 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -231,7 +231,7 @@ def test_invalid_timeouts(self): self.assertTrue('int or float' in str(e)) - @patch('urllib3.util.current_time') + @patch('urllib3.util.timeout.current_time') def test_timeout(self, current_time): timeout = Timeout(total=3) @@ -278,7 +278,7 @@ def test_timeout_str(self): self.assertEqual(str(timeout), "Timeout(connect=1, read=None, total=3)") - @patch('urllib3.util.current_time') + @patch('urllib3.util.timeout.current_time') def test_timeout_elapsed(self, current_time): current_time.return_value = TIMEOUT_EPOCH timeout = Timeout(total=3) diff --git a/urllib3/util.py b/urllib3/util.py deleted file mode 100644 index 3d2ef95a3e..0000000000 --- a/urllib3/util.py +++ /dev/null @@ -1,650 +0,0 @@ -# urllib3/util.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT 
License: http://www.opensource.org/licenses/mit-license.php - - -from base64 import b64encode -from binascii import hexlify, unhexlify -from collections import namedtuple -from hashlib import md5, sha1 -from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT -import time - -try: - from select import poll, POLLIN -except ImportError: # `poll` doesn't exist on OSX and other platforms - poll = False - try: - from select import select - except ImportError: # `select` doesn't exist on AppEngine. - select = False - -try: # Test for SSL features - SSLContext = None - HAS_SNI = False - - import ssl - from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? - from ssl import HAS_SNI # Has SNI? -except ImportError: - pass - -from .packages import six -from .exceptions import LocationParseError, SSLError, TimeoutStateError - -ACCEPT_ENCODING = 'gzip,deflate' - - -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout - - -def current_time(): - """ - Retrieve the current time, this function is mocked out in unit testing. - """ - return time.time() - - -class Timeout(object): - """ - Utility object for storing timeout values. - - Example usage: - - .. code-block:: python - - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) # Etc, etc - - :param connect: - The maximum amount of time to wait for a connection attempt to a server - to succeed. Omitting the parameter will default the connect timeout to - the system default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout for connection attempts. - - :type connect: integer, float, or None - - :param read: - The maximum amount of time to wait between consecutive - read operations for a response from the server. 
Omitting - the parameter will default the read timeout to the system - default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout. - - :type read: integer, float, or None - - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - - .. note:: - - Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. - - In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, - not the total amount of time for the request to return a complete - response. For most requests, the timeout is raised because the server - has not sent the first byte in the specified time. This is not always - the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. - - If your goal is to cut off any request after a set amount of wall clock - time, consider having a second "watcher" thread to cut off a slow - request. 
- """ - - #: A sentinel object representing the default timeout value - DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - - def __init__(self, total=None, connect=_Default, read=_Default): - self._connect = self._validate_timeout(connect, 'connect') - self._read = self._validate_timeout(read, 'read') - self.total = self._validate_timeout(total, 'total') - self._start_connect = None - - def __str__(self): - return '%s(connect=%r, read=%r, total=%r)' % ( - type(self).__name__, self._connect, self._read, self.total) - - - @classmethod - def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid - - :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero - """ - if value is _Default: - return cls.DEFAULT_TIMEOUT - - if value is None or value is cls.DEFAULT_TIMEOUT: - return value - - try: - float(value) - except (TypeError, ValueError): - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - try: - if value < 0: - raise ValueError("Attempted to set %s timeout to %s, but the " - "timeout cannot be set to a value less " - "than 0." % (name, value)) - except TypeError: # Python 3 - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - return value - - @classmethod - def from_float(cls, timeout): - """ Create a new Timeout from a legacy timeout value. - - The timeout value used by httplib.py sets the same timeout on the - connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. 
- - :param timeout: The legacy timeout value - :type timeout: integer, float, sentinel default object, or None - :return: a Timeout object - :rtype: :class:`Timeout` - """ - return Timeout(read=timeout, connect=timeout) - - def clone(self): - """ Create a copy of the timeout object - - Timeout properties are stored per-pool but each request needs a fresh - Timeout object to ensure each one has its own start/stop configured. - - :return: a copy of the timeout object - :rtype: :class:`Timeout` - """ - # We can't use copy.deepcopy because that will also create a new object - # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to - # detect the user default. - return Timeout(connect=self._connect, read=self._read, - total=self.total) - - def start_connect(self): - """ Start the timeout clock, used during a connect() attempt - - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to start a timer that has been started already. - """ - if self._start_connect is not None: - raise TimeoutStateError("Timeout timer has already been started.") - self._start_connect = current_time() - return self._start_connect - - def get_connect_duration(self): - """ Gets the time elapsed since the call to :meth:`start_connect`. - - :return: the elapsed time - :rtype: float - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to get duration for a timer that hasn't been started. - """ - if self._start_connect is None: - raise TimeoutStateError("Can't get connect duration for timer " - "that has not started.") - return current_time() - self._start_connect - - @property - def connect_timeout(self): - """ Get the value to use when setting a connection timeout. - - This will be a positive float or integer, the value None - (never timeout), or the default system timeout. 
- - :return: the connect timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - """ - if self.total is None: - return self._connect - - if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: - return self.total - - return min(self._connect, self.total) - - @property - def read_timeout(self): - """ Get the value for the read timeout. - - This assumes some time has elapsed in the connection timeout and - computes the read timeout appropriately. - - If self.total is set, the read timeout is dependent on the amount of - time taken by the connect timeout. If the connection time has not been - established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be - raised. - - :return: the value to use for the read timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` - has not yet been called on this object. - """ - if (self.total is not None and - self.total is not self.DEFAULT_TIMEOUT and - self._read is not None and - self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. - if self._start_connect is None: - return self._read - return max(0, min(self.total - self.get_connect_duration(), - self._read)) - elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: - return max(0, self.total - self.get_connect_duration()) - else: - return self._read - - -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): - """ - Datastructure for representing an HTTP URL. Used as a return value for - :func:`parse_url`. - """ - slots = () - - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) - - @property - def hostname(self): - """For backwards-compatibility with urlparse. 
We're nice like that.""" - return self.host - - @property - def request_uri(self): - """Absolute path including the query string.""" - uri = self.path or '/' - - if self.query is not None: - uri += '?' + self.query - - return uri - - @property - def netloc(self): - """Network location including host and port""" - if self.port: - return '%s:%d' % (self.host, self.port) - return self.host - - -def split_first(s, delims): - """ - Given a string and an iterable of delimiters, split on the first found - delimiter. Return two split parts and the matched delimiter. - - If not found, then the first part is the full input string. - - Example: :: - - >>> split_first('foo/bar?baz', '?/=') - ('foo', 'bar?baz', '/') - >>> split_first('foo/bar?baz', '123') - ('foo/bar?baz', '', None) - - Scales linearly with number of delims. Not ideal for large number of delims. - """ - min_idx = None - min_delim = None - for d in delims: - idx = s.find(d) - if idx < 0: - continue - - if min_idx is None or idx < min_idx: - min_idx = idx - min_delim = d - - if min_idx is None or min_idx < 0: - return s, '', None - - return s[:min_idx], s[min_idx+1:], min_delim - - -def parse_url(url): - """ - Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is - performed to parse incomplete urls. Fields not provided will be None. - - Partly backwards-compatible with :mod:`urlparse`. - - Example: :: - - >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) - >>> parse_url('google.com:80') - Url(scheme=None, host='google.com', port=80, path=None, ...) - >>> parse_url('/foo?bar') - Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) - """ - - # While this code has overlap with stdlib's urlparse, it is much - # simplified for our needs and less annoying. - # Additionally, this implementations does silly things to be optimal - # on CPython. 
- - scheme = None - auth = None - host = None - port = None - path = None - fragment = None - query = None - - # Scheme - if '://' in url: - scheme, url = url.split('://', 1) - - # Find the earliest Authority Terminator - # (http://tools.ietf.org/html/rfc3986#section-3.2) - url, path_, delim = split_first(url, ['/', '?', '#']) - - if delim: - # Reassemble the path - path = delim + path_ - - # Auth - if '@' in url: - # Last '@' denotes end of auth part - auth, url = url.rsplit('@', 1) - - # IPv6 - if url and url[0] == '[': - host, url = url.split(']', 1) - host += ']' - - # Port - if ':' in url: - _host, port = url.split(':', 1) - - if not host: - host = _host - - if port: - # If given, ports must be integers. - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - port = int(port) - else: - # Blank ports are cool, too. (rfc3986#section-3.2.3) - port = None - - elif not host and url: - host = url - - if not path: - return Url(scheme, auth, host, port, path, query, fragment) - - # Fragment - if '#' in path: - path, fragment = path.split('#', 1) - - # Query - if '?' in path: - path, query = path.split('?', 1) - - return Url(scheme, auth, host, port, path, query, fragment) - - -def get_host(url): - """ - Deprecated. Use :func:`.parse_url` instead. - """ - p = parse_url(url) - return p.scheme or 'http', p.hostname, p.port - - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. 
- - :param proxy_basic_auth: - Colon-separated username:password string for 'proxy-authorization: basic ...' - auth header. - - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = ACCEPT_ENCODING - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - if proxy_basic_auth: - headers['proxy-authorization'] = 'Basic ' + \ - b64encode(six.b(proxy_basic_auth)).decode('utf-8') - - return headers - - -def is_connection_dropped(conn): # Platform-specific - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - :class:`httplib.HTTPConnection` object. - - Note: For platforms like AppEngine, this will always return ``False`` to - let the platform handle connection recycling transparently for us. - """ - sock = getattr(conn, 'sock', False) - if not sock: # Platform-specific: AppEngine - return False - - if not poll: - if not select: # Platform-specific: AppEngine - return False - - try: - return select([sock], [], [], 0.0)[0] - except SocketError: - return True - - # This version is better on platforms that support it. - p = poll() - p.register(sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True - - -def resolve_cert_reqs(candidate): - """ - Resolves the argument to a numeric constant, which can be passed to - the wrap_socket function/method from the ssl module. 
- Defaults to :data:`ssl.CERT_NONE`. - If given a string it is assumed to be the name of the constant in the - :mod:`ssl` module or its abbrevation. - (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. - If it's neither `None` nor a string we assume it is already the numeric - constant which can directly be passed to wrap_socket. - """ - if candidate is None: - return CERT_NONE - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'CERT_' + candidate) - return res - - return candidate - - -def resolve_ssl_version(candidate): - """ - like resolve_cert_reqs - """ - if candidate is None: - return PROTOCOL_SSLv23 - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'PROTOCOL_' + candidate) - return res - - return candidate - - -def assert_fingerprint(cert, fingerprint): - """ - Checks if given fingerprint matches the supplied certificate. - - :param cert: - Certificate as bytes object. - :param fingerprint: - Fingerprint as string of hexdigits, can be interspersed by colons. - """ - - # Maps the length of a digest to a possible hash function producing - # this digest. - hashfunc_map = { - 16: md5, - 20: sha1 - } - - fingerprint = fingerprint.replace(':', '').lower() - - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: - raise SSLError('Fingerprint is of invalid length.') - - # We need encode() here for py32; works on py2 and p33. - fingerprint_bytes = unhexlify(fingerprint.encode()) - - hashfunc = hashfunc_map[digest_length] - - cert_digest = hashfunc(cert).digest() - - if not cert_digest == fingerprint_bytes: - raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' - .format(hexlify(fingerprint_bytes), - hexlify(cert_digest))) - -def is_fp_closed(obj): - """ - Checks whether a given file-like object is closed. - - :param obj: - The file-like object to check. 
- """ - if hasattr(obj, 'fp'): - # Object is a container for another file-like object that gets released - # on exhaustion (e.g. HTTPResponse) - return obj.fp is None - - return obj.closed - - -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` - - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs - - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - context.options |= OP_NO_COMPRESSION - - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError - raise SSLError(e) - if certfile: - # FIXME: This block needs a test. - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py new file mode 100644 index 0000000000..328ba7f139 --- /dev/null +++ b/urllib3/util/__init__.py @@ -0,0 +1,36 @@ +# urllib3/util/__init__.py +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +# For backwards compatibility, allow you to access resources that used to exist +# 
here. +from .conn import is_connection_dropped + +from .request import make_headers + +from .response import is_fp_closed + +from .ssl_ import ( + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +try: + from .ssl_ import SSLContext, HAS_SNI +except ImportError: + pass + +from .timeout import ( + current_time, + Timeout, +) + +from .url import ( + get_host, + parse_url, + split_first, + Url, +) diff --git a/urllib3/util/conn.py b/urllib3/util/conn.py new file mode 100644 index 0000000000..a5f62fed22 --- /dev/null +++ b/urllib3/util/conn.py @@ -0,0 +1,43 @@ +from socket import error as SocketError +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if not sock: # Platform-specific: AppEngine + return False + + if not poll: + if not select: # Platform-specific: AppEngine + return False + + try: + return select([sock], [], [], 0.0)[0] + except SocketError: + return True + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. 
+ return True + + + diff --git a/urllib3/util/request.py b/urllib3/util/request.py new file mode 100644 index 0000000000..d48d6513b1 --- /dev/null +++ b/urllib3/util/request.py @@ -0,0 +1,68 @@ +from base64 import b64encode + +from ..packages import six + + +ACCEPT_ENCODING = 'gzip,deflate' + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None, proxy_basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. 
+ + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(six.b(proxy_basic_auth)).decode('utf-8') + + return headers + + diff --git a/urllib3/util/response.py b/urllib3/util/response.py new file mode 100644 index 0000000000..d0325bc6b5 --- /dev/null +++ b/urllib3/util/response.py @@ -0,0 +1,13 @@ +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + if hasattr(obj, 'fp'): + # Object is a container for another file-like object that gets released + # on exhaustion (e.g. HTTPResponse) + return obj.fp is None + + return obj.closed diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py new file mode 100644 index 0000000000..dee4b87629 --- /dev/null +++ b/urllib3/util/ssl_.py @@ -0,0 +1,133 @@ +from binascii import hexlify, unhexlify +from hashlib import md5, sha1 + +from ..exceptions import SSLError + + +try: # Test for SSL features + SSLContext = None + HAS_SNI = False + + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import SSLContext # Modern SSL? + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. 
+ + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1 + } + + fingerprint = fingerprint.replace(':', '').lower() + + digest_length, rest = divmod(len(fingerprint), 2) + + if rest or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. 
+ """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +if SSLContext is not None: # Python 3.2+ + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + """ + All arguments except `server_hostname` have the same meaning as for + :func:`ssl.wrap_socket` + + :param server_hostname: + Hostname of the expected certificate + """ + context = SSLContext(ssl_version) + context.verify_mode = cert_reqs + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + # Py32 raises IOError + # Py33 raises FileNotFoundError + except Exception as e: # Reraise as SSLError + raise SSLError(e) + if certfile: + # FIXME: This block needs a test. 
+ context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) + +else: # Python 3.1 and earlier + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + return wrap_socket(sock, keyfile=keyfile, certfile=certfile, + ca_certs=ca_certs, cert_reqs=cert_reqs, + ssl_version=ssl_version) diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py new file mode 100644 index 0000000000..4f947cb249 --- /dev/null +++ b/urllib3/util/timeout.py @@ -0,0 +1,234 @@ +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + + +def current_time(): + """ + Retrieve the current time, this function is mocked out in unit testing. + """ + return time.time() + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +class Timeout(object): + """ + Utility object for storing timeout values. + + Example usage: + + .. code-block:: python + + timeout = urllib3.util.Timeout(connect=2.0, read=7.0) + pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) + pool.request(...) # Etc, etc + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. 
+ + :type read: integer, float, or None + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. Specifically, Python's DNS resolver does not obey the + timeout specified on the socket. Other factors that can affect total + request time include high CPU load, high swap, the program running at a + low priority level, or other behaviors. The observed running time for + urllib3 to return a response may be greater than the value passed to + `total`. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not ever trigger, even though the request will + take several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. 
+ """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is used + for clear error messages + :return: the value + :raises ValueError: if the type is not an integer or a float, or if it + is a numeric value less than zero + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value passed + to this function. 
+ + :param timeout: The legacy timeout value + :type timeout: integer, float, sentinel default object, or None + :return: a Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: the elapsed time + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. 
+ + :return: the connect timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: the value to use for the read timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # in case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read diff --git a/urllib3/util/url.py b/urllib3/util/url.py new file mode 100644 index 0000000000..fbd96bc241 --- /dev/null +++ b/urllib3/util/url.py @@ -0,0 +1,162 @@ +from collections import namedtuple + +from ..exceptions import LocationParseError + + +class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. 
+ """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example: :: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example: :: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) 
+ """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`.parse_url` instead. 
+ """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port From ea7e79eb444b95392abe1006d3dbf85ce041f1c0 Mon Sep 17 00:00:00 2001 From: Tahia Khan Date: Tue, 11 Mar 2014 20:10:33 -0400 Subject: [PATCH 17/42] Updating Timeout examples with pr comments --- docs/index.rst | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 8f53e349b8..ce543e0e79 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -155,30 +155,29 @@ of :class:`httplib.HTTPConnection` objects. Timeout ------- -A timeout can be set to block socket operations on individual connections. This -can be done with a float or integer, which sets the timeout for the entire HTTP -request, or an instance of :class:`~urllib3.util.Timeout` which will give you -more granular control over how much time is given to different stages of the request. -More specifically, the Timeout object can be used to granularly set a specific -timeout for connections and reads on the request. +A timeout can be set to abort socket operations on individual connections after +a specified duration. This can be done with a float or integer, which sets the +timeout for the entire HTTP request, or an instance of :class:`~urllib3.util.Timeout` +which will give you more granular control over how much time is given to different +stages of the request. :: - >>> timeout1 = 7.0 #specifies 7.0 for both connect and read - >>> pool1 = HTTPConnectionPool('ajax.googleapis.com', timeout=timeout1) - >>> pool1.request(...) # Etc, etc + >>> # Timeout on pool with 7.0 for both connect and read. + >>> pool = HTTPConnectionPool('ajax.googleapis.com', timeout=7.0) + >>> pool.request(...) - >>> timeout2 = urllib3.util.Timeout(read=5) #specifies infinite timeout for connections and 5 for read - >>> pool2 = HTTPConnectionPool('ajax.googleapis.com', timeout=timeout2) - >>> pool2.request(...) 
# Etc, etc + >>> # Timeout object on pool with infinite timeout for connections and 5 for read. + >>> pool = HTTPConnectionPool('ajax.googleapis.com', timeout=urllib3.util.Timeout(read=5)) + >>> pool.request(...) - >>> timeout2 = urllib3.util.Timeout(connect=3, read=5) #specifies 3 for connect and 5 for read - >>> pool2 = HTTPConnectionPool('ajax.googleapis.com', timeout=timeout2) - >>> pool2.request(...) # Etc, etc + >>> # Timeout object on request with 3 for connect and 5 for read. + >>> pool = HTTPConnectionPool('ajax.googleapis.com', maxsize=1) + >>> r = pool.request('GET', '/ajax/services/search/web', timeout=urllib3.util.Timeout(connect=3, read=5)) - >>> timeout3 = urllib3.util.Timeout(total=7.5) #specifies 7.5 for read and connect combined - >>> pool3 = HTTPConnectionPool('ajax.googleapis.com', timeout=timeout3) - >>> pool3.request(...) # Etc, etc + >>> # Timeout object on request with 7.5 for read and connect combined. + >>> pool = HTTPConnectionPool('ajax.googleapis.com', maxsize=1) + >>> r = pool.request('GET', '/ajax/services/search/web', timeout=urllib3.util.Timeout(total=7.5)) Foundation ---------- From ed885b2d87585e2e7efa637f434463dd3759c357 Mon Sep 17 00:00:00 2001 From: Tahia Khan Date: Tue, 11 Mar 2014 22:49:24 -0400 Subject: [PATCH 18/42] Updating CONTRIBUTORS.txt --- CONTRIBUTORS.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 59d3ad62ac..d93dd9f03b 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -104,5 +104,8 @@ In chronological order: * Roman Bogorodskiy * Account retries on proxy errors +* Tahia Khan + * Added Timeout examples in docs + * [Your name or handle] <[email or website]> * [Brief summary of your changes] From 357cf186fc4a78c5c721240dc33815607f33b7b5 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Fri, 14 Mar 2014 20:25:48 -0700 Subject: [PATCH 19/42] Some docs tweaks. 
--- docs/index.rst | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index ce543e0e79..9bee57c4a4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -155,29 +155,31 @@ of :class:`httplib.HTTPConnection` objects. Timeout ------- -A timeout can be set to abort socket operations on individual connections after -a specified duration. This can be done with a float or integer, which sets the -timeout for the entire HTTP request, or an instance of :class:`~urllib3.util.Timeout` -which will give you more granular control over how much time is given to different -stages of the request. +A timeout can be set to abort socket operations on individual connections +after the specified duration. The timeout can be defined as a float or an instance of +:class:`~urllib3.util.Timeout` which gives more granular configuration over how +much time is allowed for different stages of the request. This can be set for +the entire pool or per-request. :: + >>> from urllib3 import PoolManager, Timeout - >>> # Timeout on pool with 7.0 for both connect and read. - >>> pool = HTTPConnectionPool('ajax.googleapis.com', timeout=7.0) - >>> pool.request(...) + >>> # Manager with 3 seconds combined timeout. + >>> http = PoolManager(timeout=3.0) + >>> r = http.request('GET', 'http://httpbin.org/delay/1') - >>> # Timeout object on pool with infinite timeout for connections and 5 for read. - >>> pool = HTTPConnectionPool('ajax.googleapis.com', timeout=urllib3.util.Timeout(read=5)) - >>> pool.request(...) + >>> # Manager with 2 second timeout for the read phase, no limit for the rest. + >>> http = PoolManager(timeout=Timeout(read=2.0)) + >>> r = http.request('GET', 'http://httpbin.org/delay/1') - >>> # Timeout object on request with 3 for connect and 5 for read. 
- >>> pool = HTTPConnectionPool('ajax.googleapis.com', maxsize=1) - >>> r = pool.request('GET', '/ajax/services/search/web', timeout=urllib3.util.Timeout(connect=3, read=5)) + >>> # Manager with no timeout but a request with a timeout of 1 seconds for + >>> # the connect phase and 2 seconds for the read phase. + >>> http = PoolManager() + >>> r = http.request('GET', 'http://httpbin.org/delay/1', timeout=Timeout(connect=1.0, read=2.0)) + + >>> # Same Manager but request with a 5 second total timeout. + >>> r = http.request('GET', 'http://httpbin.org/delay/1', timeout=Timeout(total=5.0)) - >>> # Timeout object on request with 7.5 for read and connect combined. - >>> pool = HTTPConnectionPool('ajax.googleapis.com', maxsize=1) - >>> r = pool.request('GET', '/ajax/services/search/web', timeout=urllib3.util.Timeout(total=7.5)) Foundation ---------- From 627fc7a897e8e52b59d8ca2eb3d5ebccbe66ac7a Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 15 Mar 2014 11:05:26 -0700 Subject: [PATCH 20/42] Fixed AppEngine bug of HTTPS requests going out as HTTP (closes #356) --- CHANGES.rst | 2 ++ urllib3/connection.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index bf888c23db..1f6dab4d46 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,8 @@ Changes dev (master) ++++++++++++ +* Fix AppEngine bug of HTTPS requests going out as HTTP. (Issue #356) + * ... [Short description of non-trivial change.] (Issue #) diff --git a/urllib3/connection.py b/urllib3/connection.py index 662bd2e454..afb0897f6c 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -119,6 +119,10 @@ def __init__(self, host, port=None, key_file=None, cert_file=None, self.key_file = key_file self.cert_file = cert_file + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. 
(See Issue #356) + self._protocol = 'https' + def connect(self): conn = self._new_conn() self._prepare_conn(conn) From f8976bdd8c7dbf08768f7a5efd16d0a9afec0d2e Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 15 Mar 2014 17:20:35 -0700 Subject: [PATCH 21/42] util.conn -> util.connection and import cleanup --- urllib3/util/__init__.py | 15 +++------------ urllib3/util/{conn.py => connection.py} | 0 2 files changed, 3 insertions(+), 12 deletions(-) rename urllib3/util/{conn.py => connection.py} (100%) diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py index 328ba7f139..a40185eeaf 100644 --- a/urllib3/util/__init__.py +++ b/urllib3/util/__init__.py @@ -4,30 +4,21 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -# For backwards compatibility, allow you to access resources that used to exist -# here. -from .conn import is_connection_dropped - +from .connection import is_connection_dropped from .request import make_headers - from .response import is_fp_closed - from .ssl_ import ( + SSLContext, + HAS_SNI, assert_fingerprint, resolve_cert_reqs, resolve_ssl_version, ssl_wrap_socket, ) -try: - from .ssl_ import SSLContext, HAS_SNI -except ImportError: - pass - from .timeout import ( current_time, Timeout, ) - from .url import ( get_host, parse_url, diff --git a/urllib3/util/conn.py b/urllib3/util/connection.py similarity index 100% rename from urllib3/util/conn.py rename to urllib3/util/connection.py From 62ecd1523ec383802cb13b09bd7084d2da997420 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sat, 15 Mar 2014 17:22:07 -0700 Subject: [PATCH 22/42] Docs reflow. --- docs/index.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 9bee57c4a4..ad7cb0a7ab 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -155,11 +155,11 @@ of :class:`httplib.HTTPConnection` objects. 
Timeout ------- -A timeout can be set to abort socket operations on individual connections -after the specified duration. The timeout can be defined as a float or an instance of -:class:`~urllib3.util.Timeout` which gives more granular configuration over how -much time is allowed for different stages of the request. This can be set for -the entire pool or per-request. +A timeout can be set to abort socket operations on individual connections after +the specified duration. The timeout can be defined as a float or an instance of +:class:`~urllib3.util.timeout.Timeout` which gives more granular configuration +over how much time is allowed for different stages of the request. This can be +set for the entire pool or per-request. :: >>> from urllib3 import PoolManager, Timeout From 7c7e78ca966d32860b2b7d616cf7e8a74d53ad10 Mon Sep 17 00:00:00 2001 From: Kevin Burke Date: Sun, 16 Mar 2014 14:39:26 +0800 Subject: [PATCH 23/42] Raise LocationParseError if host is None Also fixes double-encoding of the error message "Failed to parse:" for LocationParseErrors, and adds tests that parse_url and poolmanager behave in the appropriate ways. 
--- test/test_poolmanager.py | 8 +++++++- test/test_util.py | 2 ++ urllib3/connectionpool.py | 6 +++++- urllib3/util/url.py | 2 +- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py index 2faab9444d..759b5e3357 100644 --- a/test/test_poolmanager.py +++ b/test/test_poolmanager.py @@ -2,7 +2,10 @@ from urllib3.poolmanager import PoolManager from urllib3 import connection_from_url -from urllib3.exceptions import ClosedPoolError +from urllib3.exceptions import ( + ClosedPoolError, + LocationParseError, +) class TestPoolManager(unittest.TestCase): @@ -63,6 +66,9 @@ def test_manager_clear(self): self.assertEqual(len(p.pools), 0) + def test_nohost(self): + p = PoolManager(5) + self.assertRaises(LocationParseError, p.connection_from_url, 'http://@') if __name__ == '__main__': diff --git a/test/test_util.py b/test/test_util.py index 0683199aa4..5dcaeabdde 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -77,6 +77,7 @@ def test_invalid_host(self): for location in invalid_host: self.assertRaises(LocationParseError, get_host, location) + def test_parse_url(self): url_host_map = { 'http://google.com/mail': Url('http', host='google.com', path='/mail'), @@ -107,6 +108,7 @@ def test_parse_url(self): 'http://foo:bar@localhost/': Url('http', auth='foo:bar', host='localhost', path='/'), 'http://foo@localhost/': Url('http', auth='foo', host='localhost', path='/'), 'http://foo:bar@baz@localhost/': Url('http', auth='foo:bar@baz', host='localhost', path='/'), + 'http://@': Url('http', host=None, auth='') } for url, expected_url in url_host_map.items(): returned_url = parse_url(url) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 6d0dbb184c..ecfc79140b 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -23,6 +23,7 @@ ConnectTimeoutError, EmptyPoolError, HostChangedError, + LocationParseError, MaxRetryError, SSLError, TimeoutError, @@ -40,7 +41,6 @@ from .request import 
RequestMethods from .response import HTTPResponse from .util import ( - assert_fingerprint, get_host, is_connection_dropped, Timeout, @@ -65,6 +65,10 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): + + if host is None: + raise LocationParseError(host) + # httplib doesn't like it when we include brackets in ipv6 addresses host = host.strip('[]') diff --git a/urllib3/util/url.py b/urllib3/util/url.py index fbd96bc241..362d216089 100644 --- a/urllib3/util/url.py +++ b/urllib3/util/url.py @@ -131,7 +131,7 @@ def parse_url(url): if port: # If given, ports must be integers. if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) + raise LocationParseError(url) port = int(port) else: # Blank ports are cool, too. (rfc3986#section-3.2.3) From 2d5145ad3ed8d4e194e7a49a11c25beb61a5daa9 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Sun, 16 Mar 2014 01:51:37 -0700 Subject: [PATCH 24/42] Formatting. --- urllib3/connectionpool.py | 1 - 1 file changed, 1 deletion(-) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index ecfc79140b..ac96165264 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -65,7 +65,6 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): - if host is None: raise LocationParseError(host) From 3501c67bf53765ff786aacfb042d67283ff8fc40 Mon Sep 17 00:00:00 2001 From: Kevin Burke Date: Sun, 16 Mar 2014 19:58:07 +0800 Subject: [PATCH 25/42] Update pyopenssl docs --- urllib3/contrib/pyopenssl.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index 7c513f3aec..21a12c68ad 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -1,4 +1,7 @@ -'''SSL with SNI_-support for Python 2. +'''SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. 
Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself. This needs the following packages installed: @@ -6,9 +9,15 @@ * ndg-httpsclient (tested with 0.3.2) * pyasn1 (tested with 0.1.6) -To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. -This can be done in a ``sitecustomize`` module, or at any other time before -your application begins using ``urllib3``, like this:: +You can install them with the following command: + + pip install pyopenssl ndg-httpsclient pyasn1 + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this:: try: import urllib3.contrib.pyopenssl From 10b83d7d399046fb74a30306d4597d1aeb3f9422 Mon Sep 17 00:00:00 2001 From: Kevin Burke Date: Sun, 16 Mar 2014 20:02:08 +0800 Subject: [PATCH 26/42] add doc-requirements file, for sphinx --- docs/doc-requirements.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 docs/doc-requirements.txt diff --git a/docs/doc-requirements.txt b/docs/doc-requirements.txt new file mode 100644 index 0000000000..c5a55ad8ff --- /dev/null +++ b/docs/doc-requirements.txt @@ -0,0 +1,11 @@ +ndg-httpsclient==0.3.2 +pyasn1==0.1.7 +Sphinx==1.2.2 +Jinja2==2.7.2 +MarkupSafe==0.19 +Pygments==1.6 +cryptography==0.2.2 +six==1.6.1 +cffi==0.8.2 +docutils==0.11 +pycparser==2.10 From ed7fbe81c593a3069dae247a1fa1df3fb887a17b Mon Sep 17 00:00:00 2001 From: Kevin Burke Date: Thu, 20 Mar 2014 16:04:44 +0800 Subject: [PATCH 27/42] Fix up grammar in test comment --- test/with_dummyserver/test_https.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index 18b17ca601..9d6c62cb0a 100644 --- 
a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -121,14 +121,14 @@ def test_ssl_unverified_with_ca_certs(self): @requires_network def test_ssl_verified_with_platform_ca_certs(self): """ - This test check that whe rely on platform CA file to validate - authenticity of SSL certificate. Since this file is used by many - components of the OS, such as curl, apt-get, etc., we decided to not - touch it, in order to not compromise the security of the OS - running the test suite (typically urllib3 developer's OS). - - This test assume that httpbin.org use a certificate signed - by a well known Certificate Authority. + We should rely on the platform CA file to validate authenticity of SSL + certificates. Since this file is used by many components of the OS, + such as curl, apt-get, etc., we decided to not touch it, in order to + not compromise the security of the OS running the test suite (typically + urllib3 developer's OS). + + This test assumes that httpbin.org uses a certificate signed by a well + known Certificate Authority. """ try: import urllib3.contrib.pyopenssl @@ -137,7 +137,8 @@ def test_ssl_verified_with_platform_ca_certs(self): if (urllib3.connection.ssl_wrap_socket is urllib3.contrib.pyopenssl.orig_connection_ssl_wrap_socket): # Not patched - raise SkipTest('This test needs pyopenssl support') + raise SkipTest('This test should only be run after pyopenssl ' + 'monkey patching') https_pool = HTTPSConnectionPool('httpbin.org', 443, cert_reqs=ssl.CERT_REQUIRED) From 3361f884456699c67bf4eef2eeafb14a071cc509 Mon Sep 17 00:00:00 2001 From: Sascha Peilicke Date: Thu, 20 Mar 2014 15:41:39 +0100 Subject: [PATCH 28/42] Don't install dummyserver into site-packages It is pure example code with self-signed certificates. It pollutes the global site-packages spaces and is potentially dangerous (if used accidentally). It doesn't make sense to install it as 'package_data' or 'date_files' either. 
Since it servers as an example it should be part of the source distribution. --- MANIFEST.in | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index d1abae25fa..3f344d181d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt test-requirements.txt +recursive-include dummyserver *.* diff --git a/setup.py b/setup.py index 392b885d74..84e6d5ff77 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ author_email='andrey.petrov@shazow.net', url='http://urllib3.readthedocs.org/', license='MIT', - packages=['urllib3', 'dummyserver', + packages=['urllib3', 'urllib3.packages', 'urllib3.packages.ssl_match_hostname', 'urllib3.contrib', ], From c7e114d8f5c3aefb9fb83209cb699f7ed8cfa028 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Thu, 20 Mar 2014 11:25:16 -0700 Subject: [PATCH 29/42] Added #362 to CHANGES. --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 1f6dab4d46..28b26181de 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,9 @@ dev (master) * Fix AppEngine bug of HTTPS requests going out as HTTP. (Issue #356) +* Don't install `dummyserver` into `site-packages` as it's only needed + for the test suite. (Issue #362) + * ... [Short description of non-trivial change.] (Issue #) From 2e677321d6411c23ae60817a7935978762462c74 Mon Sep 17 00:00:00 2001 From: Josh Schneier Date: Tue, 1 Apr 2014 21:58:46 -0400 Subject: [PATCH 30/42] make it more clear why we are closing connections --- urllib3/util/connection.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index a5f62fed22..568e538ec2 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -19,7 +19,10 @@ def is_connection_dropped(conn): # Platform-specific let the platform handle connection recycling transparently for us. 
""" sock = getattr(conn, 'sock', False) - if not sock: # Platform-specific: AppEngine + if sock is False: # Platform-specific: AppEngine + return False + + if sock is None: # Connection already closed by e.g. httplib. return False if not poll: From 98c6fbfc27d7d51327ad85a85a80dd4fe096cc79 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Tue, 1 Apr 2014 19:42:53 -0700 Subject: [PATCH 31/42] Comment/Formatting. --- urllib3/util/connection.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index 568e538ec2..8deeab5cc0 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -19,14 +19,13 @@ def is_connection_dropped(conn): # Platform-specific let the platform handle connection recycling transparently for us. """ sock = getattr(conn, 'sock', False) - if sock is False: # Platform-specific: AppEngine + if sock is False: # Platform-specific: AppEngine return False - - if sock is None: # Connection already closed by e.g. httplib. + if sock is None: # Connection already closed (such as by httplib). return False if not poll: - if not select: # Platform-specific: AppEngine + if not select: # Platform-specific: AppEngine return False try: From 85ace14c6e01fdbe4215c2ce3bb5228316dec0c3 Mon Sep 17 00:00:00 2001 From: grun Date: Sat, 12 Apr 2014 13:14:06 -0700 Subject: [PATCH 32/42] Add source_address to HTTP[S]Connection and HTTP[S]ConnectionPool to specify the address to bind to when making requests. source_address can be a string, like '127.0.0.1', or a (host, port) 2-tuple like ('127.0.0.1', 1337). 
--- dummyserver/handlers.py | 4 ++ test/__init__.py | 36 +++++++++++++++- test/with_dummyserver/test_connectionpool.py | 38 ++++++++++++++--- test/with_dummyserver/test_https.py | 45 +++++++++++++++++--- urllib3/connection.py | 41 ++++++++---------- urllib3/connectionpool.py | 9 +++- 6 files changed, 133 insertions(+), 40 deletions(-) diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index bc51f3146c..5d6e2e6b97 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -70,6 +70,10 @@ def index(self, _request): "Render simple message" return Response("Dummy server!") + def source_address(self, request): + """Return the requester's IP address.""" + return Response(request.remote_ip) + def set_up(self, request): test_type = request.params.get('test_type') test_id = request.params.get('test_id') diff --git a/test/__init__.py b/test/__init__.py index a9d950dcee..3f90840a88 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,3 +1,4 @@ +import sys import errno import functools import socket @@ -8,7 +9,40 @@ from urllib3.packages import six -def onlyPY3(test): +# We need a host that will not immediately close the connection with a TCP +# Reset. SO suggests this hostname +TARPIT_HOST = '10.255.255.1' + +VALID_SOURCE_ADDRESSES = ['::1', ('::1', 0), '127.0.0.1', ('127.0.0.1', 0)] +# RFC 5737: 192.0.2.0/24 is for testing only. +# RFC 3849: 2001:db8::/32 is for documentation only. 
+INVALID_SOURCE_ADDRESSES = [ + '192.0.2.255', ('192.0.2.255', 0), '2001:db8::1', ('2001:db8::1', 0)] + + +def onlyPy26OrEarlier(test): + """Skips this test unless you are on Python2.6.x or earlier.""" + + @functools.wraps(test) + def wrapper(*args, **kwargs): + msg = "{name} requires Python2.7.x+ to run".format(name=test.__name__) + if sys.version_info > (2, 6): + raise SkipTest(msg) + return test(*args, **kwargs) + return wrapper + +def onlyPy27OrLater(test): + """Skips this test unless you are on Python2.7.x or later.""" + + @functools.wraps(test) + def wrapper(*args, **kwargs): + msg = "{name} requires Python2.7.x+ to run".format(name=test.__name__) + if sys.version_info < (2, 7): + raise SkipTest(msg) + return test(*args, **kwargs) + return wrapper + +def onlyPy3(test): """Skips this test unless you are on Python3.x""" @functools.wraps(test) diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index eb77de24e0..3cf9434daa 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -10,7 +10,9 @@ except: from urllib import urlencode -from test import requires_network, onlyPY3 +from test import ( + onlyPy3, onlyPy27OrLater, onlyPy26OrEarlier, requires_network, TARPIT_HOST, + VALID_SOURCE_ADDRESSES, INVALID_SOURCE_ADDRESSES) from urllib3 import ( encode_multipart_formdata, HTTPConnectionPool, @@ -23,7 +25,7 @@ MaxRetryError, ReadTimeoutError, ) -from urllib3.packages.six import u +from urllib3.packages.six import b, u, string_types from urllib3 import util import tornado @@ -35,10 +37,6 @@ log.setLevel(logging.NOTSET) log.addHandler(logging.StreamHandler(sys.stdout)) -# We need a host that will not immediately close the connection with a TCP -# Reset. 
SO suggests this hostname -TARPIT_HOST = '10.255.255.1' - class TestConnectionPool(HTTPDummyServerTestCase): @@ -536,7 +534,33 @@ def test_dns_error(self): pool = HTTPConnectionPool('thishostdoesnotexist.invalid', self.port, timeout=0.001) self.assertRaises(MaxRetryError, pool.request, 'GET', '/test', retries=2) - @onlyPY3 + @onlyPy26OrEarlier + def test_source_address_ignored(self): + # source_address is ignored in Python 2.6 and earlier. + for addr in INVALID_SOURCE_ADDRESSES: + pool = HTTPConnectionPool( + self.host, self.port, source_address=addr) + r = pool.request('GET', '/source_address') + assert r.status == 200 + + @onlyPy27OrLater + def test_source_address(self): + for addr in VALID_SOURCE_ADDRESSES: + pool = HTTPConnectionPool( + self.host, self.port, source_address=addr) + r = pool.request('GET', '/source_address') + addr_bytes = b(addr if isinstance(addr, string_types) else addr[0]) + assert r.data == addr_bytes + + @onlyPy27OrLater + def test_source_address_error(self): + for addr in INVALID_SOURCE_ADDRESSES: + pool = HTTPConnectionPool( + self.host, self.port, source_address=addr) + self.assertRaises( + MaxRetryError, pool.request, 'GET', '/source_address') + + @onlyPy3 def test_httplib_headers_case_insensitive(self): HEADERS = {'Content-Length': '0', 'Content-type': 'text/plain', 'Server': 'TornadoServer/%s' % tornado.version} diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index 18b17ca601..9c053c5ac4 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -9,15 +9,18 @@ from dummyserver.testcase import HTTPSDummyServerTestCase from dummyserver.server import DEFAULT_CA, DEFAULT_CA_BAD, DEFAULT_CERTS -from test import requires_network - +from test import ( + onlyPy3, onlyPy27OrLater, onlyPy26OrEarlier, requires_network, TARPIT_HOST, + VALID_SOURCE_ADDRESSES, INVALID_SOURCE_ADDRESSES) from urllib3 import HTTPSConnectionPool +from urllib3.packages.six import b, string_types 
import urllib3.connection from urllib3.connection import ( VerifiedHTTPSConnection, UnverifiedHTTPSConnection, ) -from urllib3.exceptions import SSLError, ConnectTimeoutError, ReadTimeoutError +from urllib3.exceptions import ( + SSLError, MaxRetryError, ReadTimeoutError, ConnectTimeoutError) from urllib3.util import Timeout @@ -25,9 +28,6 @@ log.setLevel(logging.NOTSET) log.addHandler(logging.StreamHandler(sys.stdout)) -# We need a host that will not immediately close the connection with a TCP -# Reset. SO suggests this hostname -TARPIT_HOST = '10.255.255.1' class TestHTTPS(HTTPSDummyServerTestCase): def setUp(self): @@ -268,7 +268,6 @@ def test_tunnel_old_python(self): del conn._tunnel_host self._pool._make_request(conn, 'GET', '/') - @requires_network def test_enhanced_timeout(self): def new_pool(timeout, cert_reqs='CERT_REQUIRED'): @@ -303,6 +302,38 @@ def test_enhanced_ssl_connection(self): '7A:F2:8A:D7:1E:07:33:67:DE' https_pool._make_request(conn, 'GET', '/') + @onlyPy26OrEarlier + def test_source_address_ignored(self): + # source_address is ignored in Python 2.6 and earlier. 
+ for addr in INVALID_SOURCE_ADDRESSES: + https_pool = HTTPSConnectionPool( + self.host, self.port, cert_reqs='CERT_REQUIRED', + source_address=addr) + https_pool.ca_certs = DEFAULT_CA + r = https_pool.request('GET', '/source_address') + assert r.status == 200 + + @onlyPy27OrLater + def test_source_address(self): + for addr in VALID_SOURCE_ADDRESSES: + https_pool = HTTPSConnectionPool( + self.host, self.port, cert_reqs='CERT_REQUIRED', + source_address=addr) + https_pool.ca_certs = DEFAULT_CA + r = https_pool.request('GET', '/source_address') + addr_bytes = b(addr if isinstance(addr, string_types) else addr[0]) + assert r.data == addr_bytes + + @onlyPy27OrLater + def test_source_address_error(self): + for addr in INVALID_SOURCE_ADDRESSES: + https_pool = HTTPSConnectionPool( + self.host, self.port, cert_reqs='CERT_REQUIRED', + source_address=addr) + https_pool.ca_certs = DEFAULT_CA + self.assertRaises( + MaxRetryError, https_pool.request, 'GET', '/source_address') + class TestHTTPS_TLSv1(HTTPSDummyServerTestCase): certs = DEFAULT_CERTS.copy() diff --git a/urllib3/connection.py b/urllib3/connection.py index 57e6f5fbc7..5c528e869b 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -66,29 +66,26 @@ class HTTPConnection(_HTTPConnection, object): tcp_nodelay = 1 def __init__(self, *args, **kw): - if six.PY3: # Python 3 + if six.PY3: # Python 3. kw.pop('strict', None) - - if sys.version_info < (2, 7): # Python 2.6 and earlier + if sys.version_info < (2, 7): # Python 2.6 and earlier. kw.pop('source_address', None) - self.source_address = None - _HTTPConnection.__init__(self, *args, **kw) + if isinstance(kw.get('source_address'), six.string_types): + kw['source_address'] = (kw['source_address'], 0) + self.source_address = kw.get('source_address') # Python 2.6 and earlier. + + # _HTTPConnection.__init__() sets self.source_address in Python 2.7+. 
+ _HTTPConnection.__init__(self, *args, **kw) def _new_conn(self): """ Establish a socket connection and set nodelay settings on it :return: a new socket connection """ - extra_args = [] - if self.source_address: # Python 2.7+ - extra_args.append(self.source_address) - + args = [] if self.source_address is None else [self.source_address] conn = socket.create_connection( - (self.host, self.port), - self.timeout, - *extra_args - ) + (self.host, self.port), self.timeout, *args) conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) return conn @@ -108,13 +105,10 @@ class HTTPSConnection(HTTPConnection): default_port = port_by_scheme['https'] def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None): + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw): - HTTPConnection.__init__(self, host, port, - strict=strict, - timeout=timeout, - source_address=None) + HTTPConnection.__init__(self, host, port, strict=strict, + timeout=timeout, **kw) self.key_file = key_file self.cert_file = cert_file @@ -149,12 +143,13 @@ def set_cert(self, key_file=None, cert_file=None, def connect(self): # Add certificate verification + conn_kw = dict(self.conn_kw) + if isinstance(conn_kw.get('source_address'), six.string_types): + conn_kw['source_address'] = (conn_kw['source_address'], 0) + try: sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout, - **self.conn_kw - ) + address=(self.host, self.port), timeout=self.timeout, **conn_kw) except SocketTimeout: raise ConnectTimeoutError( self, "Connection to %s timed out. 
(connect timeout=%s)" % diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index aa9daca67c..f7f517ed28 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -4,6 +4,7 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php +import sys import errno import logging @@ -136,8 +137,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None, - **conn_kw): + headers=None, _proxy=None, _proxy_headers=None, **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) @@ -164,6 +164,8 @@ def __init__(self, host, port=None, strict=False, self.num_connections = 0 self.num_requests = 0 + if sys.version_info < (2, 7): # Python 2.6 and earlier. + conn_kw.pop('source_address', None) self.conn_kw = conn_kw def _new_conn(self): @@ -600,6 +602,9 @@ def __init__(self, host, port=None, assert_hostname=None, assert_fingerprint=None, **conn_kw): + if sys.version_info < (2, 7): # Python 2.6 and earlier. + conn_kw.pop('source_address', None) + HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, block, headers, _proxy, _proxy_headers, **conn_kw) self.key_file = key_file From 2fc63242bee7e4ab3bc220a69c5de4d1e9edbffd Mon Sep 17 00:00:00 2001 From: grun Date: Sat, 12 Apr 2014 13:28:27 -0700 Subject: [PATCH 33/42] Comments. --- urllib3/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 5c528e869b..c9daa5470f 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -73,7 +73,7 @@ def __init__(self, *args, **kw): if isinstance(kw.get('source_address'), six.string_types): kw['source_address'] = (kw['source_address'], 0) - self.source_address = kw.get('source_address') # Python 2.6 and earlier. 
+ self.source_address = kw.get('source_address') # For Py2.6 and earlier. # _HTTPConnection.__init__() sets self.source_address in Python 2.7+. _HTTPConnection.__init__(self, *args, **kw) From 8bb7404fe53584b1084db319a7f70f2191367dbc Mon Sep 17 00:00:00 2001 From: grun Date: Sat, 12 Apr 2014 16:27:26 -0700 Subject: [PATCH 34/42] Add a shorthand python version comment to exclude_lines: '# Py\d.*'. --- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index 693b97e2bf..a9b4aa2de4 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,6 +5,7 @@ omit = urllib3/packages/* exclude_lines = .* # Platform-specific.* except ImportError: + .*:.* # Py\d.* .*:.* # Python \d.* pass .* # Abstract From 6a0a49e531330261b41d81f3f5392e5137951d24 Mon Sep 17 00:00:00 2001 From: grun Date: Sat, 12 Apr 2014 16:28:16 -0700 Subject: [PATCH 35/42] Add python version comments to exclude lines from Python 2.6 tests. --- urllib3/connection.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index c9daa5470f..e62e0b9f18 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -71,9 +71,9 @@ def __init__(self, *args, **kw): if sys.version_info < (2, 7): # Python 2.6 and earlier. kw.pop('source_address', None) - if isinstance(kw.get('source_address'), six.string_types): + if isinstance(kw.get('source_address'), six.string_types): # Py2.7+. kw['source_address'] = (kw['source_address'], 0) - self.source_address = kw.get('source_address') # For Py2.6 and earlier. + self.source_address = kw.get('source_address') # For Py2.6 and earlier. # _HTTPConnection.__init__() sets self.source_address in Python 2.7+. 
_HTTPConnection.__init__(self, *args, **kw) @@ -143,13 +143,12 @@ def set_cert(self, key_file=None, cert_file=None, def connect(self): # Add certificate verification - conn_kw = dict(self.conn_kw) - if isinstance(conn_kw.get('source_address'), six.string_types): - conn_kw['source_address'] = (conn_kw['source_address'], 0) - + kw = dict(self.conn_kw) + if isinstance(kw.get('source_address'), six.string_types): # Py2.7+. + kw['source_address'] = (kw['source_address'], 0) try: sock = socket.create_connection( - address=(self.host, self.port), timeout=self.timeout, **conn_kw) + address=(self.host, self.port), timeout=self.timeout, **kw) except SocketTimeout: raise ConnectTimeoutError( self, "Connection to %s timed out. (connect timeout=%s)" % From 9b8ee5ac22f70744ea6fb9215a0696c78d4912a5 Mon Sep 17 00:00:00 2001 From: grun Date: Sun, 13 Apr 2014 12:45:22 -0700 Subject: [PATCH 36/42] Remove support for string only source_address values. Now only (host, port) 2-tuples are supported. Remove trailing periods from Python version comments. --- .coveragerc | 1 - test/__init__.py | 5 ++--- test/with_dummyserver/test_connectionpool.py | 5 ++--- test/with_dummyserver/test_https.py | 3 +-- urllib3/connection.py | 12 ++++-------- urllib3/connectionpool.py | 4 ++-- 6 files changed, 11 insertions(+), 19 deletions(-) diff --git a/.coveragerc b/.coveragerc index a9b4aa2de4..693b97e2bf 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,7 +5,6 @@ omit = urllib3/packages/* exclude_lines = .* # Platform-specific.* except ImportError: - .*:.* # Py\d.* .*:.* # Python \d.* pass .* # Abstract diff --git a/test/__init__.py b/test/__init__.py index 3f90840a88..2721f4ec19 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -13,11 +13,10 @@ # Reset. SO suggests this hostname TARPIT_HOST = '10.255.255.1' -VALID_SOURCE_ADDRESSES = ['::1', ('::1', 0), '127.0.0.1', ('127.0.0.1', 0)] +VALID_SOURCE_ADDRESSES = [('::1', 0), ('127.0.0.1', 0)] # RFC 5737: 192.0.2.0/24 is for testing only. 
# RFC 3849: 2001:db8::/32 is for documentation only. -INVALID_SOURCE_ADDRESSES = [ - '192.0.2.255', ('192.0.2.255', 0), '2001:db8::1', ('2001:db8::1', 0)] +INVALID_SOURCE_ADDRESSES = [('192.0.2.255', 0), ('2001:db8::1', 0)] def onlyPy26OrEarlier(test): diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index 3cf9434daa..189a5c5393 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -536,7 +536,7 @@ def test_dns_error(self): @onlyPy26OrEarlier def test_source_address_ignored(self): - # source_address is ignored in Python 2.6 and earlier. + # source_address is ignored in Python 2.6 and older. for addr in INVALID_SOURCE_ADDRESSES: pool = HTTPConnectionPool( self.host, self.port, source_address=addr) @@ -549,8 +549,7 @@ def test_source_address(self): pool = HTTPConnectionPool( self.host, self.port, source_address=addr) r = pool.request('GET', '/source_address') - addr_bytes = b(addr if isinstance(addr, string_types) else addr[0]) - assert r.data == addr_bytes + assert r.data == b(addr[0]) @onlyPy27OrLater def test_source_address_error(self): diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index 9c053c5ac4..f32ff503f1 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -321,8 +321,7 @@ def test_source_address(self): source_address=addr) https_pool.ca_certs = DEFAULT_CA r = https_pool.request('GET', '/source_address') - addr_bytes = b(addr if isinstance(addr, string_types) else addr[0]) - assert r.data == addr_bytes + assert r.data == b(addr[0]) @onlyPy27OrLater def test_source_address_error(self): diff --git a/urllib3/connection.py b/urllib3/connection.py index e62e0b9f18..0f7af788e1 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -66,13 +66,11 @@ class HTTPConnection(_HTTPConnection, object): tcp_nodelay = 1 def __init__(self, *args, **kw): - if six.PY3: # 
Python 3. + if six.PY3: # Python 3 kw.pop('strict', None) - if sys.version_info < (2, 7): # Python 2.6 and earlier. + if sys.version_info < (2, 7): # Python 2.6 and earlier kw.pop('source_address', None) - if isinstance(kw.get('source_address'), six.string_types): # Py2.7+. - kw['source_address'] = (kw['source_address'], 0) self.source_address = kw.get('source_address') # For Py2.6 and earlier. # _HTTPConnection.__init__() sets self.source_address in Python 2.7+. @@ -143,12 +141,10 @@ def set_cert(self, key_file=None, cert_file=None, def connect(self): # Add certificate verification - kw = dict(self.conn_kw) - if isinstance(kw.get('source_address'), six.string_types): # Py2.7+. - kw['source_address'] = (kw['source_address'], 0) try: sock = socket.create_connection( - address=(self.host, self.port), timeout=self.timeout, **kw) + address=(self.host, self.port), timeout=self.timeout, + **self.conn_kw) except SocketTimeout: raise ConnectTimeoutError( self, "Connection to %s timed out. (connect timeout=%s)" % diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index f7f517ed28..493d937083 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -164,7 +164,7 @@ def __init__(self, host, port=None, strict=False, self.num_connections = 0 self.num_requests = 0 - if sys.version_info < (2, 7): # Python 2.6 and earlier. + if sys.version_info < (2, 7): # Python 2.6 and older conn_kw.pop('source_address', None) self.conn_kw = conn_kw @@ -602,7 +602,7 @@ def __init__(self, host, port=None, assert_hostname=None, assert_fingerprint=None, **conn_kw): - if sys.version_info < (2, 7): # Python 2.6 and earlier. 
+ if sys.version_info < (2, 7): # Python 2.6 or older conn_kw.pop('source_address', None) HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, From 34a943fa46c295b0418d12ec115ac047a85adc9c Mon Sep 17 00:00:00 2001 From: grun Date: Sun, 13 Apr 2014 12:56:57 -0700 Subject: [PATCH 37/42] Rename @onlyPy26OrEarlier and @onlyPy27OrLater to the more explicit @onlyPy26OrOlder and @onlyPy27OrNewer. --- test/__init__.py | 4 ++-- test/with_dummyserver/test_connectionpool.py | 8 ++++---- test/with_dummyserver/test_https.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/__init__.py b/test/__init__.py index 2721f4ec19..a0d7c753b4 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -19,7 +19,7 @@ INVALID_SOURCE_ADDRESSES = [('192.0.2.255', 0), ('2001:db8::1', 0)] -def onlyPy26OrEarlier(test): +def onlyPy26OrOlder(test): """Skips this test unless you are on Python2.6.x or earlier.""" @functools.wraps(test) @@ -30,7 +30,7 @@ def wrapper(*args, **kwargs): return test(*args, **kwargs) return wrapper -def onlyPy27OrLater(test): +def onlyPy27OrNewer(test): """Skips this test unless you are on Python2.7.x or later.""" @functools.wraps(test) diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index 189a5c5393..9ab0920f50 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -11,7 +11,7 @@ from urllib import urlencode from test import ( - onlyPy3, onlyPy27OrLater, onlyPy26OrEarlier, requires_network, TARPIT_HOST, + onlyPy3, onlyPy27OrNewer, onlyPy26OrOlder, requires_network, TARPIT_HOST, VALID_SOURCE_ADDRESSES, INVALID_SOURCE_ADDRESSES) from urllib3 import ( encode_multipart_formdata, @@ -534,7 +534,7 @@ def test_dns_error(self): pool = HTTPConnectionPool('thishostdoesnotexist.invalid', self.port, timeout=0.001) self.assertRaises(MaxRetryError, pool.request, 'GET', '/test', retries=2) - @onlyPy26OrEarlier + 
@onlyPy26OrOlder def test_source_address_ignored(self): # source_address is ignored in Python 2.6 and older. for addr in INVALID_SOURCE_ADDRESSES: @@ -543,7 +543,7 @@ def test_source_address_ignored(self): r = pool.request('GET', '/source_address') assert r.status == 200 - @onlyPy27OrLater + @onlyPy27OrNewer def test_source_address(self): for addr in VALID_SOURCE_ADDRESSES: pool = HTTPConnectionPool( @@ -551,7 +551,7 @@ def test_source_address(self): r = pool.request('GET', '/source_address') assert r.data == b(addr[0]) - @onlyPy27OrLater + @onlyPy27OrNewer def test_source_address_error(self): for addr in INVALID_SOURCE_ADDRESSES: pool = HTTPConnectionPool( diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index f32ff503f1..90fcc6330e 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -10,7 +10,7 @@ from dummyserver.server import DEFAULT_CA, DEFAULT_CA_BAD, DEFAULT_CERTS from test import ( - onlyPy3, onlyPy27OrLater, onlyPy26OrEarlier, requires_network, TARPIT_HOST, + onlyPy3, onlyPy27OrNewer, onlyPy26OrOlder, requires_network, TARPIT_HOST, VALID_SOURCE_ADDRESSES, INVALID_SOURCE_ADDRESSES) from urllib3 import HTTPSConnectionPool from urllib3.packages.six import b, string_types @@ -302,7 +302,7 @@ def test_enhanced_ssl_connection(self): '7A:F2:8A:D7:1E:07:33:67:DE' https_pool._make_request(conn, 'GET', '/') - @onlyPy26OrEarlier + @onlyPy26OrOlder def test_source_address_ignored(self): # source_address is ignored in Python 2.6 and earlier. 
for addr in INVALID_SOURCE_ADDRESSES: @@ -313,7 +313,7 @@ def test_source_address_ignored(self): r = https_pool.request('GET', '/source_address') assert r.status == 200 - @onlyPy27OrLater + @onlyPy27OrNewer def test_source_address(self): for addr in VALID_SOURCE_ADDRESSES: https_pool = HTTPSConnectionPool( @@ -323,7 +323,7 @@ def test_source_address(self): r = https_pool.request('GET', '/source_address') assert r.data == b(addr[0]) - @onlyPy27OrLater + @onlyPy27OrNewer def test_source_address_error(self): for addr in INVALID_SOURCE_ADDRESSES: https_pool = HTTPSConnectionPool( From 6f2f9456255afbbf74c7e9a2b59b25ba204d0bb4 Mon Sep 17 00:00:00 2001 From: grun Date: Sun, 13 Apr 2014 13:00:52 -0700 Subject: [PATCH 38/42] Comments. --- urllib3/connection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 0f7af788e1..cc372c5a27 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -68,10 +68,10 @@ class HTTPConnection(_HTTPConnection, object): def __init__(self, *args, **kw): if six.PY3: # Python 3 kw.pop('strict', None) - if sys.version_info < (2, 7): # Python 2.6 and earlier + if sys.version_info < (2, 7): # Python 2.6 and older kw.pop('source_address', None) - self.source_address = kw.get('source_address') # For Py2.6 and earlier. + self.source_address = kw.get('source_address') # For Py2.6 and older. # _HTTPConnection.__init__() sets self.source_address in Python 2.7+. _HTTPConnection.__init__(self, *args, **kw) From 7a6e4e89db5b7ec0f61e59980fb70feb48720eed Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Mon, 14 Apr 2014 17:30:30 -0700 Subject: [PATCH 39/42] CHANGES mention #352 --- CHANGES.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 28b26181de..d064da39ac 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,9 +6,11 @@ dev (master) * Fix AppEngine bug of HTTPS requests going out as HTTP. 
(Issue #356) -* Don't install `dummyserver` into `site-packages` as it's only needed +* Don't install ``dummyserver`` into ``site-packages`` as it's only needed for the test suite. (Issue #362) +* Added support for specifying ``source_address``. (Issue #352) + * ... [Short description of non-trivial change.] (Issue #) From f78e676ae60596e61879a99864cf4469339a3fae Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Mon, 14 Apr 2014 17:40:59 -0700 Subject: [PATCH 40/42] Reformatting. --- urllib3/connection.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index 4c8b98ef2c..e6d38e66eb 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -71,21 +71,26 @@ def __init__(self, *args, **kw): if sys.version_info < (2, 7): # Python 2.6 and older kw.pop('source_address', None) - self.source_address = kw.get('source_address') # For Py2.6 and older. + # Pre-set source_address in case we have an older Python like 2.6. + self.source_address = kw.get('source_address') - # _HTTPConnection.__init__() sets self.source_address in Python 2.7+. + # Superclass also sets self.source_address in Python 2.7+. _HTTPConnection.__init__(self, *args, **kw) def _new_conn(self): - """ Establish a socket connection and set nodelay settings on it + """ Establish a socket connection and set nodelay settings on it. 
:return: a new socket connection """ - args = [] if self.source_address is None else [self.source_address] + extra_args = [] + if self.source_address: + extra_args.append(self.source_address) + conn = socket.create_connection( - (self.host, self.port), self.timeout, *args) - conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + (self.host, self.port), self.timeout, *extra_args) + conn.setsockopt( + socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) + return conn def _prepare_conn(self, conn): From 8946ee30901e20e6fd1567fed149b1aad6917c36 Mon Sep 17 00:00:00 2001 From: grun Date: Wed, 16 Apr 2014 00:58:27 -0700 Subject: [PATCH 41/42] Add Arthur Grunseid to contributors. --- CONTRIBUTORS.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index e790d81095..e6178f17dc 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -114,5 +114,8 @@ In chronological order: * Tahia Khan * Added Timeout examples in docs +* Arthur Grunseid + * source_address support and tests (with https://github.com/bui) + * [Your name or handle] <[email or website]> * [Brief summary of your changes] From f126fd5a0558645de652cd32fd5eb89ae913c395 Mon Sep 17 00:00:00 2001 From: grun Date: Wed, 16 Apr 2014 01:08:47 -0700 Subject: [PATCH 42/42] Add Python 2.7+ version comment to achieve 100% Python 2.6 test coverage. --- urllib3/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urllib3/connection.py b/urllib3/connection.py index e6d38e66eb..de7b925ea9 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -83,7 +83,7 @@ def _new_conn(self): :return: a new socket connection """ extra_args = [] - if self.source_address: + if self.source_address: # Python 2.7+ extra_args.append(self.source_address) conn = socket.create_connection(