diff --git a/.coveragerc b/.coveragerc index e8e30c193e..1fafb7aa0f 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,5 +5,6 @@ omit = urllib3/packages/* exclude_lines = .* # Platform-specific.* except ImportError: - try:.* # Python 3 + .*:.* # Python 3 pass + .* # Abstract diff --git a/CHANGES.rst b/CHANGES.rst index d998db8964..5afdd62897 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,24 @@ Changes ======= +1.3 (2012-03-25) +++++++++++++++++ + +* Removed pre-1.0 deprecated API. + +* Refactored helpers into a ``urllib3.util`` submodule. + +* Fixed multipart encoding to support list-of-tuples for keys with multiple + values. (Issue #48) + +* Fixed multiple Set-Cookie headers in response not getting merged properly in + Python 3. (Issue #53) + +* AppEngine support with Py27. (Issue #61) + +* Minor ``encode_multipart_formdata`` fixes related to Python 3 strings vs + bytes. + 1.2.2 (2012-02-06) ++++++++++++++++++ diff --git a/dummyserver/server.py b/dummyserver/server.py index 529850f480..6c0943c062 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -83,7 +83,7 @@ def _start_server(self): else: http_server = tornado.httpserver.HTTPServer(container) - http_server.listen(self.port) + http_server.listen(self.port, address=self.host) return http_server def run(self): @@ -106,7 +106,7 @@ def stop(self): if len(sys.argv) > 1: url = sys.argv[1] - print("Starting WGI server at: %s" % url) + print("Starting WSGI server at: %s" % url) scheme, host, port = get_host(url) t = TornadoServerThread(scheme=scheme, host=host, port=port) diff --git a/test/test_collections.py b/test/test_collections.py index f8275e04ab..6cb5aca283 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -4,6 +4,7 @@ from urllib3.packages import six xrange = six.moves.xrange + class TestLRUContainer(unittest.TestCase): def test_maxsize(self): d = Container(5) diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index 4281d4262d..c32c6dcec0 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -1,11 +1,7 @@ import unittest -from urllib3.connectionpool import ( - connection_from_url, - get_host, - HTTPConnectionPool, - make_headers) - +from urllib3.connectionpool import connection_from_url, HTTPConnectionPool +from urllib3.util import get_host, make_headers from urllib3.exceptions import EmptyPoolError, LocationParseError @@ -105,7 +101,7 @@ def test_max_connections(self): pass try: - pool.get_url('/', pool_timeout=0.01) + pool.request('GET', '/', pool_timeout=0.01) self.fail("Managed to get a connection without EmptyPoolError") except EmptyPoolError: pass diff --git a/test/test_filepost.py b/test/test_filepost.py new file mode 100644 index 0000000000..c2517782bd --- /dev/null +++ b/test/test_filepost.py @@ -0,0 +1,85 @@ +import unittest + +from urllib3.filepost import encode_multipart_formdata, iter_fields +from urllib3.packages.six import b, u + + +BOUNDARY = '!! test boundary !!' + + +class TestIterfields(unittest.TestCase): + + def test_dict(self): + for fieldname, value in iter_fields(dict(a='b')): + self.assertEqual((fieldname, value), ('a', 'b')) + + self.assertEqual( + list(sorted(iter_fields(dict(a='b', c='d')))), + [('a', 'b'), ('c', 'd')]) + + def test_tuple_list(self): + for fieldname, value in iter_fields([('a', 'b')]): + self.assertEqual((fieldname, value), ('a', 'b')) + + self.assertEqual( + list(iter_fields([('a', 'b'), ('c', 'd')])), + [('a', 'b'), ('c', 'd')]) + + +class TestMultipartEncoding(unittest.TestCase): + + def test_input_datastructures(self): + fieldsets = [ + dict(k='v', k2='v2'), + [('k', 'v'), ('k2', 'v2')], + ] + + for fields in fieldsets: + encoded, _ = encode_multipart_formdata(fields, boundary=BOUNDARY) + self.assertEqual(encoded.count(b(BOUNDARY)), 3) + + + def test_field_encoding(self): + fieldsets = [ + [('k', 'v'), ('k2', 'v2')], + [('k', b'v'), (u('k2'), b'v2')], + [('k', b'v'), (u('k2'), 'v2')], + ] + + for fields in fieldsets: + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"\r\n' + b'Content-Type: text/plain\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k2"\r\n' + b'Content-Type: text/plain\r\n' + b'\r\n' + b'v2\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + , fields) + + self.assertEqual(content_type, + b'multipart/form-data; boundary=' + b(BOUNDARY)) + + + def test_filename(self): + fields = [('k', ('somename', b'v'))] + + encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY) + + self.assertEqual(encoded, + b'--' + b(BOUNDARY) + b'\r\n' + b'Content-Disposition: form-data; name="k"; filename="somename"\r\n' + b'Content-Type: application/octet-stream\r\n' + b'\r\n' + b'v\r\n' + b'--' + b(BOUNDARY) + b'--\r\n' + ) + + self.assertEqual(content_type, + b'multipart/form-data; boundary=' + b(BOUNDARY)) diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index cbe69e8c28..22bf93de45 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -60,6 +60,21 @@ def test_upload(self): r = self.pool.request('POST', '/upload', fields=fields) self.assertEqual(r.status, 200, r.data) + def test_one_name_multiple_values(self): + fields = [ + ('foo', 'a'), + ('foo', 'b'), + ] + + # urlencode + r = self.pool.request('GET', '/echo', fields=fields) + self.assertEqual(r.data, b'foo=a&foo=b') + + # multipart + r = self.pool.request('POST', '/echo', fields=fields) + self.assertEqual(r.data.count(b'name="foo"'), 2) + + def test_unicode_upload(self): fieldname = u('myfile') filename = u('\xe2\x99\xa5.txt') @@ -86,13 +101,10 @@ def test_timeout(self): pass def test_redirect(self): - r = self.pool.request('GET', '/redirect', - fields={'target': '/'}, - redirect=False) + r = self.pool.request('GET', '/redirect', fields={'target': '/'}, redirect=False) self.assertEqual(r.status, 303) - r = self.pool.request('GET', '/redirect', - fields={'target': '/'}) + r = self.pool.request('GET', '/redirect', fields={'target': '/'}) self.assertEqual(r.status, 200) self.assertEqual(r.data, b'Dummy server!') @@ -169,9 +181,7 @@ def test_keepalive_close(self): def test_post_with_urlencode(self): data = {'banana': 'hammock', 'lol': 'cat'} - r = self.pool.request('POST', '/echo', - fields=data, - encode_multipart=False) + r = self.pool.request('POST', '/echo', fields=data, encode_multipart=False) self.assertEqual(r.data.decode('utf-8'), urlencode(data)) def test_post_with_multipart(self): diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py index de5d24b138..46510281f0 100644 --- a/test/with_dummyserver/test_socketlevel.py +++ b/test/with_dummyserver/test_socketlevel.py @@ -1,11 +1,32 @@ from urllib3 import HTTPConnectionPool -from urllib3.poolmanager import ProxyManager, proxy_from_url +from urllib3.poolmanager import proxy_from_url from dummyserver.testcase import SocketDummyServerTestCase from threading import Event +class TestCookies(SocketDummyServerTestCase): + + def test_multi_setcookie(self): + def multicookie_response_handler(listener): + sock = listener.accept()[0] + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + sock.send(b'HTTP/1.1 200 OK\r\n' + b'Set-Cookie: foo=1\r\n' + b'Set-Cookie: bar=1\r\n' + b'\r\n') + + self._start_server(multicookie_response_handler) + pool = HTTPConnectionPool(self.host, self.port) + r = pool.request('GET', '/', retries=0) + self.assertEquals(r.headers, {'set-cookie': 'foo=1, bar=1'}) + + class TestSocketClosing(SocketDummyServerTestCase): def test_recovery_when_server_closes_connection(self): diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 2e9c663ea5..2d6fecec4f 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,26 +10,20 @@ __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.2.2' +__version__ = '1.3' from .connectionpool import ( HTTPConnectionPool, HTTPSConnectionPool, - connection_from_url, - get_host, - make_headers) - - -from .exceptions import ( - HTTPError, - MaxRetryError, - SSLError, - TimeoutError) + connection_from_url +) +from . import exceptions +from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .filepost import encode_multipart_formdata +from .util import make_headers, get_host # Set default logging handler to avoid "No handler found" warnings. diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 39e652ed44..c3cb3b125f 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -7,15 +7,8 @@ import logging import socket -from base64 import b64encode from socket import error as SocketError, timeout as SocketTimeout -try: - from select import poll, POLLIN -except ImportError: # Doesn't exist on OSX and other platforms - from select import select - poll = False - try: # Python 3 from http.client import HTTPConnection, HTTPException from http.client import HTTP_PORT, HTTPS_PORT @@ -42,17 +35,16 @@ import ssl BaseSSLError = ssl.SSLError -except ImportError: +except (ImportError, AttributeError): pass -from .packages.ssl_match_hostname import match_hostname, CertificateError from .request import RequestMethods from .response import HTTPResponse +from .util import get_host, is_connection_dropped from .exceptions import ( EmptyPoolError, HostChangedError, - LocationParseError, MaxRetryError, SSLError, TimeoutError, @@ -61,6 +53,7 @@ from .packages.ssl_match_hostname import match_hostname, CertificateError from .packages import six + xrange = six.moves.xrange log = logging.getLogger(__name__) @@ -72,6 +65,7 @@ 'https': HTTPS_PORT, } + ## Connection objects (extension of httplib) class VerifiedHTTPSConnection(HTTPSConnection): @@ -107,6 +101,7 @@ def connect(self): if self.ca_certs: match_hostname(self.sock.getpeercert(), self.host) + ## Pool objects class ConnectionPool(object): @@ -212,7 +207,7 @@ def _get_conn(self, timeout=None): conn = self.pool.get(block=self.block, timeout=timeout) # If this is a persistent connection, check if it got disconnected - if conn and conn.sock and is_connection_dropped(conn): + if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() @@ -256,9 +251,13 @@ def _make_request(self, conn, method, url, timeout=_Default, timeout = self.timeout conn.timeout = timeout # This only does anything in Py26+ - conn.request(method, url, **httplib_request_kw) - conn.sock.settimeout(timeout) + + # Set timeout + sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr. + if sock: + sock.settimeout(timeout) + httplib_response = conn.getresponse() log.debug("\"%s %s %s\" %s %s" % @@ -295,7 +294,7 @@ def urlopen(self, method, url, body=None, headers=None, retries=3, .. note:: More commonly, it's appropriate to use a convenience method provided - by :class:`.RequestMethods`, such as :meth:`.request`. + by :class:`.RequestMethods`, such as :meth:`request`. .. note:: @@ -495,94 +494,6 @@ def _new_conn(self): return connection -## Helpers - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. - - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - return headers - - -def get_host(url): - """ - Given a url, return its scheme, host and port (None if it's not there). - - For example: :: - - >>> get_host('http://google.com/mail/') - ('http', 'google.com', None) - >>> get_host('google.com:80') - ('http', 'google.com', 80) - """ - - # This code is actually similar to urlparse.urlsplit, but much - # simplified for our needs. - port = None - scheme = 'http' - - if '://' in url: - scheme, url = url.split('://', 1) - if '/' in url: - url, _path = url.split('/', 1) - if '@' in url: - _auth, url = url.split('@', 1) - if ':' in url: - url, port = url.split(':', 1) - - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s") - - port = int(port) - - return scheme, url, port - - def connection_from_url(url, **kw): """ Given a url, return an :class:`.ConnectionPool` instance of its host. @@ -608,22 +519,3 @@ def connection_from_url(url, **kw): return HTTPSConnectionPool(host, port=port, **kw) else: return HTTPConnectionPool(host, port=port, **kw) - - -def is_connection_dropped(conn): - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - ``HTTPConnection`` object. - """ - if not poll: # Platform-specific - return select([conn.sock], [], [], 0.0)[0] - - # This version is better on platforms that support it. - p = poll() - p.register(conn.sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == conn.sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e1ec8af792..344a1030cc 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -24,15 +24,29 @@ def get_content_type(filename): return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def iter_fields(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts. + """ + if isinstance(fields, dict): + return ((k, v) for k, v in six.iteritems(fields)) + + return ((k, v) for k, v in fields) + + def encode_multipart_formdata(fields, boundary=None): """ Encode a dictionary of ``fields`` using the multipart/form-data mime format. :param fields: - Dictionary of fields. The key is treated as the field name, and the - value as the body of the form-data. If the value is a tuple of two - elements, then the first element is treated as the filename of the - form-data section. + Dictionary of fields or list of (key, value) field tuples. The key is + treated as the field name, and the value as the body of the form-data + bytes. If the value is a tuple of two elements, then the first element + is treated as the filename of the form-data section. + + Field names and filenames must be unicode. :param boundary: If not specified, then a random boundary will be generated using @@ -42,7 +56,7 @@ def encode_multipart_formdata(fields, boundary=None): if boundary is None: boundary = choose_boundary() - for fieldname, value in six.iteritems(fields): + for fieldname, value in iter_fields(fields): body.write(b('--%s\r\n' % (boundary))) if isinstance(value, tuple): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index d42f35bc25..310ea21d96 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -39,11 +39,11 @@ class PoolManager(RequestMethods): Example: :: - >>> manager = PoolManager() + >>> manager = PoolManager(num_pools=2) >>> r = manager.urlopen("http://google.com/") >>> r = manager.urlopen("http://google.com/mail") >>> r = manager.urlopen("http://yahoo.com/") - >>> len(r.pools) + >>> len(manager.pools) 2 """ diff --git a/urllib3/request.py b/urllib3/request.py index 5ea26a0f04..569ac96625 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -44,7 +44,7 @@ class RequestMethods(object): def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -126,22 +126,3 @@ def request_encode_body(self, method, url, fields=None, headers=None, return self.urlopen(method, url, body=body, headers=headers, **urlopen_kw) - - # Deprecated: - - def get_url(self, url, fields=None, **urlopen_kw): - """ - .. deprecated:: 1.0 - Use :meth:`request` instead. - """ - return self.request_encode_url('GET', url, fields=fields, - **urlopen_kw) - - def post_url(self, url, fields=None, headers=None, **urlopen_kw): - """ - .. deprecated:: 1.0 - Use :meth:`request` instead. - """ - return self.request_encode_body('POST', url, fields=fields, - headers=headers, - **urlopen_kw) diff --git a/urllib3/response.py b/urllib3/response.py index 4dd431ec55..5fab824383 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -171,11 +171,22 @@ def from_httplib(ResponseCls, r, **response_kw): with ``original_response=r``. """ + # Normalize headers between different versions of Python + headers = {} + for k, v in r.getheaders(): + # Python 3: Header keys are returned capitalised + k = k.lower() + + has_value = headers.get(k) + if has_value: # Python 3: Repeating header keys are unmerged. + v = ', '.join([has_value, v]) + + headers[k] = v + # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) return ResponseCls(body=r, - # In Python 3, the header keys are returned capitalised - headers=dict((k.lower(), v) for k,v in r.getheaders()), + headers=headers, status=r.status, version=r.version, reason=r.reason, diff --git a/urllib3/util.py b/urllib3/util.py new file mode 100644 index 0000000000..2684a2fd40 --- /dev/null +++ b/urllib3/util.py @@ -0,0 +1,136 @@ +# urllib3/util.py +# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + + +from base64 import b64encode + +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + +from .packages import six +from .exceptions import LocationParseError + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = 'gzip,deflate' + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + return headers + + +def get_host(url): + """ + Given a url, return its scheme, host and port (None if it's not there). + + For example: :: + + >>> get_host('http://google.com/mail/') + ('http', 'google.com', None) + >>> get_host('google.com:80') + ('http', 'google.com', 80) + """ + + # This code is actually similar to urlparse.urlsplit, but much + # simplified for our needs. + port = None + scheme = 'http' + + if '://' in url: + scheme, url = url.split('://', 1) + if '/' in url: + url, _path = url.split('/', 1) + if '@' in url: + _auth, url = url.split('@', 1) + if ':' in url: + url, port = url.split(':', 1) + + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + + port = int(port) + + return scheme, url, port + + + +def is_connection_dropped(conn): + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + ``HTTPConnection`` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if not sock: #Platform-specific: AppEngine + return False + + if not poll: # Platform-specific + if not select: #Platform-specific: AppEngine + return False + + return select([sock], [], [], 0.0)[0] + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. + return True