Skip to content

Commit

Permalink
Add SKIP_HEADER for skipping automatically added headers
Browse files Browse the repository at this point in the history
  • Loading branch information
sethmlarson committed Oct 27, 2020
1 parent 68c7826 commit 16b7b33
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 28 deletions.
2 changes: 1 addition & 1 deletion src/urllib3/_collections.py
Expand Up @@ -155,7 +155,7 @@ def __setitem__(self, key, val):

def __getitem__(self, key):
val = self._container[key.lower()]
return ", ".join([six.ensure_str(v, "ascii") for v in val[1:]])
return ", ".join(val[1:])

def __delitem__(self, key):
del self._container[key.lower()]
Expand Down
36 changes: 23 additions & 13 deletions src/urllib3/connection.py
Expand Up @@ -43,7 +43,6 @@ class BrokenPipeError(Exception):
pass


from ._collections import HTTPHeaderDict
from ._version import __version__
from .exceptions import (
ConnectTimeoutError,
Expand All @@ -52,7 +51,7 @@ class BrokenPipeError(Exception):
SystemTimeWarning,
)
from .packages.ssl_match_hostname import CertificateError, match_hostname
from .util import SUPPRESS_USER_AGENT, connection
from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection
from .util.ssl_ import (
assert_fingerprint,
create_urllib3_context,
Expand Down Expand Up @@ -213,29 +212,40 @@ def putrequest(self, method, url, *args, **kwargs):

return _HTTPConnection.putrequest(self, method, url, *args, **kwargs)

def putheader(self, header, *values):
    """Send a request header unless it is marked with ``SKIP_HEADER``.

    When ``SKIP_HEADER`` is not among ``values`` the call is forwarded
    unchanged to ``_HTTPConnection.putheader``. When it is present,
    nothing is emitted for ``header``.

    :raises ValueError: if ``SKIP_HEADER`` is given for a header whose
        lower-cased name is not in ``SKIPPABLE_HEADERS``.
    """
    if SKIP_HEADER in values:
        # Skipping was requested: emit nothing, but reject headers
        # that are not listed as skippable.
        if six.ensure_str(header.lower()) in SKIPPABLE_HEADERS:
            return
        supported = "', '".join(map(str.title, sorted(SKIPPABLE_HEADERS)))
        raise ValueError(
            "urllib3.util.SKIP_HEADER only supports '%s'" % (supported,)
        )

    _HTTPConnection.putheader(self, header, *values)

def request(self, method, url, body=None, headers=None):
headers = HTTPHeaderDict(headers if headers is not None else {})
if "user-agent" not in headers:
if headers is None:
headers = {}
else:
# Avoid modifying the headers passed into .request()
headers = headers.copy()
if "user-agent" not in (k.lower() for k in headers):
headers["User-Agent"] = _get_default_user_agent()
elif headers["user-agent"] == SUPPRESS_USER_AGENT:
del headers["user-agent"]
super(HTTPConnection, self).request(method, url, body=body, headers=headers)

def request_chunked(self, method, url, body=None, headers=None):
"""
Alternative to the common request method, which sends the
body with chunked encoding and not as one block
"""
headers = HTTPHeaderDict(headers if headers is not None else {})
skip_accept_encoding = "accept-encoding" in headers
skip_host = "host" in headers
headers = headers or {}
header_keys = set([six.ensure_str(k.lower()) for k in headers])
skip_accept_encoding = "accept-encoding" in header_keys
skip_host = "host" in header_keys
self.putrequest(
method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host
)
if "user-agent" not in headers:
headers["User-Agent"] = _get_default_user_agent()
elif headers["user-agent"] == SUPPRESS_USER_AGENT:
del headers["user-agent"]
if "user-agent" not in header_keys:
self.putheader("User-Agent", _get_default_user_agent())
for header, value in headers.items():
self.putheader(header, value)
if "transfer-encoding" not in headers:
Expand Down
5 changes: 3 additions & 2 deletions src/urllib3/util/__init__.py
Expand Up @@ -2,7 +2,7 @@

# For backwards compatibility, provide imports that used to be here.
from .connection import is_connection_dropped
from .request import SUPPRESS_USER_AGENT, make_headers
from .request import SKIP_HEADER, SKIPPABLE_HEADERS, make_headers
from .response import is_fp_closed
from .retry import Retry
from .ssl_ import (
Expand Down Expand Up @@ -44,5 +44,6 @@
"ssl_wrap_socket",
"wait_for_read",
"wait_for_write",
"SUPPRESS_USER_AGENT",
"SKIP_HEADER",
"SKIPPABLE_HEADERS",
)
11 changes: 7 additions & 4 deletions src/urllib3/util/request.py
Expand Up @@ -5,10 +5,13 @@
from ..exceptions import UnrewindableBodyError
from ..packages.six import b, integer_types

# Use an invalid User-Agent to represent suppressing of default user agent.
# See https://tools.ietf.org/html/rfc7231#section-5.5.3 and
# https://tools.ietf.org/html/rfc7230#section-3.2.6
SUPPRESS_USER_AGENT = "@@@INVALID_USER_AGENT@@@"
# Pass as a value within ``headers`` to skip
# emitting some HTTP headers that are added automatically.
# The only headers that are supported are ``Accept-Encoding``,
# ``Host``, and ``User-Agent``.
SKIP_HEADER = "@@@SKIP_HEADER@@@"
SKIPPABLE_HEADERS = frozenset(["accept-encoding", "host", "user-agent"])

ACCEPT_ENCODING = "gzip,deflate"
try:
import brotli as _unused_module_brotli # noqa: F401
Expand Down
4 changes: 2 additions & 2 deletions test/with_dummyserver/test_chunked_transfer.py
Expand Up @@ -8,7 +8,7 @@
consume_socket,
)
from urllib3 import HTTPConnectionPool
from urllib3.util import SUPPRESS_USER_AGENT
from urllib3.util import SKIP_HEADER
from urllib3.util.retry import Retry

# Retry failed tests
Expand Down Expand Up @@ -123,7 +123,7 @@ def test_remove_user_agent_header(self):
"GET",
"/",
chunks,
headers={"User-Agent": SUPPRESS_USER_AGENT},
headers={"User-Agent": SKIP_HEADER},
chunked=True,
)

Expand Down
146 changes: 140 additions & 6 deletions test/with_dummyserver/test_connectionpool.py
@@ -1,19 +1,23 @@
# -*- coding: utf-8 -*-

import io
import json
import logging
import socket
import sys
import time
import warnings
from test import LONG_TIMEOUT, SHORT_TIMEOUT
from test import LONG_TIMEOUT, SHORT_TIMEOUT, onlyPy2
from threading import Event

import mock
import pytest
import six

from dummyserver.server import HAS_IPV6_AND_DNS, NoIPv6Warning
from dummyserver.testcase import HTTPDummyServerTestCase, SocketDummyServerTestCase
from urllib3 import HTTPConnectionPool, encode_multipart_formdata
from urllib3._collections import HTTPHeaderDict
from urllib3.connection import _get_default_user_agent
from urllib3.exceptions import (
ConnectTimeoutError,
Expand All @@ -26,7 +30,7 @@
)
from urllib3.packages.six import b, u
from urllib3.packages.six.moves.urllib.parse import urlencode
from urllib3.util import SUPPRESS_USER_AGENT
from urllib3.util import SKIP_HEADER, SKIPPABLE_HEADERS
from urllib3.util.retry import RequestHistory, Retry
from urllib3.util.timeout import Timeout

Expand Down Expand Up @@ -830,28 +834,125 @@ def test_no_user_agent_header(self):
custom_ua = "I'm not a web scraper, what are you talking about?"
with HTTPConnectionPool(self.host, self.port) as pool:
# Suppress user agent in the request headers.
no_ua_headers = {"User-Agent": SUPPRESS_USER_AGENT}
no_ua_headers = {"User-Agent": SKIP_HEADER}
r = pool.request("GET", "/headers", headers=no_ua_headers)
request_headers = json.loads(r.data.decode("utf8"))
assert "User-Agent" not in request_headers
assert no_ua_headers["User-Agent"] == SUPPRESS_USER_AGENT
assert no_ua_headers["User-Agent"] == SKIP_HEADER

# Suppress user agent in the pool headers.
pool.headers = no_ua_headers
r = pool.request("GET", "/headers")
request_headers = json.loads(r.data.decode("utf8"))
assert "User-Agent" not in request_headers
assert no_ua_headers["User-Agent"] == SUPPRESS_USER_AGENT
assert no_ua_headers["User-Agent"] == SKIP_HEADER

# Request headers override pool headers.
pool_headers = {"User-Agent": custom_ua}
pool.headers = pool_headers
r = pool.request("GET", "/headers", headers=no_ua_headers)
request_headers = json.loads(r.data.decode("utf8"))
assert "User-Agent" not in request_headers
assert no_ua_headers["User-Agent"] == SUPPRESS_USER_AGENT
assert no_ua_headers["User-Agent"] == SKIP_HEADER
assert pool_headers.get("User-Agent") == custom_ua

@pytest.mark.parametrize(
    "accept_encoding",
    [
        "Accept-Encoding",
        "accept-encoding",
        b"Accept-Encoding",
        b"accept-encoding",
        None,
    ],
)
@pytest.mark.parametrize("host", ["Host", "host", b"Host", b"host", None])
@pytest.mark.parametrize(
    "user_agent", ["User-Agent", "user-agent", b"User-Agent", b"user-agent", None]
)
@pytest.mark.parametrize("chunked", [True, False])
def test_skip_header(self, accept_encoding, host, user_agent, chunked):
    """Headers mapped to ``SKIP_HEADER`` must not be sent; others still are.

    Each of ``accept_encoding``, ``host`` and ``user_agent`` is either
    ``None`` (do not skip) or the key under which ``SKIP_HEADER`` is
    passed, in varying case and as text or bytes.
    """
    headers = {}

    if accept_encoding is not None:
        headers[accept_encoding] = SKIP_HEADER
    if host is not None:
        headers[host] = SKIP_HEADER
    if user_agent is not None:
        headers[user_agent] = SKIP_HEADER

    with HTTPConnectionPool(self.host, self.port) as pool:
        r = pool.request("GET", "/headers", headers=headers, chunked=chunked)
        request_headers = json.loads(r.data.decode("utf8"))

    # Compare against the canonical header name rather than the key that
    # was passed in: a bytes or lower-case key can never equal one of the
    # decoded JSON keys, which would make the "not in" check pass even if
    # the header had been sent.
    received = {name.lower() for name in request_headers}
    expectations = (
        ("Accept-Encoding", accept_encoding),
        ("Host", host),
        ("User-Agent", user_agent),
    )
    for canonical, skip_key in expectations:
        if skip_key is None:
            assert canonical in request_headers
        else:
            assert canonical.lower() not in received

@pytest.mark.parametrize("header", ["Content-Length", "content-length"])
@pytest.mark.parametrize("chunked", [True, False])
def test_skip_header_non_supported(self, header, chunked):
    """Passing ``SKIP_HEADER`` for a non-skippable header raises ValueError."""
    expected_message = (
        "urllib3.util.SKIP_HEADER only supports 'Accept-Encoding', 'Host', 'User-Agent'"
    )

    with HTTPConnectionPool(self.host, self.port) as pool:
        with pytest.raises(ValueError) as excinfo:
            pool.request(
                "GET", "/headers", headers={header: SKIP_HEADER}, chunked=chunked
            )
        message = str(excinfo.value)
        assert message == expected_message

        # Ensure that the error message stays up to date with 'SKIPPABLE_HEADERS'
        for name in SKIPPABLE_HEADERS:
            assert ("'" + name.title() + "'") in message

@pytest.mark.parametrize("chunked", [True, False])
@pytest.mark.parametrize("pool_request", [True, False])
@pytest.mark.parametrize("header_type", [dict, HTTPHeaderDict])
def test_headers_not_modified_by_request(self, chunked, pool_request, header_type):
    """The ``.request*()`` methods must not mutate the caller's headers.

    Covers both ConnectionPool and HTTPConnection entry points; each is
    expected to work on its own internal copy at request time.
    """
    original = header_type()
    original["key"] = "val"

    def send(pool, **extra):
        # Issue one request through the pool or through a raw connection,
        # depending on the parametrized flags.
        if pool_request:
            pool.request("GET", "/headers", chunked=chunked, **extra)
            return
        conn = pool._get_conn()
        if chunked:
            conn.request_chunked("GET", "/headers", **extra)
        else:
            conn.request("GET", "/headers", **extra)

    # Headers configured on the pool itself.
    with HTTPConnectionPool(self.host, self.port) as pool:
        pool.headers = original
        send(pool)

        assert pool.headers == {"key": "val"}
        assert isinstance(pool.headers, header_type)

    # Headers passed explicitly with the request.
    with HTTPConnectionPool(self.host, self.port) as pool:
        send(pool, headers=original)

        assert original == {"key": "val"}

def test_bytes_header(self):
with HTTPConnectionPool(self.host, self.port) as pool:
headers = {"User-Agent": b"test header"}
Expand All @@ -860,6 +961,39 @@ def test_bytes_header(self):
assert "User-Agent" in request_headers
assert request_headers["User-Agent"] == "test header"

@pytest.mark.parametrize(
    "user_agent", [u"Schönefeld/1.18.0", u"Schönefeld/1.18.0".encode("iso-8859-1")]
)
def test_user_agent_non_ascii_user_agent(self, user_agent):
    """A non-ASCII User-Agent, as text or ISO-8859-1 bytes, reaches the server."""
    unicode_on_py2 = six.PY2 and not isinstance(user_agent, str)
    if unicode_on_py2:
        pytest.skip(
            "Python 2 raises UnicodeEncodeError when passed a unicode header"
        )

    sent_headers = {"User-Agent": user_agent}
    with HTTPConnectionPool(self.host, self.port, retries=False) as pool:
        r = pool.urlopen("GET", "/headers", headers=sent_headers)
        request_headers = json.loads(r.data.decode("utf8"))
        assert "User-Agent" in request_headers
        assert request_headers["User-Agent"] == u"Schönefeld/1.18.0"

@onlyPy2
def test_user_agent_non_ascii_fails_on_python_2(self):
    """On Python 2 a unicode non-ASCII User-Agent raises UnicodeEncodeError."""
    expected_message = (
        "'ascii' codec can't encode character u'\\xf6' in "
        "position 3: ordinal not in range(128)"
    )
    bad_headers = {"User-Agent": u"Schönefeld/1.18.0"}

    with HTTPConnectionPool(self.host, self.port, retries=False) as pool:
        with pytest.raises(UnicodeEncodeError) as excinfo:
            pool.urlopen("GET", "/headers", headers=bad_headers)
        assert str(excinfo.value) == expected_message


class TestRetry(HTTPDummyServerTestCase):
def test_max_retry(self):
Expand Down

0 comments on commit 16b7b33

Please sign in to comment.