urllib3 · pquentin · Jun 7, 2023 · Jun 7, 2023 · Jun 7, 2023 · Jun 7, 2023
diff --git a/src/urllib3/util/request.py b/src/urllib3/util/request.py
@@ -213,7 +213,7 @@ def body_to_chunks(
 
     # Bytes or strings become bytes
     elif isinstance(body, (str, bytes)):
-        chunks = (to_bytes(body),)
+        chunks = (to_bytes(body, "latin-1"),)
         content_length = len(chunks[0])
 
     # File-like object, TODO: use seek() and tell() for length?
@@ -227,7 +227,7 @@ def chunk_readable() -> typing.Iterable[bytes]:
                 if not datablock:
                     break
                 if encode:
-                    datablock = datablock.encode("iso-8859-1")
+                    datablock = datablock.encode("latin-1")
                 yield datablock
 
         chunks = chunk_readable()

diff --git a/test/with_dummyserver/test_chunked_transfer.py b/test/with_dummyserver/test_chunked_transfer.py
@@ -65,7 +65,7 @@ def _test_body(self, data: bytes | str | None) -> None:
 
             assert b"Transfer-Encoding: chunked" in header.split(b"\r\n")
             if data:
-                bdata = data if isinstance(data, bytes) else data.encode("utf-8")
+                bdata = data if isinstance(data, bytes) else data.encode("latin-1")
 def _test_body(self, data): 
     """Send ``data`` as a chunked request body and check the bytes on the wire.

     Starts the chunked handler, issues ``GET /`` with ``chunked=True`` through
     an ``HTTPConnectionPool`` and then inspects ``self.buffer``.
     NOTE(review): presumably the handler started by ``start_chunked_handler``
     fills ``self.buffer`` with the raw request -- confirm against that helper.
     """
     self.start_chunked_handler() 
     with HTTPConnectionPool(self.host, self.port, retries=False) as pool: 
         pool.urlopen("GET", "/", data, chunked=True) 
         # Split the captured request at the first blank line: headers vs. body.
         header, body = self.buffer.split(b"\r\n\r\n", 1) 
         assert b"Transfer-Encoding: chunked" in header.split(b"\r\n") 
         if data: 
             # bytes are compared as-is; str is compared against its UTF-8 encoding.
             bdata = data if isinstance(data, bytes) else data.encode("utf-8") 
             # The payload must appear delimited by CRLF, and the body must end
             # with the zero-length terminating chunk.
             assert b"\r\n" + bdata + b"\r\n" in body 
             assert body.endswith(b"\r\n0\r\n\r\n") 
             # The first line of the body is parsed as the hexadecimal chunk size.
             len_str = body.split(b"\r\n", 1)[0] 
             stated_len = int(len_str, 16) 
             assert stated_len == len(bdata) 
         else: 
             # A falsy body must produce only the terminating chunk.
             assert body == b"0\r\n\r\n" 
 def test_bytestring_body(self): 
     """Check a bytes body (with an embedded CRLF) via ``_test_body``."""
     self._test_body(b"thisshouldbeonechunk\r\nasdf") 
 def test_unicode_body(self): 
     """Check a str body containing non-ASCII characters via ``_test_body``."""
     self._test_body(u"thisshouldbeonechunk\r\näöüß") 
 chunk = chunk.encode("utf8") 
 def _test_body(self, data): 
     """Send ``data`` as a chunked request body and check the bytes on the wire.

     Starts the chunked handler, issues ``GET /`` with ``chunked=True`` through
     an ``HTTPConnectionPool`` and then inspects ``self.buffer``.
     NOTE(review): presumably the handler started by ``start_chunked_handler``
     fills ``self.buffer`` with the raw request -- confirm against that helper.
     """
     self.start_chunked_handler() 
     with HTTPConnectionPool(self.host, self.port, retries=False) as pool: 
         pool.urlopen("GET", "/", data, chunked=True) 
         # Split the captured request at the first blank line: headers vs. body.
         header, body = self.buffer.split(b"\r\n\r\n", 1) 
  
         assert b"Transfer-Encoding: chunked" in header.split(b"\r\n") 
         if data: 
             # bytes are compared as-is; str is compared against its UTF-8 encoding.
             bdata = data if isinstance(data, bytes) else data.encode("utf-8") 
             # The payload must appear delimited by CRLF, and the body must end
             # with the zero-length terminating chunk.
             assert b"\r\n" + bdata + b"\r\n" in body 
             assert body.endswith(b"\r\n0\r\n\r\n") 
  
             # The first line of the body is parsed as the hexadecimal chunk size.
             len_str = body.split(b"\r\n", 1)[0] 
             stated_len = int(len_str, 16) 
             assert stated_len == len(bdata) 
         else: 
             # A falsy body must produce only the terminating chunk.
             assert body == b"0\r\n\r\n" 
  
 def test_bytestring_body(self): 
     """Check a bytes body (with an embedded CRLF) via ``_test_body``."""
     self._test_body(b"thisshouldbeonechunk\r\nasdf") 
  
 def test_unicode_body(self): 
     """Check a str body containing non-ASCII characters via ``_test_body``."""
     self._test_body(u"thisshouldbeonechunk\r\näöüß") 
 chunk = chunk.encode("utf8") 
                 assert b"\r\n" + bdata + b"\r\n" in body
                 assert body.endswith(b"\r\n0\r\n\r\n")
 

diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py
@@ -1089,7 +1089,7 @@ def test_bytes_header(self) -> None:
             assert request_headers["User-Agent"] == "test header"
 
     @pytest.mark.parametrize(
-        "user_agent", ["Schönefeld/1.18.0", "Schönefeld/1.18.0".encode("iso-8859-1")]
+        "user_agent", ["Schönefeld/1.18.0", "Schönefeld/1.18.0".encode("latin-1")]
     )
     def test_user_agent_non_ascii_user_agent(self, user_agent: str) -> None:
         with HTTPConnectionPool(self.host, self.port, retries=False) as pool:

diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py
@@ -55,6 +55,7 @@
 from urllib3.util import ssl_, ssl_wrap_socket
 from urllib3.util.retry import Retry
 from urllib3.util.timeout import Timeout
+from urllib3.util.util import to_bytes
 
 from .. import LogRecorder, has_alpn
 
@@ -2256,6 +2257,38 @@ def socket_handler(listener: socket.socket) -> None:
         assert b"Content-Length: 0\r\n" in sent_bytes
         assert b"transfer-encoding" not in sent_bytes.lower()
 
+    def test_encode_body_latin_1(self) -> None:  # a str body must be sent latin-1 encoded
+        buffer = bytearray()  # raw bytes the server-side handler reads from the client
+
+        def socket_handler(listener: socket.socket) -> None:
+            nonlocal buffer
+            sock = listener.accept()[0]
+            sock.settimeout(0)  # non-blocking: recv() raises OSError when nothing is ready
+
+            start = time.perf_counter()
+            while time.perf_counter() - start < (LONG_TIMEOUT / 2):  # poll for half of LONG_TIMEOUT
+                try:
+                    buffer += sock.recv(65536)
+                except OSError:
+                    continue  # no data available yet; keep polling until the deadline
+
+            sock.sendall(
+                b"HTTP/1.1 200 OK\r\n"
+                b"Server: example.com\r\n"
+                b"Content-Length: 0\r\n\r\n"
+            )
+            sock.close()
+
+        self._start_server(socket_handler)
+
+        with HTTPConnectionPool(self.host, self.port, timeout=3) as pool:
+            resp = pool.request("POST", "/", body="\x80")  # U+0080 encodes differently in latin-1 and UTF-8
+            assert resp.status == 200
+
+        sent_bytes = bytes(buffer)
+        assert to_bytes("\x80", "latin-1") in sent_bytes  # latin-1: the single byte 0x80
+        assert to_bytes("\x80", "utf-8") not in sent_bytes  # UTF-8 would be the two bytes 0xC2 0x80
+
+
     @pytest.mark.parametrize("chunked", [True, False])
     @pytest.mark.parametrize("method", ["POST", "PUT", "PATCH"])
     @pytest.mark.parametrize("body_type", ["file", "generator", "bytes"])
@@ -2344,30 +2377,34 @@ def socket_handler(listener: socket.socket) -> None:
         self._start_server(socket_handler)
 
         body: typing.Any
+        # \x80 encodes to two bytes with UTF-8, so it's a good way to make sure that
+        # latin-1 was in fact used
+        body_str = "x" * 9 + "\x80"
+        body_bytes = body_str.encode("latin-1")
         if body_type == "generator":
 
             def body_generator() -> typing.Generator[bytes, None, None]:
-                yield b"x" * 10
+                yield body_bytes
 
             body = body_generator()
             should_be_chunked = True
 
         elif body_type == "file":
-            body = io.BytesIO(b"x" * 10)
+            body = io.BytesIO(body_bytes)
             body.seek(0, 0)
             should_be_chunked = True
 
         elif body_type == "file_text":
-            body = io.StringIO("x" * 10)
+            body = io.StringIO(body_str)
             body.seek(0, 0)
             should_be_chunked = True
 
         elif body_type == "bytearray":
-            body = bytearray(b"x" * 10)
+            body = bytearray(body_bytes)
             should_be_chunked = False
 
         else:
-            body = b"x" * 10
+            body = body_bytes
             should_be_chunked = False
 
         with HTTPConnectionPool(
@@ -2385,12 +2422,13 @@ def body_generator() -> typing.Generator[bytes, None, None]:
         if should_be_chunked:
             assert b"content-length" not in sent_bytes.lower()
             assert b"Transfer-Encoding: chunked\r\n" in sent_bytes
-            assert b"\r\n\r\na\r\nxxxxxxxxxx\r\n0\r\n\r\n" in sent_bytes
-
+            expected_str_body = f"\r\n\r\na\r\n{body_str}\r\n0\r\n\r\n"
+            assert to_bytes(expected_str_body, "latin-1") in sent_bytes
+            assert to_bytes(expected_str_body, "utf-8") not in sent_bytes
         else:
             assert b"Content-Length: 10\r\n" in sent_bytes
             assert b"transfer-encoding" not in sent_bytes.lower()
-            assert sent_bytes.endswith(b"\r\n\r\nxxxxxxxxxx")
+            assert sent_bytes.endswith(to_bytes(f"\r\n\r\n{body_str}", "latin-1"))
 
     @pytest.mark.parametrize(
         "header_transform",