Skip to content

Commit

Permalink
Support charset parameter for content type `application/x-www-form-…
Browse files Browse the repository at this point in the history
…encoded` (#1104)

* Support `charset` parameter for content type `application/x-www-form-urlencoded`

* Fix
  • Loading branch information
d-maurer committed Mar 10, 2023
1 parent baf54b8 commit 73d2dfc
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 29 deletions.
9 changes: 9 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ https://github.com/zopefoundation/Zope/blob/4.x/CHANGES.rst

- Update to newest compatible versions of dependencies.

- Support the (non-standard) ``charset`` parameter for
content type ``application/x-www-form-urlencoded``.
This is required (e.g. for ``Plone``) because
``jquery`` constructs content types of the form
``application/x-www-form-urlencoded; charset=utf-8``.
For details see
`plone/buildout.coredev#844
<https://github.com/plone/buildout.coredev/pull/844>`_.


5.8 (2023-01-10)
----------------
Expand Down
67 changes: 40 additions & 27 deletions src/ZPublisher/HTTPRequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,23 +547,24 @@ def processInputs(
# problems when surrogates reach the application because
# they cannot be encoded with a standard error handler.
# We might want to prevent this.
character_encoding = '' # currently used encoding
key = item.name
if key is None:
continue
key = item.name.encode("latin-1").decode(self.charset)
character_encoding = ""
key = item.name.encode("latin-1").decode(
item.name_charset or self.charset)

if hasattr(item, 'file') and \
hasattr(item, 'filename') and \
hasattr(item, 'headers'):
item = FileUpload(item, self.charset)
else:
character_encoding = self.charset
character_encoding = item.value_charset or self.charset
item = item.value.decode(
character_encoding, "surrogateescape")
# from here on, `item` contains the field value
# either as `FileUpload` or `str` with
# `character_encoding` as encoding.
# `character_encoding` as encoding,
# `key` the field name (`str`)

flags = 0
Expand Down Expand Up @@ -1382,6 +1383,9 @@ class FormField(SimpleNamespace, ValueAccessor):
the field name
value
the field value (`bytes`)
name_charset, value_charset
the charset for the name and value, respectively, or ``None``
if no charset has been specified.
File fields additionally have the attributes:
file
Expand All @@ -1396,17 +1400,21 @@ class FormField(SimpleNamespace, ValueAccessor):
are used to represent textual data.
"""

name_charset = value_charset = None


class ZopeFieldStorage(ValueAccessor):
def __init__(self, fp, environ):
self.file = fp
method = environ.get("REQUEST_METHOD", "GET").upper()
qs = environ.get("QUERY_STRING", "")
url_qs = environ.get("QUERY_STRING", "")
post_qs = ""
hl = []
content_type = environ.get("CONTENT_TYPE",
"application/x-www-form-urlencoded")
content_type = content_type
hl.append(("content-type", content_type))
content_type, options = parse_options_header(content_type)
content_type = content_type.lower()
content_disposition = environ.get("CONTENT_DISPOSITION")
if content_disposition is not None:
hl.append(("content-disposition", content_disposition))
Expand All @@ -1417,40 +1425,36 @@ def __init__(self, fp, environ):
fpos = fp.tell()
except Exception:
fpos = None
if content_type.startswith("multipart/form-data"):
ct, options = parse_options_header(content_type)
if content_type == "multipart/form-data":
parts = MultipartParser(
fp, options["boundary"],
mem_limit=FORM_MEMORY_LIMIT,
disk_limit=FORM_DISK_LIMIT,
memfile_limit=FORM_MEMFILE_LIMIT,
charset="latin-1").parts()
elif content_type == "application/x-www-form-urlencoded":
if qs:
qs += "&"
qs += fp.read(FORM_MEMORY_LIMIT).decode("latin-1")
post_qs = fp.read(FORM_MEMORY_LIMIT).decode("latin-1")
if fp.read(1):
raise BadRequest("form data processing "
"requires too much memory")
else:
# `processInputs` currently expects either
# form values or a response body, not both.
# reset `qs` to fulfill this expectation.
qs = ""
elif url_qs:
raise NotImplementedError("request parameters and body")
if fpos is not None:
fp.seek(fpos)
elif method not in ("GET", "HEAD"):
# `processInputs` currently expects either
# form values or a response body, not both.
# reset `qs` to fulfill this expectation.
qs = ""
elif url_qs and content_type != "application/x-www-form-urlencoded":
raise NotImplementedError("request parameters and body")
fl = []
add_field = fl.append
for name, val in parse_qsl(
qs, # noqa: E121
keep_blank_values=True, encoding="latin-1"):
add_field(FormField(
name=name, value=val.encode("latin-1")))
post_opts = {}
if options.get("charset"):
post_opts["name_charset"] = post_opts["value_charset"] = \
options["charset"]
for qs, opts in ((url_qs, {}), (post_qs, post_opts)):
for name, val in parse_qsl(
qs, # noqa: E121
keep_blank_values=True, encoding="latin-1"):
add_field(FormField(
name=name, value=val.encode("latin-1"), **opts))
for part in parts:
if part.filename:
# a file
Expand All @@ -1460,12 +1464,21 @@ def __init__(self, fp, environ):
filename=part.filename,
headers=part.headers)
else:
field = FormField(name=part.name, value=part.raw)
field = FormField(
name=part.name, value=part.raw,
value_charset=_mp_charset(part))
add_field(field)
if fl:
self.list = fl


def _mp_charset(part):
    """Return the ``charset`` option of *part*'s ``Content-Type`` header.

    A missing ``Content-Type`` header is handled like an empty one.
    The result is whatever ``.get("charset")`` yields on the parsed
    header options when no charset is given (presumably ``None``).
    """
    header_value = part.headers.get("Content-Type", "")
    _mime_type, params = parse_options_header(header_value)
    return params.get("charset")


# Original version: zope.publisher.browser.FileUpload
class FileUpload:
'''File upload objects
Expand Down
32 changes: 30 additions & 2 deletions src/ZPublisher/tests/testHTTPRequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from contextlib import contextmanager
from io import BytesIO
from unittest.mock import patch
from urllib.parse import quote_plus

from AccessControl.tainted import TaintedString
from AccessControl.tainted import should_be_tainted
Expand Down Expand Up @@ -144,8 +145,6 @@ def _makeOne(self, stdin=None, environ=None, response=None, clean=1):
class HTTPRequestTests(unittest.TestCase, HTTPRequestFactoryMixin):

def _processInputs(self, inputs):
from urllib.parse import quote_plus

# Have the inputs processed, and return a HTTPRequest object
# holding the result.
# inputs is expected to be a list of (key, value) tuples, no CGI
Expand Down Expand Up @@ -1346,6 +1345,26 @@ def test_issue_1095(self):
self.assertIsInstance(r[0].x, FileUpload)
self.assertIsInstance(r[1].x, TaintedString)

def test_field_charset(self):
    # The multipart field ``x`` declares ``charset=latin-1`` in its own
    # ``Content-Type`` header, while ``y`` arrives via the query string
    # quoted with ``quote_plus``'s default encoding.
    expected = "äöü"
    payload = TEST_FIELD_CHARSET_DATA
    environ = self._makePostEnviron(payload)
    environ["QUERY_STRING"] = "y=" + quote_plus(expected)
    request = self._makeOne(BytesIO(payload), environ)
    request.processInputs()
    # both fields must decode to the same text despite different charsets
    for field_name in ("x", "y"):
        self.assertEqual(request[field_name], expected)

def test_form_charset(self):
    # The urlencoded body is quoted as latin-1 and announces this via
    # the ``charset`` parameter of the request content type; the query
    # string is quoted with ``quote_plus``'s default encoding.
    expected = "äöü"
    form_data = "x=" + quote_plus(expected, encoding="latin-1")
    payload = form_data.encode("ASCII")
    environ = self._makePostEnviron(payload)
    environ["QUERY_STRING"] = "y=" + quote_plus(expected)
    environ["CONTENT_TYPE"] = (
        "application/x-www-form-urlencoded; charset=latin-1")
    request = self._makeOne(BytesIO(payload), environ)
    request.processInputs()
    # both fields must decode to the same text despite different charsets
    for field_name in ("x", "y"):
        self.assertEqual(request[field_name], expected)


class TestHTTPRequestZope3Views(TestRequestViewsBase):

Expand Down Expand Up @@ -1444,3 +1463,12 @@ def test_special(self):
--12345--
'''

TEST_FIELD_CHARSET_DATA = b'''
--12345
Content-Disposition: form-data; name="x"
Content-Type: text/plain; charset=latin-1
%s
--12345--
''' % 'äöü'.encode("latin-1")

0 comments on commit 73d2dfc

Please sign in to comment.