From 541b009efbfcbe5ba8335e815f0dd7be465a8e62 Mon Sep 17 00:00:00 2001 From: Marius Gedminas Date: Wed, 10 Jul 2019 18:09:34 +0300 Subject: [PATCH] Handle non-UTF-8 charsets --- src/zope/publisher/browser.py | 10 ++++---- .../publisher/tests/test_browserrequest.py | 24 +++++++++++++++++++ tox.ini | 4 ++-- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/zope/publisher/browser.py b/src/zope/publisher/browser.py index ee09c54d..68abd3a2 100644 --- a/src/zope/publisher/browser.py +++ b/src/zope/publisher/browser.py @@ -250,20 +250,20 @@ def _decode(self, text): """Try to decode the text using one of the available charsets.""" # All text comes from cgi.FieldStorage. On Python 2 it's all bytes # and we must decode. On Python 3 it's already been decoded into - # Unicode, using the default charset (UTF-8) and error handling mode - # (replace). + # Unicode, using the charset we specified when instantiating the + # FieldStorage instance (Latin-1). if self.charsets is None: envadapter = IUserPreferredCharsets(self) self.charsets = envadapter.getPreferredCharsets() or ['utf-8'] self.charsets = [c for c in self.charsets if c != '*'] if not PYTHON2: - if self.charsets and self.charsets[0] == 'utf-8': + if self.charsets and self.charsets[0] == 'iso-8859-1': # optimization: we are trying to decode something # cgi.FieldStorage already decoded for us, let's just return it # rather than waste time decoding... return text # undo what cgi.FieldStorage did and maintain backwards compat - text = text.encode('utf-8') + text = text.encode('latin-1') for charset in self.charsets: try: text = text.decode(charset) @@ -316,7 +316,7 @@ def processInputs(self): qs = env['QUERY_STRING'].encode('latin-1') env['QUERY_STRING'] = qs.decode(locale.getpreferredencoding(), 'surrogateescape') - args = {'encoding': 'utf-8'} if not PYTHON2 else {} + args = {'encoding': 'latin-1'} if not PYTHON2 else {} fs = ZopeFieldStorage(fp=fp, environ=env, keep_blank_values=1, **args) # On python 3.4 and up, FieldStorage explictly closes files diff --git a/src/zope/publisher/tests/test_browserrequest.py b/src/zope/publisher/tests/test_browserrequest.py index f6198506..13f12e5f 100644 --- a/src/zope/publisher/tests/test_browserrequest.py +++ b/src/zope/publisher/tests/test_browserrequest.py @@ -324,6 +324,30 @@ def testFormURLEncodedUTF8(self): self.assertTrue(isinstance(request.form[u"street"], unicode)) self.assertEqual(u"汉语/漢語", request.form['street']) + def testFormURLEncodedLatin1(self): + extra = { + 'REQUEST_METHOD': 'POST', + 'CONTENT_TYPE': 'application/x-www-form-urlencoded', + 'HTTP_ACCEPT_CHARSET': 'ISO-8859-1', + } + body = b'a=5&b:int=6&street=K\xf6hlerstra\xdfe' + request = self._createRequest(extra, body) + publish(request) + self.assertTrue(isinstance(request.form[u"street"], unicode)) + self.assertEqual(u"K\xf6hlerstra\xdfe", request.form['street']) + + def testFormURLEncodedLatin7(self): + extra = { + 'REQUEST_METHOD': 'POST', + 'CONTENT_TYPE': 'application/x-www-form-urlencoded', + 'HTTP_ACCEPT_CHARSET': 'ISO-8859-13', + } + body = u'a=5&b:int=6&street=Ąžuolyno'.encode('iso-8859-13') + request = self._createRequest(extra, body) + publish(request) + self.assertTrue(isinstance(request.form[u"street"], unicode)) + self.assertEqual(u"Ąžuolyno", request.form['street']) + def testFormNoEncodingUsesUTF8(self): encoded = 'K\xc3\xb6hlerstra\xc3\x9fe' extra = { diff --git a/tox.ini b/tox.ini index 1ce8300a..c0937de7 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = [testenv] commands = - zope-testrunner --test-path=src {posargs:-pvc} + zope-testrunner --test-path=src {posargs:-vc} # without explicit deps, setup.py test will download a bunch of eggs into $PWD deps = .[test] @@ -22,7 +22,7 @@ usedevelop = true basepython = python3.6 commands = - coverage run -m zope.testrunner --test-path=src [] + coverage run -m zope.testrunner --test-path=src {posargs:-vc} coverage report --fail-under=92 deps = {[testenv]deps}