From 541b009efbfcbe5ba8335e815f0dd7be465a8e62 Mon Sep 17 00:00:00 2001
From: Marius Gedminas <marius@gedmin.as>
Date: Wed, 10 Jul 2019 18:09:34 +0300
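Subject: [PATCH] Handle non-UTF-8 charsets

On Python 3, cgi.FieldStorage decodes form data itself.  Decoding as
UTF-8 with the 'replace' error handler is lossy for any request body
that is not valid UTF-8, so the original bytes could not be recovered
and re-decoded with the client's preferred charset.

Ask FieldStorage to decode as Latin-1 instead.  Latin-1 maps every byte
to exactly one code point, so _decode() can call
text.encode('latin-1') to get the original bytes back and then try each
preferred charset in turn.  The "already decoded" shortcut now applies
when the first preferred charset is iso-8859-1 rather than utf-8.

Add tests for ISO-8859-1 and ISO-8859-13 form submissions.

tox.ini: drop -p from the default zope-testrunner arguments, and
replace the `[]` placeholder in the coverage run command with
{posargs:-vc}.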
---
 src/zope/publisher/browser.py                 | 10 ++++----
 .../publisher/tests/test_browserrequest.py    | 24 +++++++++++++++++++
 tox.ini                                       |  4 ++--
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/zope/publisher/browser.py b/src/zope/publisher/browser.py
index ee09c54d..68abd3a2 100644
--- a/src/zope/publisher/browser.py
+++ b/src/zope/publisher/browser.py
@@ -250,20 +250,20 @@ def _decode(self, text):
         """Try to decode the text using one of the available charsets."""
         # All text comes from cgi.FieldStorage.  On Python 2 it's all bytes
         # and we must decode.  On Python 3 it's already been decoded into
-        # Unicode, using the default charset (UTF-8) and error handling mode
-        # (replace).
+        # Unicode, using the charset we specified when instantiating the
+        # FieldStorage instance (Latin-1).
         if self.charsets is None:
             envadapter = IUserPreferredCharsets(self)
             self.charsets = envadapter.getPreferredCharsets() or ['utf-8']
             self.charsets = [c for c in self.charsets if c != '*']
         if not PYTHON2:
-            if self.charsets and self.charsets[0] == 'utf-8':
+            if self.charsets and self.charsets[0] == 'iso-8859-1':
                 # optimization: we are trying to decode something
                 # cgi.FieldStorage already decoded for us, let's just return it
                 # rather than waste time decoding...
                 return text
             # undo what cgi.FieldStorage did and maintain backwards compat
-            text = text.encode('utf-8')
+            text = text.encode('latin-1')
         for charset in self.charsets:
             try:
                 text = text.decode(charset)
@@ -316,7 +316,7 @@ def processInputs(self):
             qs = env['QUERY_STRING'].encode('latin-1')
             env['QUERY_STRING'] = qs.decode(locale.getpreferredencoding(), 'surrogateescape')
 
-        args = {'encoding': 'utf-8'} if not PYTHON2 else {}
+        args = {'encoding': 'latin-1'} if not PYTHON2 else {}
         fs = ZopeFieldStorage(fp=fp, environ=env,
                               keep_blank_values=1, **args)
         # On python 3.4 and up, FieldStorage explictly closes files
diff --git a/src/zope/publisher/tests/test_browserrequest.py b/src/zope/publisher/tests/test_browserrequest.py
index f6198506..13f12e5f 100644
--- a/src/zope/publisher/tests/test_browserrequest.py
+++ b/src/zope/publisher/tests/test_browserrequest.py
@@ -324,6 +324,30 @@ def testFormURLEncodedUTF8(self):
         self.assertTrue(isinstance(request.form[u"street"], unicode))
         self.assertEqual(u"汉语/漢語", request.form['street'])
 
+    def testFormURLEncodedLatin1(self):
+        extra = {
+            'REQUEST_METHOD': 'POST',
+            'CONTENT_TYPE': 'application/x-www-form-urlencoded',
+            'HTTP_ACCEPT_CHARSET': 'ISO-8859-1',
+        }
+        body = b'a=5&b:int=6&street=K\xf6hlerstra\xdfe'
+        request = self._createRequest(extra, body)
+        publish(request)
+        self.assertTrue(isinstance(request.form[u"street"], unicode))
+        self.assertEqual(u"K\xf6hlerstra\xdfe", request.form['street'])
+
+    def testFormURLEncodedLatin7(self):
+        extra = {
+            'REQUEST_METHOD': 'POST',
+            'CONTENT_TYPE': 'application/x-www-form-urlencoded',
+            'HTTP_ACCEPT_CHARSET': 'ISO-8859-13',
+        }
+        body = u'a=5&b:int=6&street=Ąžuolyno'.encode('iso-8859-13')
+        request = self._createRequest(extra, body)
+        publish(request)
+        self.assertTrue(isinstance(request.form[u"street"], unicode))
+        self.assertEqual(u"Ąžuolyno", request.form['street'])
+
     def testFormNoEncodingUsesUTF8(self):
         encoded = 'K\xc3\xb6hlerstra\xc3\x9fe'
         extra = {
diff --git a/tox.ini b/tox.ini
index 1ce8300a..c0937de7 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@ envlist =
 
 [testenv]
 commands =
-    zope-testrunner --test-path=src {posargs:-pvc}
+    zope-testrunner --test-path=src {posargs:-vc}
 # without explicit deps, setup.py test will download a bunch of eggs into $PWD
 deps =
     .[test]
@@ -22,7 +22,7 @@ usedevelop = true
 basepython =
     python3.6
 commands =
-    coverage run -m zope.testrunner --test-path=src []
+    coverage run -m zope.testrunner --test-path=src {posargs:-vc}
     coverage report --fail-under=92
 deps =
     {[testenv]deps}