Skip to content

Commit

Permalink
Handle non-UTF-8 charsets
Browse files Browse the repository at this point in the history
  • Loading branch information
mgedmin committed Jul 10, 2019
1 parent 819e6d8 commit 541b009
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 7 deletions.
10 changes: 5 additions & 5 deletions src/zope/publisher/browser.py
Expand Up @@ -250,20 +250,20 @@ def _decode(self, text):
"""Try to decode the text using one of the available charsets."""
# All text comes from cgi.FieldStorage. On Python 2 it's all bytes
# and we must decode. On Python 3 it's already been decoded into
- # Unicode, using the default charset (UTF-8) and error handling mode
- # (replace).
+ # Unicode, using the charset we specified when instantiating the
+ # FieldStorage instance (Latin-1).
if self.charsets is None:
envadapter = IUserPreferredCharsets(self)
self.charsets = envadapter.getPreferredCharsets() or ['utf-8']
self.charsets = [c for c in self.charsets if c != '*']
if not PYTHON2:
- if self.charsets and self.charsets[0] == 'utf-8':
+ if self.charsets and self.charsets[0] == 'iso-8859-1':
# optimization: we are trying to decode something
# cgi.FieldStorage already decoded for us, let's just return it
# rather than waste time decoding...
return text
# undo what cgi.FieldStorage did and maintain backwards compat
- text = text.encode('utf-8')
+ text = text.encode('latin-1')
for charset in self.charsets:
try:
text = text.decode(charset)
Expand Down Expand Up @@ -316,7 +316,7 @@ def processInputs(self):
qs = env['QUERY_STRING'].encode('latin-1')
env['QUERY_STRING'] = qs.decode(locale.getpreferredencoding(), 'surrogateescape')

- args = {'encoding': 'utf-8'} if not PYTHON2 else {}
+ args = {'encoding': 'latin-1'} if not PYTHON2 else {}
fs = ZopeFieldStorage(fp=fp, environ=env,
keep_blank_values=1, **args)
# On python 3.4 and up, FieldStorage explicitly closes files
Expand Down
24 changes: 24 additions & 0 deletions src/zope/publisher/tests/test_browserrequest.py
Expand Up @@ -324,6 +324,30 @@ def testFormURLEncodedUTF8(self):
self.assertTrue(isinstance(request.form[u"street"], unicode))
self.assertEqual(u"汉语/漢語", request.form['street'])

def testFormURLEncodedLatin1(self):
extra = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
'HTTP_ACCEPT_CHARSET': 'ISO-8859-1',
}
body = b'a=5&b:int=6&street=K\xf6hlerstra\xdfe'
request = self._createRequest(extra, body)
publish(request)
self.assertTrue(isinstance(request.form[u"street"], unicode))
self.assertEqual(u"K\xf6hlerstra\xdfe", request.form['street'])

def testFormURLEncodedLatin7(self):
extra = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
'HTTP_ACCEPT_CHARSET': 'ISO-8859-13',
}
body = u'a=5&b:int=6&street=Ąžuolyno'.encode('iso-8859-13')
request = self._createRequest(extra, body)
publish(request)
self.assertTrue(isinstance(request.form[u"street"], unicode))
self.assertEqual(u"Ąžuolyno", request.form['street'])

def testFormNoEncodingUsesUTF8(self):
encoded = 'K\xc3\xb6hlerstra\xc3\x9fe'
extra = {
Expand Down
4 changes: 2 additions & 2 deletions tox.ini
Expand Up @@ -4,7 +4,7 @@ envlist =

[testenv]
commands =
- zope-testrunner --test-path=src {posargs:-pvc}
+ zope-testrunner --test-path=src {posargs:-vc}
# without explicit deps, setup.py test will download a bunch of eggs into $PWD
deps =
.[test]
Expand All @@ -22,7 +22,7 @@ usedevelop = true
basepython =
python3.6
commands =
- coverage run -m zope.testrunner --test-path=src []
+ coverage run -m zope.testrunner --test-path=src {posargs:-vc}
coverage report --fail-under=92
deps =
{[testenv]deps}
Expand Down

0 comments on commit 541b009

Please sign in to comment.