Skip to content

Commit

Permalink
Allow unicode in object ids.
Browse files Browse the repository at this point in the history
We only want to restrict the use of non-printable characters in object ids. General unicode characters are fine.
  • Loading branch information
sallner committed Sep 15, 2017
1 parent eae76e4 commit f9d6ab1
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 16 deletions.
3 changes: 2 additions & 1 deletion src/OFS/ObjectManager.py
Expand Up @@ -83,7 +83,8 @@
# the name BadRequestException is relied upon by 3rd-party code
BadRequestException = BadRequest

bad_id = re.compile(r'[^a-zA-Z0-9-_~,.$\(\)# @]').search
# We want a regex that flags only the ASCII control characters (\x00-\x1F)
# and DEL (\x7F) as bad.
bad_id = re.compile(r'[\x00-\x1F\x7F]').search


def checkValidId(self, id, allow_dup=0):
Expand Down
61 changes: 46 additions & 15 deletions src/OFS/tests/testObjectManager.py
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from logging import getLogger
import unittest

Expand Down Expand Up @@ -362,9 +363,6 @@ def test_setObject_checkId_bad(self):
si = SimpleItem('2')
self.assertRaises(BadRequest, om._setObject, 123, si)
self.assertRaises(BadRequest, om._setObject, 'a\x01b', si)
self.assertRaises(BadRequest, om._setObject, 'a\\b', si)
self.assertRaises(BadRequest, om._setObject, 'a:b', si)
self.assertRaises(BadRequest, om._setObject, 'a;b', si)
self.assertRaises(BadRequest, om._setObject, '.', si)
self.assertRaises(BadRequest, om._setObject, '..', si)
self.assertRaises(BadRequest, om._setObject, '_foo', si)
Expand Down Expand Up @@ -531,30 +529,63 @@ def test_empty_string(self):
"('Empty or invalid id specified', '')")

def test_unicode(self):
# Unicode can only be handled under Python 3, Python 2 needs
# bytestrings.
if PY2:
e = self.assertBadRequest(u'abc')
self.assertEqual(str(e),
"('Empty or invalid id specified', u'abc')")
e = self.assertBadRequest(u'abc')
self.assertEqual(
str(e), "('Empty or invalid id specified', u'abc\\u2603')")
else:
# Does not raise
self._callFUT(self._makeContainer(), u'abc')
self._callFUT(self._makeContainer(), u'abc')

def test_unicode_escaped(self):
e = self.assertBadRequest(u'<abc>&def')
if PY2:
# No unicode allowed in general
e = self.assertBadRequest(u'<abc>&def')
self.assertEqual(str(e),
"('Empty or invalid id specified', "
"u'&lt;abc&gt;&amp;def')")
else:
# With Python 3, this is valid.
self._callFUT(self._makeContainer(), u'<abc>&def')

def test_allow_brackets_and_ampersand(self):
# We allow these characters from now on, as they are quoted by urllib.
self._callFUT(self._makeContainer(), '<abc>&def')

def test_encoded_unicode(self):
if PY2:
# For Python 2 we allow encoded unicode
self._callFUT(self._makeContainer(), u'abcö'.encode('utf-8'))
else:
# In Python 3 we do not accept bytestrings.
e = self.assertBadRequest(u'abcö'.encode('utf-8'))
self.assertEqual(str(e),
'The id "&lt;abc&gt;&amp;def" contains '
'characters illegal in URLs.')
"('Empty or invalid id specified', "
"b'abc\\xc3\\xb6')")

def test_badid_XSS(self):
e = self.assertBadRequest('<abc>&def')
self.assertEqual(str(e),
'The id "&lt;abc&gt;&amp;def" contains characters '
'illegal in URLs.')
def test_unprintable_characters(self):
# We do not allow the 32 ASCII control characters \x00-\x1f.
# We do not allow the DEL character \x7f.
if PY2:
e = self.assertBadRequest('abc\x10')
self.assertEqual(str(e),
'The id "abc\x10" contains characters illegal'
' in URLs.')
e = self.assertBadRequest('abc\x7f')
self.assertEqual(str(e),
'The id "abc\x7f" contains characters illegal'
' in URLs.')
else:
e = self.assertBadRequest(u'abc\x10')
self.assertEqual(str(e),
'The id "abc\x10" contains characters illegal'
' in URLs.')
e = self.assertBadRequest(u'abc\x7f')
self.assertEqual(str(e),
'The id "abc\x7f" contains characters illegal'
' in URLs.')

def test_one_dot(self):
e = self.assertBadRequest('.')
Expand Down

0 comments on commit f9d6ab1

Please sign in to comment.