
Commit

Merge pull request #44 from zopefoundation/issue15
Handle Unicode token values with non-ascii chars.
jamadden committed Aug 14, 2018
2 parents ec25d71 + 62d9970 commit c61de98
Showing 5 changed files with 53 additions and 11 deletions.
CHANGES.rst: 7 additions & 0 deletions
@@ -20,6 +20,13 @@ Changes
   ``max`` values (they must still specify a ``default`` value). See
   `issue 9 <https://github.com/zopefoundation/zope.schema/issues/9>`_.

+- ``Choice``, ``SimpleVocabulary`` and ``SimpleTerm`` all gracefully
+  handle using Unicode token values with non-ASCII characters by encoding
+  them with the ``backslashreplace`` error handler. See `issue 15
+  <https://github.com/zopefoundation/zope.schema/issues/15>`_ and `PR
+  6 <https://github.com/zopefoundation/zope.schema/pull/6>`_.


4.5.0 (2017-07-10)
------------------

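The changelog entry above can be illustrated with a short snippet (a minimal sketch; it assumes a zope.schema release that contains this change):

    from zope.schema.vocabulary import SimpleTerm, SimpleVocabulary

    # A Unicode value with non-ASCII characters keeps its value as-is, while
    # the generated token is escaped to a pure-ASCII native string.
    term = SimpleTerm(u'K\xf6ln')
    print(term.value)    # Köln
    print(term.token)    # K\xf6ln

    # The same escaping shows up in a vocabulary's token index.
    vocab = SimpleVocabulary.fromValues([u'K\xf6ln', u'D\xfcsseldorf', 'Bonn'])
    print(sorted(vocab.by_token))    # ['Bonn', 'D\\xfcsseldorf', 'K\\xf6ln']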
src/zope/schema/interfaces.py: 4 additions & 3 deletions
@@ -592,13 +592,14 @@ class ITokenizedTerm(ITerm):
     """

     # Should be a ``zope.schema.ASCIILine``, but `ASCIILine` is not a bootstrap
-    # field.
+    # field. `ASCIILine` is a type of NativeString.
     token = Attribute(
         "token",
         """Token which can be used to represent the value on a stream.

-        The value of this attribute must be a non-empty 7-bit string.
-        Control characters are not allowed.
+        The value of this attribute must be a non-empty 7-bit native string
+        (i.e., the ``str`` type on both Python 2 and 3).
+        Control characters, including newline, are not allowed.
         """)


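A term's token can be checked against the contract spelled out in this docstring; a rough sketch (the helper below is illustrative and not part of the package):

    from zope.schema.vocabulary import SimpleTerm

    def looks_like_valid_token(token):
        # Non-empty native str made only of printable 7-bit characters,
        # i.e. no control characters such as newline.
        return (isinstance(token, str)
                and bool(token)
                and all(' ' <= ch <= '~' for ch in token))

    assert looks_like_valid_token(SimpleTerm(u'Snowman \u2603').token)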
src/zope/schema/tests/test__field.py: 12 additions & 0 deletions
@@ -802,6 +802,18 @@ def test_ctor_w_values(self):
         self.assertEqual(sorted(choose.vocabulary.by_value.keys()), [1, 2])
         self.assertEqual(sorted(choose.source.by_value.keys()), [1, 2])

+    def test_ctor_w_unicode_non_ascii_values(self):
+        values = [u'K\xf6ln', u'D\xfcsseldorf', 'Bonn']
+        choose = self._makeOne(values=values)
+        self.assertEqual(sorted(choose.vocabulary.by_value.keys()),
+                         sorted(values))
+        self.assertEqual(sorted(choose.source.by_value.keys()),
+                         sorted(values))
+        self.assertEqual(
+            sorted(choose.vocabulary.by_token.keys()),
+            sorted([x.encode('ascii', 'backslashreplace').decode('ascii')
+                    for x in values]))
+
     def test_ctor_w_named_vocabulary(self):
         choose = self._makeOne(vocabulary="vocab")
         self.assertEqual(choose.vocabularyName, 'vocab')
src/zope/schema/tests/test_vocabulary.py: 7 additions & 0 deletions
@@ -58,6 +58,13 @@ def test_bytes_value(self):
         self.assertEqual(term.token, 'term')
         self.assertFalse(ITitledTokenizedTerm.providedBy(term))

+    def test_unicode_non_ascii_value(self):
+        from zope.schema.interfaces import ITitledTokenizedTerm
+        term = self._makeOne(u'Snowman \u2603')
+        self.assertEqual(term.value, u'Snowman \u2603')
+        self.assertEqual(term.token, 'Snowman \\u2603')
+        self.assertFalse(ITitledTokenizedTerm.providedBy(term))
+

 class SimpleVocabularyTests(unittest.TestCase):

src/zope/schema/vocabulary.py: 23 additions & 8 deletions
@@ -13,15 +13,18 @@
 ##############################################################################
 """Vocabulary support for schema.
 """
+from collections import OrderedDict
+
 from zope.interface import directlyProvides
 from zope.interface import implementer

+from zope.schema._compat import text_type
 from zope.schema.interfaces import ITitledTokenizedTerm
 from zope.schema.interfaces import ITokenizedTerm
 from zope.schema.interfaces import ITreeVocabulary
 from zope.schema.interfaces import IVocabularyRegistry
 from zope.schema.interfaces import IVocabularyTokenized
-from collections import OrderedDict


 # simple vocabularies performing enumerated-like tasks
 _marker = object()
@@ -32,19 +35,31 @@ class SimpleTerm(object):
     """Simple tokenized term used by SimpleVocabulary."""

     def __init__(self, value, token=None, title=None):
-        """Create a term for value and token. If token is omitted,
-        str(value) is used for the token. If title is provided,
-        term implements ITitledTokenizedTerm.
+        """Create a term for *value* and *token*. If *token* is
+        omitted, str(value) is used for the token, escaping any
+        non-ASCII characters.
+
+        If *title* is provided, term implements `ITitledTokenizedTerm`.
         """
         self.value = value
         if token is None:
             token = value
         # In Python 3 str(bytes) returns str(repr(bytes)), which is not what
         # we want here. On the other hand, we want to try to keep the token as
-        # readable as possible.
-        self.token = str(token) \
-            if not isinstance(token, bytes) \
-            else str(token.decode('ascii', 'ignore'))
+        # readable as possible. On both 2 and 3, self.token should be a native
+        # string (ASCIILine).
+        if not isinstance(token, (str, bytes, text_type)):
+            # Nothing we recognize as intended to be textual data.
+            # Get its str() as promised
+            token = str(token)
+
+        if isinstance(token, text_type):
+            token = token.encode('ascii', 'backslashreplace')
+        # Token should be bytes at this point. Now back to native string,
+        # if needed.
+        if not isinstance(token, str):
+            token = token.decode('ascii')
+        self.token = token
         self.title = title
         if title is not None:
             directlyProvides(self, ITitledTokenizedTerm)
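Taken together, the branches in __init__ above map every kind of token input onto an ASCII native string; a small sketch of the resulting tokens (illustrative, assuming this version of the code):

    from zope.schema.vocabulary import SimpleTerm

    SimpleTerm(1).token                   # '1' -- non-text input falls back to str()
    SimpleTerm(b'term').token             # 'term' -- ASCII bytes decoded to a native str
    SimpleTerm(u'Snowman \u2603').token   # 'Snowman \\u2603' -- escaped via backslashreplace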
