Merge 8dc845f into 144e300

zopefoundation · Feb 14, 2020 · 86a947e · 86a947e
2 parents 144e300 + 8dc845f
commit 86a947e
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 6 deletions.
diff --git a/src/zope/schema/_bootstrapfields.py b/src/zope/schema/_bootstrapfields.py
@@ -20,6 +20,7 @@
 import numbers
 import sys
 import threading
+import unicodedata
 from math import isinf
 
 from zope.interface import Attribute
@@ -505,11 +506,13 @@ class Text(MinMaxLen, Field):
     """A field containing text used for human discourse."""
     _type = text_type
 
-    def __init__(self, *args, **kw):
+    def __init__(self,  *args, **kw):
+        self.unicode_normalization = kw.pop('unicode_normalization', 'NFC')
         super(Text, self).__init__(*args, **kw)
 
-    def fromUnicode(self, str):
+    def fromUnicode(self, value):
         """
+        >>> import unicodedata
         >>> from zope.schema.interfaces import WrongType
         >>> from zope.schema.interfaces import ConstraintNotSatisfied
         >>> from zope.schema import Text
@@ -529,9 +532,14 @@ def fromUnicode(self, str):
         ...
         zope.schema._bootstrapinterfaces.ConstraintNotSatisfied: (u'foo spam', '')
         """
-        self.validate(str)
-        return str
-
+        # Check against ``text_type`` (what ``Text._type`` is set to); a local
+        # ``unicode = str`` under ``if not PY2`` is unbound on Python 2.
+        if isinstance(value, text_type) and self.unicode_normalization:
+            value = unicodedata.normalize(
+                self.unicode_normalization, value)
+        self.validate(value)
+        return value
+
 
 class TextLine(Text):
     """A text field with no newlines."""

diff --git a/src/zope/schema/tests/test__bootstrapfields.py b/src/zope/schema/tests/test__bootstrapfields.py
@@ -13,6 +13,7 @@
 ##############################################################################
 import doctest
 import unittest
+import unicodedata
 
 # pylint:disable=protected-access,inherit-non-class,blacklisted-name
 
@@ -958,11 +959,35 @@ def test_fromUnicode_miss(self):
         self.assertRaisesWrongType(txt.fromUnicode, txt._type, deadbeef)
 
     def test_fromUnicode_hit(self):
-
         deadbeef = u'DEADBEEF'
         txt = self._makeOne()
         self.assertEqual(txt.fromUnicode(deadbeef), deadbeef)
 
+    def test_normalization(self):
+        # A/O/U with diaeresis, decomposed (NFD) into letter + combining mark.
+        decomposed = unicodedata.normalize('NFD', u'\xc4\xd6\xdc')
+        txt = self._makeOne()
+        self.assertEqual(
+            [unicodedata.name(c) for c in txt.fromUnicode(decomposed)],
+            [
+                'LATIN CAPITAL LETTER A WITH DIAERESIS',
+                'LATIN CAPITAL LETTER O WITH DIAERESIS',
+                'LATIN CAPITAL LETTER U WITH DIAERESIS',
+            ]
+        )
+        txt = self._makeOne(unicode_normalization=None)
+        self.assertEqual(
+            [unicodedata.name(c) for c in txt.fromUnicode(decomposed)],
+            [
+                'LATIN CAPITAL LETTER A',
+                'COMBINING DIAERESIS',
+                'LATIN CAPITAL LETTER O',
+                'COMBINING DIAERESIS',
+                'LATIN CAPITAL LETTER U',
+                'COMBINING DIAERESIS',
+            ]
+        )
+
 
 class TextLineTests(EqualityTestsMixin,
                     WrongTypeTestsMixin,