From 412ca73227fd570f64a1d7a632b4ad38f821feef Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Wed, 7 Apr 2021 11:23:09 -0500 Subject: [PATCH 1/4] Python 2: Forbid type objects as keys. Fixes #153. Also take the opportunity to clean up the TODOs in _datatypes.py with regards to checking for default comparison: Use a metaclass so we get caching, and only check for the special methods on the type object. --- CHANGES.rst | 7 ++ src/BTrees/_datatypes.py | 134 +++++++++++++++++++------------ src/BTrees/objectkeymacros.h | 15 +++- src/BTrees/tests/common.py | 26 ++++++ src/BTrees/tests/test_OOBTree.py | 36 ++++++++- 5 files changed, 160 insertions(+), 58 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9172eeb..ae5ec83 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,13 @@ 4.8.0 (unreleased) ================== +- Make Python 2 forbid the use of type objects as keys (unless a + custom metaclass is used that implements comparison as required by + BTrees.) On Python 3, types are not orderable so they were already + forbidden, but on Python 2 types can be ordered by memory address, + which makes them unsuitable for use as keys. See `issue + `_. + - Make the ``multiunion``, ``union``, ``intersection``, and ``difference`` functions accept arbitrary Python iterables (that iterate across the correct types). Previously, the Python diff --git a/src/BTrees/_datatypes.py b/src/BTrees/_datatypes.py index 5035144..172b899 100644 --- a/src/BTrees/_datatypes.py +++ b/src/BTrees/_datatypes.py @@ -20,6 +20,13 @@ from struct import Struct from struct import error as struct_error +try: + from abc import ABC +except ImportError: + # Python < 3.4 + from abc import ABCMeta + ABC = ABCMeta('ABC', (object,), {}) + from ._compat import int_types from .utils import Lazy @@ -147,6 +154,80 @@ def supports_value_union(self): return False + +class _HasDefaultComparison(ABC): + """ + An ABC for checking whether an item has default comparison. + + This automatically gets us some caching. + """ + + # Comparisons only use special methods defined on the + # type, not instance variables. + + # On CPython 2, every subclass of object inherits __lt__ + # (unless it overrides), and it has ``__objclass__`` of ``type`` + # (of course it is also the same object as ``_object_lt`` at + # that point). Also, interestingly, CPython 2 classes inherit + # ``__lt__`` from ``type``, but *instances* do not. + # + # On CPython 3, classes inherit __lt__ with ``__objclass__`` of ``object``. + # On PyPy2, classes don't inherit any __lt__. + # On PyPy3, they do. + # + # Test these conditions at runtime and define the method variant + # appropriately. + # + # Remember the method is checking if the object has default comparison + assert '__lt__' not in DataType.__dict__ + if hasattr(DataType, '__lt__'): + # CPython 2 or CPython 3 or PyPy3 + if getattr(DataType.__lt__, '__objclass__', None) is object: + # CPython 3 + @classmethod + def __subclasshook__(cls, C, _NoneType=type(None)): + if C is _NoneType: + return False + defining_class = getattr(C.__lt__, '__objclass__', None) + if defining_class is None: + # Implemented in Python + return False + return C.__lt__.__objclass__ is object + elif getattr(DataType.__lt__, '__objclass__', None) is type: + # CPython 2 + @classmethod + def __subclasshook__(cls, C, _NoneType=type(None)): + if C is _NoneType: + return False + # If C is an old-style class, it may not even have __lt__ + if isinstance(C, type): + lt = C.__lt__ + else: + lt = getattr(C, '__lt__', None) + if lt is not None: + defining_class = getattr(lt, '__objclass__', None) + if defining_class is None: + # Implemented in Python + return False + if defining_class is not type: + return False + return not hasattr(C, '__cmp__') + else: + # PyPy3 + @classmethod + def __subclasshook__(cls, C, _object_lt=object.__lt__, _NoneType=type(None)): + if C is _NoneType: + return False + return C.__lt__ is _object_lt + else: + # PyPy2 + @classmethod + def __subclasshook__(cls, C, _NoneType=type(None)): + if C is _NoneType: + return False + return not hasattr(C, '__lt__') and not hasattr(C, '__cmp__') + + class O(KeyDataType): """ Arbitrary (sortable) Python objects. @@ -161,58 +242,9 @@ def as_value_type(self): def supports_value_union(self): return False - @staticmethod - def _check_default_comparison( - item, - # PyPy2 doesn't define __lt__ on object; PyPy3 and - # CPython 2 and 3 do. - _object_lt=getattr(object, '__lt__', object()) - ): - # Enforce test that key has non-default comparison. - # (With the exception of None, because we define a sort order - # for it.) - - if item is None: - return - - if type(item) is object: # pylint:disable=unidiomatic-typecheck - raise TypeError("Can't use object() as keys") - - # Now more complicated checks to be sure we didn't - # inherit default comparison on any version of Python. - - - # TODO: Using a custom ABC and doing ``isinstance(item, NoDefaultComparison)`` - # would automatically gain us caching. - - # XXX: Comparisons only use special methods defined on the - # type, not instance variables. So shouldn't this be getattr(type(key)...)? - # Note that changes some things below; for example, on CPython 2, - # every subclass of object inherits __lt__ (unless it overrides), and it - # has __objclass__ of ``type`` (of course it is also the same object - # as ``_object_lt`` at that point). Also, weirdly, CPython 2 classes inherit - # ``__lt__``, but *instances* do not. - - lt = getattr(item, '__lt__', None) - if lt is not None: - # CPython 2 and 3 follow PEP 252, defining '__objclass__' - # for methods of builtin types like str; methods of - # classes defined in Python don't get it. ``__objclass__`` - if getattr(lt, '__objclass__', None) is object: - lt = None # pragma: no cover Py3k - # PyPy3 doesn't follow PEP 252, but defines '__func__' - elif getattr(lt, '__func__', None) is _object_lt: - lt = None # pragma: no cover PyPy3 - - if (lt is None - # TODO: Shouldn't we only check __cmp__ on Python 2? - # Python 3 won't use it. - and getattr(item, '__cmp__', None) is None): - raise TypeError("Object has default comparison") - - def __call__(self, item): - self._check_default_comparison(item) + if isinstance(item, _HasDefaultComparison): + raise TypeError("Object of class %s has default comparison" % (type(item).__name__,)) return item diff --git a/src/BTrees/objectkeymacros.h b/src/BTrees/objectkeymacros.h index 8fa516e..16a23b1 100644 --- a/src/BTrees/objectkeymacros.h +++ b/src/BTrees/objectkeymacros.h @@ -19,14 +19,23 @@ check_argument_cmp(PyObject *arg) return 1; } + #ifdef PY3K if (Py_TYPE(arg)->tp_richcompare == Py_TYPE(object_)->tp_richcompare) #else - if (Py_TYPE(arg)->tp_richcompare == NULL - && Py_TYPE(arg)->tp_compare == Py_TYPE(object_)->tp_compare) + if ((Py_TYPE(arg)->tp_richcompare == NULL + && Py_TYPE(arg)->tp_compare == Py_TYPE(object_)->tp_compare) + /* Also exclude new-style classes. On Python 2, they can be compared, + but order by address, making them not suitable for BTrees. */ + || PyType_CheckExact(arg) + /* But let classes with a meta class that implements comparison through. */ + || (PyType_Check(arg) && Py_TYPE(arg)->tp_richcompare == PyType_Type.tp_richcompare) + ) #endif { - PyErr_SetString(PyExc_TypeError, "Object has default comparison"); + PyErr_Format(PyExc_TypeError, + "Object of class %s has default comparison", + Py_TYPE(arg)->tp_name); return 0; } return 1; diff --git a/src/BTrees/tests/common.py b/src/BTrees/tests/common.py index 3db7204..2a34c3c 100644 --- a/src/BTrees/tests/common.py +++ b/src/BTrees/tests/common.py @@ -1143,6 +1143,32 @@ def test_assign_value_type_float(self): def test_assign_value_type_None(self): self.__test_value(None) + def testNewStyleClassAsKeyNotAllowed(self): + m = self._makeOne() + class New(object): + pass + + with self.assertRaises(TypeError): + m[New] = self.getTwoValues()[0] + + def testOldStyleClassAsKeyNotALlowed(self): + m = self._makeOne() + class Old: # Python 2: Old style class + pass + + with self.assertRaises(TypeError): + m[Old] = self.getTwoValues()[0] + + def testNewStyleClassWithCustomMetaClassNotAllowed(self): + + class Meta(type): + pass + + cls = Meta('Class', (object,), {}) + m = self._makeOne() + with self.assertRaises(TypeError): + m[cls] = self.getTwoValues()[0] + def testEmptyFirstBucketReportedByGuido(self): # This was for Integer keys from .._compat import xrange diff --git a/src/BTrees/tests/test_OOBTree.py b/src/BTrees/tests/test_OOBTree.py index 79e4289..094e328 100644 --- a/src/BTrees/tests/test_OOBTree.py +++ b/src/BTrees/tests/test_OOBTree.py @@ -25,9 +25,10 @@ def test_byValue(self): [(y, x) for x, y in reversed(ITEMS[22:])]) def testRejectDefaultComparisonOnSet(self): - # Check that passing int keys w default comparison fails. - # Only applies to new-style class instances. Old-style - # instances are too hard to introspect. + # Check that passing in keys w default comparison fails. Only + # applies to new-style class instances if we're using the C + # extensions; old-style instances are too hard to introspect + # in C. # This is white box because we know that the check is being # used in a function that's used in lots of places. @@ -36,13 +37,22 @@ def testRejectDefaultComparisonOnSet(self): from .._compat import PY2 t = self._makeOne() + class OldStyle: + pass + + if self._getTargetClass() is OOBTree.OOBTreePy or not PY2: + with self.assertRaises(TypeError): + t[OldStyle()] = 1 + class C(object): pass with self.assertRaises(TypeError) as raising: t[C()] = 1 - self.assertEqual(raising.exception.args[0], "Object has default comparison") + self.assertEqual( + raising.exception.args[0], + "Object of class C has default comparison") if PY2: # we only check for __cmp__ on Python2 @@ -60,6 +70,14 @@ def __lt__(*args): c = With___lt__() t[c] = 1 + t.clear() + + class With___lt__Old: + def __lt__(*args): + return 1 + + c = With___lt__Old() + t[c] = 1 t.clear() @@ -73,6 +91,16 @@ class C(object): self.assertRaises(KeyError, t.__getitem__, C()) self.assertFalse(C() in t) + + def testNewStyleClassWithCustomMetaClassAllowed(self): + class Meta(type): + def __lt__(cls, other): + return 1 + + cls = Meta('Class', (object,), {}) + m = self._makeOne() + m[cls] = self.getTwoValues()[0] + def test_None_is_smallest(self): t = self._makeOne() for i in range(999): # Make sure we multiple buckets From 1b02aab06358a6af6fb5a8e76963843df50e1be1 Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Fri, 9 Apr 2021 08:08:56 -0500 Subject: [PATCH 2/4] Tweak comment per review. --- src/BTrees/_datatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BTrees/_datatypes.py b/src/BTrees/_datatypes.py index 172b899..d5867cb 100644 --- a/src/BTrees/_datatypes.py +++ b/src/BTrees/_datatypes.py @@ -23,7 +23,7 @@ try: from abc import ABC except ImportError: - # Python < 3.4 + # Python < 3.4 (specifically, Python 2) from abc import ABCMeta ABC = ABCMeta('ABC', (object,), {}) From fb615ef3e1e4fc9154341d2ef0439beaad1f2990 Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Sat, 10 Apr 2021 07:50:30 -0500 Subject: [PATCH 3/4] Use BTree_ShouldSuppressKeyError --- src/BTrees/SetTemplate.c | 10 ++-------- src/BTrees/TreeSetTemplate.c | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/BTrees/SetTemplate.c b/src/BTrees/SetTemplate.c index 0086946..2aedf5e 100644 --- a/src/BTrees/SetTemplate.c +++ b/src/BTrees/SetTemplate.c @@ -106,10 +106,7 @@ Set_discard(Bucket* self, PyObject* args) return NULL; if (_bucket_set(self, key, NULL, 0, 1, 0) < 0) { - /* XXX: After PR#162, this should change to - BTree_ShouldSuppressKeyError() */ - PyObject* exc_type = PyErr_Occurred(); - if (exc_type == PyExc_KeyError) { + if (BTree_ShouldSuppressKeyError()) { PyErr_Clear(); } else if (PyErr_ExceptionMatches(PyExc_TypeError)) { @@ -462,10 +459,7 @@ set_isub(Bucket* self, PyObject* other) } } if (_bucket_set(self, v, NULL, 0, 1, 0) < 0) { - /* XXX: With PR#162 this can be changed to - BTree_ShouldSuppressKeyError() */ - PyObject* exc_type = PyErr_Occurred(); - if (exc_type == PyExc_KeyError) { + if (BTree_ShouldSuppressKeyError()) { PyErr_Clear(); } else { diff --git a/src/BTrees/TreeSetTemplate.c b/src/BTrees/TreeSetTemplate.c index c42312a..0e5e7de 100644 --- a/src/BTrees/TreeSetTemplate.c +++ b/src/BTrees/TreeSetTemplate.c @@ -110,10 +110,7 @@ TreeSet_discard(BTree *self, PyObject *args) return NULL; if (_BTree_set(self, key, NULL, 0, 1) < 0) { - /* XXX: After PR#162, this should change to - BTree_ShouldSuppressKeyError() */ - PyObject* exc_type = PyErr_Occurred(); - if (exc_type == PyExc_KeyError) { + if (BTree_ShouldSuppressKeyError()) { PyErr_Clear(); } else if (PyErr_ExceptionMatches(PyExc_TypeError)) { @@ -382,10 +379,7 @@ TreeSet_isub(BTree* self, PyObject* other) } } if (_BTree_set(self, v, NULL, 0, 1) < 0) { - /* XXX: With PR#162 this can be changed to - BTree_ShouldSuppressKeyError() */ - PyObject* exc_type = PyErr_Occurred(); - if (exc_type == PyExc_KeyError) { + if (BTree_ShouldSuppressKeyError()) { PyErr_Clear(); } else { From 163f719a0fa528a2839187c7a412af11f4df374c Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Wed, 14 Apr 2021 05:24:40 -0500 Subject: [PATCH 4/4] Update docstring of _HasDefaultComparison to include info from review. --- src/BTrees/_datatypes.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/BTrees/_datatypes.py b/src/BTrees/_datatypes.py index d5867cb..b226a62 100644 --- a/src/BTrees/_datatypes.py +++ b/src/BTrees/_datatypes.py @@ -157,9 +157,17 @@ def supports_value_union(self): class _HasDefaultComparison(ABC): """ - An ABC for checking whether an item has default comparison. - - This automatically gets us some caching. + An `ABC _` for + checking whether an item has default comparison. + + All we have to do is override ``__subclasshook__`` to implement an + algorithm determining whether a class has default comparison. + Python and the ABC machinery will take care of translating + ``isinstance(thing, _HasDefaultComparison)`` into something like + ``_HasDefaultComparison.__subclasshook__(type(thing))``. The ABC + handles caching the answer (based on exact classes, no MRO), and + getting the type from ``thing`` (including mostly dealing with + old-style) classes on Python 2. """ # Comparisons only use special methods defined on the