Continue with further development of MissingValue support

zopefoundation · Apr 28, 2019 · f8a8630 · f8a8630
1 parent d462dae
commit f8a8630
Show file tree

Hide file tree

Showing 4 changed files with 92 additions and 34 deletions.
diff --git a/src/Products/PluginIndexes/FieldIndex/tests.py b/src/Products/PluginIndexes/FieldIndex/tests.py
@@ -210,7 +210,7 @@ def testPopulated(self):
         self._checkApply(self._not_4, values[:7])
         self._checkApply(self._not_5, values[1:])
         self._checkApply(self._not_6, values[0:1])
-        self._checkApply(self._not_7, values[0:])
+        self._checkApply(self._not_7, [])
 
     def testNone(self):
         # Make sure None is ignored.

diff --git a/src/Products/PluginIndexes/KeywordIndex/KeywordIndex.py b/src/Products/PluginIndexes/KeywordIndex/KeywordIndex.py
@@ -25,6 +25,7 @@
     IIndexingMissingValue,
 )
 
+_marker = []
 LOG = getLogger('Zope.KeywordIndex')
 
 try:
@@ -64,6 +65,8 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
         # we'll do so.
 
         newKeywords = self._get_object_keywords(obj, attr)
+        if newKeywords is None:
+            return self._not_indexed.insert(documentId)
 
         oldKeywords = self._unindex.get(documentId, None)
 
@@ -72,8 +75,7 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
             try:
                 for kw in newKeywords:
                     self.insertForwardIndexEntry(kw, documentId)
-                if newKeywords:
-                    self._unindex[documentId] = list(newKeywords)
+                self._unindex[documentId] = list(newKeywords)
             except TypeError:
                 return 0
         else:
@@ -86,10 +88,7 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
             rdiff = difference(newKeywords, oldKeywords)
             if fdiff or rdiff:
                 # if we've got forward or reverse changes
-                if newKeywords:
-                    self._unindex[documentId] = list(newKeywords)
-                else:
-                    del self._unindex[documentId]
+                self._unindex[documentId] = list(newKeywords)
                 if fdiff:
                     self.unindex_objectKeywords(documentId, fdiff)
                 if rdiff:
@@ -98,12 +97,14 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
         return 1
 
     def _get_object_keywords(self, obj, attr):
-        newKeywords = getattr(obj, attr, ())
+        newKeywords = getattr(obj, attr, None)
         if safe_callable(newKeywords):
             try:
                 newKeywords = newKeywords()
             except (AttributeError, TypeError):
-                return ()
+                return None
+        if newKeywords is None:
+            return None
         if not newKeywords:
             return ()
         elif isinstance(newKeywords, basestring):
@@ -128,15 +129,27 @@ def unindex_objectKeywords(self, documentId, keywords):
     def unindex_object(self, documentId):
         """ carefully unindex the object with integer id 'documentId'"""
 
-        keywords = self._unindex.get(documentId, None)
+        keywords = self._unindex.get(documentId, _marker)
 
-        # Couldn't we return 'None' immediately
-        # if keywords is 'None' (or _marker)???
+        if keywords is _marker:
+            try:
+                self._not_indexed.remove(documentId)
+                self._increment_counter()
+            except KeyError:
+                LOG.debug('%s: Attempt to unindex nonexistent '
+                          'document with id %s' %
+                          (self.__class__.__name__, documentId),
+                          exc_info=True)
+            return None
 
-        if keywords is not None:
-            self._increment_counter()
+        self._increment_counter()
+
+        if keywords is None:
+            self._not_indexed.remove(documentId)
+            return
 
         self.unindex_objectKeywords(documentId, keywords)
+
         try:
             del self._unindex[documentId]
         except KeyError:

diff --git a/src/Products/PluginIndexes/KeywordIndex/tests.py b/src/Products/PluginIndexes/KeywordIndex/tests.py
@@ -70,8 +70,9 @@ def setUp(self):
                         (4, Dummy(['a', 'b', 'c', 'd'])),
                         (5, Dummy(['a', 'b', 'c', 'e'])),
                         (6, Dummy(['a', 'b', 'c', 'e', 'f'])),
-                        (7, Dummy([None])),
-                        (8, Dummy(['0'])),
+                        (7, Dummy(['0'])),
+                        (8, Dummy([])),
+                        (9, Dummy(None)),
                         ]
         self._noop_req = {'bar': 123}
         self._all_req = {'foo': ['a']}
@@ -100,12 +101,15 @@ def checkApply():
             result, used = self._index._apply_index(req)
             assert used == ('foo', )
             assert len(result) == len(expectedValues), \
-                '%s | %s' % (list(result),
-                             list(map(lambda x: x[0], expectedValues)))
+                '%s | %s | %s' % (list(result),
+                                  list(map(lambda x: x[0], expectedValues)),
+                                  req)
 
             if hasattr(result, 'keys'):
                 result = result.keys()
             for k, v in expectedValues:
+                if k not in result:
+                    import pdb; pdb.set_trace()
                 assert k in result
 
         index = self._index
@@ -167,34 +171,37 @@ def testPopulated(self):
         self._populateIndex()
         values = self._values
 
-        assert len(self._index.referencedObjects()) == len(values)
+        self.assertEqual(len(self._index.referencedObjects()), len(values) - 1)
         assert self._index.getEntryForObject(1234) is None
         assert (self._index.getEntryForObject(1234, self._marker)
                 is self._marker)
         self._index.unindex_object(1234)  # nothrow
-        self.assertEqual(self._index.indexSize(), len(values) - 1)
+        self.assertEqual(self._index.indexSize(), len(values) - 3)
 
         for k, v in values:
-            entry = self._index.getEntryForObject(k)
+            entry = self._index.getEntryForObject(k, None)
+            if entry is None:
+                self.assertEqual(entry, v.foo())
+                continue
             entry.sort()
             kw = sorted(set(v.foo()))
             self.assertEqual(entry, kw)
 
-        assert len(list(self._index.uniqueValues('foo'))) == len(values) - 1
+        assert len(list(self._index.uniqueValues('foo'))) == len(values) - 3
         assert self._index._apply_index(self._noop_req) is None
 
-        self._checkApply(self._all_req, values[:-2])
+        self._checkApply(self._all_req, values[:-3])
         self._checkApply(self._some_req, values[5:7])
         self._checkApply(self._overlap_req, values[2:7])
-        self._checkApply(self._string_req, values[:-2])
-        self._checkApply(self._miss_req_1, values[7])
-        self._checkApply(self._miss_req_2, values[:8])
+        self._checkApply(self._string_req, values[:-3])
+        self._checkApply(self._miss_req_1, values[9:10])
+        self._checkApply(self._miss_req_2, values[:7] + values[9:10])
 
         self._checkApply(self._not_1, [])
         self._checkApply(self._not_2, values[5:6])
-        self._checkApply(self._not_3, values[:8])
-        self._checkApply(self._not_4, values[:5])
-        self._checkApply(self._not_5, values[:8])
+        self._checkApply(self._not_3, values[:7] + values[9:10])
+        self._checkApply(self._not_4, values[:5] + values[9:10])
+        self._checkApply(self._not_5, values[:7] + values[9:10])
         self._checkApply(self._not_6, values[2:7])
         self._checkApply(self._not_7, values[:8])
 

diff --git a/src/Products/PluginIndexes/unindex.py b/src/Products/PluginIndexes/unindex.py
@@ -25,6 +25,7 @@
     IITreeSet,
     IISet,
     multiunion,
+    union,
 )
 from BTrees.IOBTree import IOBTree
 from BTrees.Length import Length
@@ -132,6 +133,7 @@ def clear(self):
         self._length = Length()
         self._index = OOBTree()
         self._unindex = IOBTree()
+        self._not_indexed = IITreeSet()
 
         if self._counter is None:
             self._counter = Length()
@@ -258,12 +260,14 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
                 # BTrees 4.0+ will throw a TypeError
                 # "object has default comparison" and won't let it be indexed.
                 return 0
+        else:
+            datum = self._convert(datum, default=_marker)
 
         # We don't want to do anything that we don't have to here, so we'll
         # check to see if the new and existing information is the same.
         oldDatum = self._unindex.get(documentId, _marker)
         if datum != oldDatum:
-            if oldDatum is not _marker:
+            if oldDatum not in [_marker, MissingValue]:
                 self.removeForwardIndexEntry(oldDatum, documentId)
                 if datum is _marker:
                     try:
@@ -275,8 +279,11 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
                                   'now its not, for document: %s' % documentId)
 
             if datum is not _marker:
-                self.insertForwardIndexEntry(datum, documentId)
-                self._unindex[documentId] = datum
+                if datum is MissingValue:
+                    self._not_indexed.insert(documentId)
+                else:
+                    self.insertForwardIndexEntry(datum, documentId)
+                    self._unindex[documentId] = datum
 
             returnStatus = 1
 
@@ -316,12 +323,24 @@ def unindex_object(self, documentId):
         raise an exception if we fail
         """
         unindexRecord = self._unindex.get(documentId, _marker)
-        self._increment_counter()
 
         if unindexRecord is _marker:
+            if IIndexingMissingValue.providedBy(self):
+                try:
+                    self._not_indexed.remove(documentId)
+                    self._increment_counter()
+                except ConflictError:
+                    raise
+                except Exception:
+                    LOG.debug('Attempt to unindex nonexistent document'
+                              ' with id %s' % documentId, exc_info=True)
+
             return None
 
+        self._increment_counter()
+
         self.removeForwardIndexEntry(unindexRecord, documentId)
+
         try:
             del self._unindex[documentId]
         except ConflictError:
@@ -461,14 +480,23 @@ def query_index(self, record, resultset=None):
                         not_parm = list(map(self._convert, not_parm))
                         exclude = self._apply_not(not_parm, resultset)
                         cached = difference(cached, exclude)
-
+                        # pure not
+                        if not record.keys \
+                           and IIndexingMissingValue.providedBy(self) \
+                           and MissingValue not in not_parm:
+                            cached = union(cached, self._not_indexed)
                     return cached
 
+        with_miss = False
+        # pure not
         if not record.keys and not_parm:
             # convert into indexed format
             not_parm = list(map(self._convert, not_parm))
             # we have only a 'not' query
             record.keys = [k for k in index.keys() if k not in not_parm]
+            if IIndexingMissingValue.providedBy(self) \
+               and MissingValue not in not_parm:
+                with_miss = True
         else:
             # convert query arguments into indexed format
             record.keys = list(map(self._convert, record.keys))
@@ -517,6 +545,8 @@ def query_index(self, record, resultset=None):
                 if not_parm:
                     exclude = self._apply_not(not_parm, resultset)
                     result = difference(result, exclude)
+                    if with_miss:
+                        result = union(result, self._not_indexed)
                 return result
 
             if operator == 'or':
@@ -564,6 +594,10 @@ def query_index(self, record, resultset=None):
                     # other object. BTrees 4.0+ will throw a TypeError
                     # "object has default comparison".
                     continue
+                if k is MissingValue:
+                    setlist.append(self._not_indexed)
+                    continue
+
                 s = index.get(k, None)
                 # If None, try to bail early
                 if s is None:
@@ -595,6 +629,8 @@ def query_index(self, record, resultset=None):
                 if not_parm:
                     exclude = self._apply_not(not_parm, resultset)
                     result = difference(result, exclude)
+                    if with_miss:
+                        result = union(result, self._not_indexed)
                 return result
 
             if operator == 'or':
@@ -642,6 +678,8 @@ def query_index(self, record, resultset=None):
         if not_parm:
             exclude = self._apply_not(not_parm, resultset)
             r = difference(r, exclude)
+            if with_miss:
+                r = union(r, self._not_indexed)
         return r
 
     def hasUniqueValuesFor(self, name):