Skip to content

Commit

Permalink
Continue with further development of MissingValue support
Browse files Browse the repository at this point in the history
  • Loading branch information
andbag committed Apr 28, 2019
1 parent d462dae commit f8a8630
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 34 deletions.
2 changes: 1 addition & 1 deletion src/Products/PluginIndexes/FieldIndex/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def testPopulated(self):
self._checkApply(self._not_4, values[:7])
self._checkApply(self._not_5, values[1:])
self._checkApply(self._not_6, values[0:1])
self._checkApply(self._not_7, values[0:])
self._checkApply(self._not_7, [])

def testNone(self):
# Make sure None is ignored.
Expand Down
39 changes: 26 additions & 13 deletions src/Products/PluginIndexes/KeywordIndex/KeywordIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
IIndexingMissingValue,
)

_marker = []
LOG = getLogger('Zope.KeywordIndex')

try:
Expand Down Expand Up @@ -64,6 +65,8 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
# we'll do so.

newKeywords = self._get_object_keywords(obj, attr)
if newKeywords is None:
return self._not_indexed.insert(documentId)

oldKeywords = self._unindex.get(documentId, None)

Expand All @@ -72,8 +75,7 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
try:
for kw in newKeywords:
self.insertForwardIndexEntry(kw, documentId)
if newKeywords:
self._unindex[documentId] = list(newKeywords)
self._unindex[documentId] = list(newKeywords)
except TypeError:
return 0
else:
Expand All @@ -86,10 +88,7 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
rdiff = difference(newKeywords, oldKeywords)
if fdiff or rdiff:
# if we've got forward or reverse changes
if newKeywords:
self._unindex[documentId] = list(newKeywords)
else:
del self._unindex[documentId]
self._unindex[documentId] = list(newKeywords)
if fdiff:
self.unindex_objectKeywords(documentId, fdiff)
if rdiff:
Expand All @@ -98,12 +97,14 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
return 1

def _get_object_keywords(self, obj, attr):
newKeywords = getattr(obj, attr, ())
newKeywords = getattr(obj, attr, None)
if safe_callable(newKeywords):
try:
newKeywords = newKeywords()
except (AttributeError, TypeError):
return ()
return None
if newKeywords is None:
return None
if not newKeywords:
return ()
elif isinstance(newKeywords, basestring):
Expand All @@ -128,15 +129,27 @@ def unindex_objectKeywords(self, documentId, keywords):
def unindex_object(self, documentId):
""" carefully unindex the object with integer id 'documentId'"""

keywords = self._unindex.get(documentId, None)
keywords = self._unindex.get(documentId, _marker)

# Couldn't we return 'None' immediately
# if keywords is 'None' (or _marker)???
if keywords is _marker:
try:
self._not_indexed.remove(documentId)
self._increment_counter()
except KeyError:
LOG.debug('%s: Attempt to unindex nonexistent '
'document with id %s' %
(self.__class__.__name__, documentId),
exc_info=True)
return None

if keywords is not None:
self._increment_counter()
self._increment_counter()

if keywords is None:
self._not_indexed.remove(documentId)
return

self.unindex_objectKeywords(documentId, keywords)

try:
del self._unindex[documentId]
except KeyError:
Expand Down
37 changes: 22 additions & 15 deletions src/Products/PluginIndexes/KeywordIndex/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ def setUp(self):
(4, Dummy(['a', 'b', 'c', 'd'])),
(5, Dummy(['a', 'b', 'c', 'e'])),
(6, Dummy(['a', 'b', 'c', 'e', 'f'])),
(7, Dummy([None])),
(8, Dummy(['0'])),
(7, Dummy(['0'])),
(8, Dummy([])),
(9, Dummy(None)),
]
self._noop_req = {'bar': 123}
self._all_req = {'foo': ['a']}
Expand Down Expand Up @@ -100,12 +101,15 @@ def checkApply():
result, used = self._index._apply_index(req)
assert used == ('foo', )
assert len(result) == len(expectedValues), \
'%s | %s' % (list(result),
list(map(lambda x: x[0], expectedValues)))
'%s | %s | %s' % (list(result),
list(map(lambda x: x[0], expectedValues)),
req)

if hasattr(result, 'keys'):
result = result.keys()
for k, v in expectedValues:
if k not in result:
import pdb; pdb.set_trace()
assert k in result

index = self._index
Expand Down Expand Up @@ -167,34 +171,37 @@ def testPopulated(self):
self._populateIndex()
values = self._values

assert len(self._index.referencedObjects()) == len(values)
self.assertEqual(len(self._index.referencedObjects()), len(values) - 1)
assert self._index.getEntryForObject(1234) is None
assert (self._index.getEntryForObject(1234, self._marker)
is self._marker)
self._index.unindex_object(1234) # nothrow
self.assertEqual(self._index.indexSize(), len(values) - 1)
self.assertEqual(self._index.indexSize(), len(values) - 3)

for k, v in values:
entry = self._index.getEntryForObject(k)
entry = self._index.getEntryForObject(k, None)
if entry is None:
self.assertEqual(entry, v.foo())
continue
entry.sort()
kw = sorted(set(v.foo()))
self.assertEqual(entry, kw)

assert len(list(self._index.uniqueValues('foo'))) == len(values) - 1
assert len(list(self._index.uniqueValues('foo'))) == len(values) - 3
assert self._index._apply_index(self._noop_req) is None

self._checkApply(self._all_req, values[:-2])
self._checkApply(self._all_req, values[:-3])
self._checkApply(self._some_req, values[5:7])
self._checkApply(self._overlap_req, values[2:7])
self._checkApply(self._string_req, values[:-2])
self._checkApply(self._miss_req_1, values[7])
self._checkApply(self._miss_req_2, values[:8])
self._checkApply(self._string_req, values[:-3])
self._checkApply(self._miss_req_1, values[9:10])
self._checkApply(self._miss_req_2, values[:7] + values[9:10])

self._checkApply(self._not_1, [])
self._checkApply(self._not_2, values[5:6])
self._checkApply(self._not_3, values[:8])
self._checkApply(self._not_4, values[:5])
self._checkApply(self._not_5, values[:8])
self._checkApply(self._not_3, values[:7] + values[9:10])
self._checkApply(self._not_4, values[:5] + values[9:10])
self._checkApply(self._not_5, values[:7] + values[9:10])
self._checkApply(self._not_6, values[2:7])
self._checkApply(self._not_7, values[:8])

Expand Down
48 changes: 43 additions & 5 deletions src/Products/PluginIndexes/unindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
IITreeSet,
IISet,
multiunion,
union,
)
from BTrees.IOBTree import IOBTree
from BTrees.Length import Length
Expand Down Expand Up @@ -132,6 +133,7 @@ def clear(self):
self._length = Length()
self._index = OOBTree()
self._unindex = IOBTree()
self._not_indexed = IITreeSet()

if self._counter is None:
self._counter = Length()
Expand Down Expand Up @@ -258,12 +260,14 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
# BTrees 4.0+ will throw a TypeError
# "object has default comparison" and won't let it be indexed.
return 0
else:
datum = self._convert(datum, default=_marker)

# We don't want to do anything that we don't have to here, so we'll
# check to see if the new and existing information is the same.
oldDatum = self._unindex.get(documentId, _marker)
if datum != oldDatum:
if oldDatum is not _marker:
if oldDatum not in [_marker, MissingValue]:
self.removeForwardIndexEntry(oldDatum, documentId)
if datum is _marker:
try:
Expand All @@ -275,8 +279,11 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
'now its not, for document: %s' % documentId)

if datum is not _marker:
self.insertForwardIndexEntry(datum, documentId)
self._unindex[documentId] = datum
if datum is MissingValue:
self._not_indexed.insert(documentId)
else:
self.insertForwardIndexEntry(datum, documentId)
self._unindex[documentId] = datum

returnStatus = 1

Expand Down Expand Up @@ -316,12 +323,24 @@ def unindex_object(self, documentId):
raise an exception if we fail
"""
unindexRecord = self._unindex.get(documentId, _marker)
self._increment_counter()

if unindexRecord is _marker:
if IIndexingMissingValue.providedBy(self):
try:
self._not_indexed.remove(documentId)
self._increment_counter()
except ConflictError:
raise
except Exception:
LOG.debug('Attempt to unindex nonexistent document'
' with id %s' % documentId, exc_info=True)

return None

self._increment_counter()

self.removeForwardIndexEntry(unindexRecord, documentId)

try:
del self._unindex[documentId]
except ConflictError:
Expand Down Expand Up @@ -461,14 +480,23 @@ def query_index(self, record, resultset=None):
not_parm = list(map(self._convert, not_parm))
exclude = self._apply_not(not_parm, resultset)
cached = difference(cached, exclude)

# pure not
if not record.keys \
and IIndexingMissingValue.providedBy(self) \
and MissingValue not in not_parm:
cached = union(cached, self._not_indexed)
return cached

with_miss = False
# pure not
if not record.keys and not_parm:
# convert into indexed format
not_parm = list(map(self._convert, not_parm))
# we have only a 'not' query
record.keys = [k for k in index.keys() if k not in not_parm]
if IIndexingMissingValue.providedBy(self) \
and MissingValue not in not_parm:
with_miss = True
else:
# convert query arguments into indexed format
record.keys = list(map(self._convert, record.keys))
Expand Down Expand Up @@ -517,6 +545,8 @@ def query_index(self, record, resultset=None):
if not_parm:
exclude = self._apply_not(not_parm, resultset)
result = difference(result, exclude)
if with_miss:
result = union(result, self._not_indexed)
return result

if operator == 'or':
Expand Down Expand Up @@ -564,6 +594,10 @@ def query_index(self, record, resultset=None):
# other object. BTrees 4.0+ will throw a TypeError
# "object has default comparison".
continue
if k is MissingValue:
setlist.append(self._not_indexed)
continue

s = index.get(k, None)
# If None, try to bail early
if s is None:
Expand Down Expand Up @@ -595,6 +629,8 @@ def query_index(self, record, resultset=None):
if not_parm:
exclude = self._apply_not(not_parm, resultset)
result = difference(result, exclude)
if with_miss:
result = union(result, self._not_indexed)
return result

if operator == 'or':
Expand Down Expand Up @@ -642,6 +678,8 @@ def query_index(self, record, resultset=None):
if not_parm:
exclude = self._apply_not(not_parm, resultset)
r = difference(r, exclude)
if with_miss:
r = union(r, self._not_indexed)
return r

def hasUniqueValuesFor(self, name):
Expand Down

0 comments on commit f8a8630

Please sign in to comment.