/
BooleanIndex.py
241 lines (200 loc) · 8.87 KB
/
BooleanIndex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
##############################################################################
#
# Copyright (c) 2002 Zope Foundation and Contributors.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
from logging import getLogger
from App.special_dtml import DTMLFile
from BTrees.IIBTree import IIBTree, IITreeSet, IISet
from BTrees.IIBTree import union, intersection, difference
import BTrees.Length
from ZODB.POSException import ConflictError
from Products.PluginIndexes.common.util import parseIndexRequest
from Products.PluginIndexes.common.UnIndex import _marker
from Products.PluginIndexes.common.UnIndex import UnIndex
LOG = getLogger('BooleanIndex.UnIndex')
class BooleanIndex(UnIndex):
"""Index for booleans
self._index = set([documentId1, documentId2])
self._unindex = {documentId:[True/False]}
self._length is the length of the unindex
self._index_length is the length of the index
False doesn't have actual entries in _index.
"""
meta_type = "BooleanIndex"
manage_options = (
{'label': 'Settings',
'action': 'manage_main'},
{'label': 'Browse',
'action': 'manage_browse'},
)
query_options = ["query"]
manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
manage_main._setName('manage_main')
manage_browse = DTMLFile('../dtml/browseIndex', globals())
_index_value = 1
_index_length = None
def clear(self):
self._index = IITreeSet()
self._index_length = BTrees.Length.Length()
self._index_value = 1
self._unindex = IIBTree()
self._length = BTrees.Length.Length()
self._counter = BTrees.Length.Length()
def histogram(self):
"""Return a mapping which provides a histogram of the number of
elements found at each point in the index.
"""
histogram = {}
indexed = bool(self._index_value)
histogram[indexed] = self._index_length.value
histogram[not indexed] = self._length.value - self._index_length.value
return histogram
def _invert_index(self, documentId=None):
self._index_value = indexed = int(not self._index_value)
self._index.clear()
length = 0
for rid, value in self._unindex.iteritems():
if value == indexed:
self._index.add(rid)
length += 1
# documentId is the rid of the currently processed object that
# triggered the invert. in the case of unindexing, the rid hasn't
# been removed from the unindex yet. While indexing, the rid will
# be added to the index and unindex after this method is done
if documentId is not None:
self._index.remove(documentId)
length -= 1
self._index_length = BTrees.Length.Length(length)
def insertForwardIndexEntry(self, entry, documentId):
"""If the value matches the indexed one, insert into treeset
"""
# When we get the first entry, decide to index the opposite of what
# we got, as indexing zero items is fewer than one.
if self._length.value == 0:
self._index_value = int(not bool(entry))
# if the added entry value is index value, insert it into index
if bool(entry) is bool(self._index_value):
self._index_length.change(1)
self._index.insert(documentId)
# insert value into global unindex (before computing index invert)
self._unindex[documentId] = entry
self._length.change(1)
# is the index (after adding the current entry) larger than 60%
# of the total length? than switch the indexed value
if bool(entry) is bool(self._index_value):
if (self._index_length.value) >= ((self._length.value) * 0.6):
self._invert_index()
def removeForwardIndexEntry(self, entry, documentId, check=True):
"""Take the entry provided and remove any reference to documentId
in its entry in the index.
"""
if bool(entry) is bool(self._index_value):
try:
self._index.remove(documentId)
self._index_length.change(-1)
except ConflictError:
raise
except Exception:
LOG.exception(
'%s: unindex_object could not remove documentId %s '
'from index %s. This should not happen.' % (
self.__class__.__name__,
str(documentId),
str(self.id)))
elif check:
# is the index (after removing the current entry) larger than
# 60% of the total length? than switch the indexed value
if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
self._invert_index(documentId)
return
def _index_object(self, documentId, obj, threshold=None, attr=''):
""" index and object 'obj' with integer id 'documentId'"""
returnStatus = 0
# First we need to see if there's anything interesting to look at
datum = self._get_object_datum(obj, attr)
# Make it boolean, int as an optimization
if datum is not _marker:
datum = int(bool(datum))
# We don't want to do anything that we don't have to here, so we'll
# check to see if the new and existing information is the same.
oldDatum = self._unindex.get(documentId, _marker)
if datum != oldDatum:
if oldDatum is not _marker:
self.removeForwardIndexEntry(oldDatum, documentId, check=False)
if datum is _marker:
try:
del self._unindex[documentId]
self._length.change(-1)
except ConflictError:
raise
except Exception:
LOG.error('Should not happen: oldDatum was there, now '
'its not, for document with id %s' %
documentId)
if datum is not _marker:
self.insertForwardIndexEntry(datum, documentId)
returnStatus = 1
return returnStatus
def unindex_object(self, documentId):
""" Unindex the object with integer id 'documentId' and don't
raise an exception if we fail
"""
unindexRecord = self._unindex.get(documentId, _marker)
if unindexRecord is _marker:
return None
self._increment_counter()
self.removeForwardIndexEntry(unindexRecord, documentId)
try:
del self._unindex[documentId]
self._length.change(-1)
except ConflictError:
raise
except Exception:
LOG.debug('Attempt to unindex nonexistent document'
' with id %s' % documentId, exc_info=True)
def _apply_index(self, request, resultset=None):
record = parseIndexRequest(request, self.id, self.query_options)
if record.keys is None:
return None
index = self._index
indexed = self._index_value
for key in record.keys:
if bool(key) is bool(indexed):
# If we match the indexed value, check index
return (intersection(index, resultset), (self.id, ))
else:
# Otherwise, remove from resultset or _unindex
if resultset is None:
return (union(difference(self._unindex, index), IISet([])),
(self.id, ))
else:
return (difference(resultset, index), (self.id, ))
return (IISet(), (self.id, ))
def indexSize(self):
"""Return distinct values, as an optimization we always claim 2."""
return 2
def items(self):
# return a list of value to int set of rid tuples
indexed = self._index_value
items = [(bool(indexed), self._index)]
false = IISet()
for rid, value in self._unindex.iteritems():
if value != indexed:
false.add(rid)
items.append((not bool(indexed), false))
return items
manage_addBooleanIndexForm = DTMLFile('dtml/addBooleanIndex', globals())
def manage_addBooleanIndex(self, id, extra=None,
REQUEST=None, RESPONSE=None, URL3=None):
"""Add a boolean index"""
return self.manage_addIndex(
id, 'BooleanIndex', extra=extra,
REQUEST=REQUEST, RESPONSE=RESPONSE, URL1=URL3)