Skip to content

Commit

Permalink
Add support for and/not operators
Browse files Browse the repository at this point in the history
  • Loading branch information
andbag committed Jul 29, 2016
1 parent 76ae751 commit 1983a37
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 92 deletions.
24 changes: 14 additions & 10 deletions src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@

LOG = logging.getLogger('CompositeIndex')

QUERY_OPTIONS = {'FieldIndex': ["query", "range"],
'KeywordIndex': ["query", "operator", "range"]
QUERY_OPTIONS = {'FieldIndex': ('query', 'range', 'not'),
'KeywordIndex': ('query', 'range', 'not', 'operator'),
}

MIN_COMPONENTS = 2
Expand Down Expand Up @@ -420,16 +420,20 @@ def make_query(self, query):
query_options = QUERY_OPTIONS[c.meta_type]
rec = parseIndexRequest(query, c.id, query_options)

# not supported: 'not' parameter
not_parm = rec.get('not', None)
if not rec.keys and not_parm:
continue

# not supported: 'and' operator
operator = rec.get('operator', self.useOperator)
if rec.keys and operator == 'and':
continue

# continue if no keys in query were set
if rec.keys is None:
continue

# not supported: not / exclude parameter
not_parm = rec.get('not', None)
if not rec.keys and not_parm:
return query
#raise NotImplementedError

c_records.append((c.id, rec))

# return if fewer than MIN_COMPONENTS query attributes were caught
Expand Down Expand Up @@ -459,8 +463,8 @@ def make_query(self, query):
if c_id in cquery:
del cquery[c_id]

# LOG.debug('%s: query build %r' % (self.__class__.__name__,
# [(c_id, rec.keys)
#LOG.debug('%s: query build from %r' % (self.__class__.__name__,
# [(c_id, rec.keys, rec.get('operator'))
# for c_id, rec in c_records]))

return cquery
Expand Down
230 changes: 148 additions & 82 deletions src/Products/PluginIndexes/CompositeIndex/tests/testCompositeIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,42 +64,55 @@ def __init__(self, id):
class CompositeIndexTests(unittest.TestCase):

def setUp(self):

self._index = CompositeIndex('comp01',
extra=[{'id': 'portal_type',
'meta_type': 'FieldIndex',
'attributes': ''},
{'id': 'review_state',
'meta_type': 'FieldIndex',
'attributes': ''},
{'id': 'is_default_page',
'meta_type': 'FieldIndex',
'attributes': ''},
{'id': 'subject',
'meta_type': 'KeywordIndex',
'attributes': ''}
])

self._field_indexes = (FieldIndex('review_state'),
FieldIndex('portal_type'),
FieldIndex('is_default_page'),
KeywordIndex('subject'))

def _defaultSearch(self, req, expectedValues=None):
self._indexes = [FieldIndex('review_state'),
FieldIndex('portal_type'),
FieldIndex('is_default_page'),
KeywordIndex('subject'),
CompositeIndex('comp01',
extra=[{'id': 'portal_type',
'meta_type': 'FieldIndex',
'attributes': ''},
{'id': 'review_state',
'meta_type': 'FieldIndex',
'attributes': ''},
{'id': 'is_default_page',
'meta_type': 'FieldIndex',
'attributes': ''},
{'id': 'subject',
'meta_type': 'KeywordIndex',
'attributes': ''}
])
]

def _getIndex(self, name):
for idx in self._indexes:
if idx.id == name:
return idx

def _defaultSearch(self, req, expectedValues=None, verbose=False):

rs = None
for index in self._field_indexes:
for index in self._indexes:
st = time()
duration = (time() - st) * 1000

limit_result = ILimitedResultIndex.providedBy(index)
if limit_result:
r = index._apply_index(req, rs)
else:
r = index._apply_index(req)
duration = (time() - st) * 1000

if r is not None:
r, u = r
w, rs = weightedIntersection(rs, r)
if not rs:
break

if verbose and (index.id in req):
logger.info("index %s: %s hits in %3.2fms" %
(index.id, r and len(r) or 0, duration))

if not rs:
return set()

Expand All @@ -108,103 +121,131 @@ def _defaultSearch(self, req, expectedValues=None):

return set(rs)

def _compositeSearch(self, req, expectedValues=None):
query = self._index.make_query(req)
rs = None
r = self._index._apply_index(query)
def _compositeSearch(self, req, expectedValues=None, verbose=False):
comp_index = self._getIndex('comp01')
query = comp_index.make_query(req)

if r is not None:
r, u = r
w, rs = weightedIntersection(rs, r)
if not rs:
return set()

if hasattr(rs, 'keys'):
rs = rs.keys()
# catch successful?
self.assertTrue('comp01' in query)

return set(rs)
return self._defaultSearch(query,
expectedValues=expectedValues,
verbose=verbose)

def enableLog(self):
    """Route log output at INFO level to standard output.

    Sets the level of ``logger.root`` to ``logging.INFO`` and attaches a
    new stream handler writing to ``sys.stdout``. A fresh handler is
    added on every call.
    """
    root = logger.root
    root.setLevel(logging.INFO)
    stdout_handler = logging.StreamHandler(sys.stdout)
    root.addHandler(stdout_handler)

def _populateIndexes(self, k, v):
self._index.index_object(k, v)
for index in self._field_indexes:
for index in self._indexes:
index.index_object(k, v)

def printIndexInfo(self):
def info(index):
size = index.indexSize()
n_obj = index.numObjects()
ratio = float(size) / float(n_obj)
logger.info('<id: %s S: %s N: %s R: %.3f pm>' %
logger.info('<id: %15s unique keys: '
'%3s length: %5s ratio: %6.3f pm>' %
(index.id, size, n_obj, ratio * 1000))
return ratio

info(self._index)
for index in self._field_indexes:
#indexes = sorted(self._indexes, key=info, reverse=True)
#self._indexes = indexes
for index in self._indexes:
info(index)

def _clearIndexes(self):
self._index.clear()
for index in self._field_indexes:
for index in self._indexes:
index.clear()

def testPerformance(self):
self.enableLog()

lengths = [10000, ]

queries = [{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'}},
{'portal_type': {'query': 'Document'},
'subject': {'query': ['subject_1', 'subject_3']}},
{'portal_type': {'query': 'Document'},
'subject': {'query': 'subject_2'}},
{'portal_type': {'query': 'Document'},
'is_default_page': {'query': False}},
{'review_state': {'query': 'pending'},
'is_default_page': {'query': False}},
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': False}},
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': True}},
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': False},
'subject': {'query': ['subject_2', 'subject_3'],
'operator': 'or'}},
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': True},
'subject': {'query': ['subject_2', 'subject_3'],
'operator': 'or'}},
queries = [('query01_default_two_indexes',
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'}}),
('query02_default_two_indexes',
{'portal_type': {'query': 'Document'},
'subject': {'query': 'subject_2'}}),
('query03_default_two_indexes',
{'portal_type': {'query': 'Document'},
'subject': {'query': ['subject_1', 'subject_3']}}),
('query04_default_two_indexes',
{'portal_type': {'query': 'Document'},
'is_default_page': {'query': False}}),
('query05_default_two_indexes',
{'portal_type': {'query': 'Document'},
'is_default_page': {'query': True}}),
('query06_default_two_indexes',
{'review_state': {'query': 'pending'},
'is_default_page': {'query': False}}),
('query07_default_three_indexes',
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': False}}),
('query08_default_three_indexes',
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': True}}),
('query09_default_four_indexes',
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': True},
'subject': {'query': ['subject_2', 'subject_3'],
'operator': 'or'}}),
('query10_and_operator_four_indexes',
{'portal_type': {'query': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': True},
'subject': {'query': ['subject_1', 'subject_3'],
'operator': 'and'}}),
('query11_and_operator_four_indexes',
{'portal_type': {'query': ('Document', 'News')},
'review_state': {'query': 'pending'},
'is_default_page': {'query': True},
'subject': {'query': ['subject_1', 'subject_3'],
'operator': 'and'}}),
('query12_not_operator_four_indexes',
{'portal_type': {'not': 'Document'},
'review_state': {'query': 'pending'},
'is_default_page': {'query': True},
'subject': {'query': ['subject_2', 'subject_3'],
'operator': 'or'}}),
('query13_not_operator_four_indexes',
{'portal_type': {'query': 'Document'},
'review_state': {'not': ('pending', 'visible')},
'is_default_page': {'query': True},
'subject': {'query': ['subject_2', 'subject_3']}}),
]

def profileSearch(query, verbose=False):
def profileSearch(query, warmup=False, verbose=False):

st = time()
res1 = self._defaultSearch(query)
res1 = self._defaultSearch(query, verbose=False)
duration1 = (time() - st) * 1000

if verbose:
logger.info("atomic: %s hits in %3.2fms" %
(len(res1), duration1))

st = time()
res2 = self._compositeSearch(query)
res2 = self._compositeSearch(query, verbose=False)
duration2 = (time() - st) * 1000

if verbose:
logger.info("composite: %s hits in %3.2fms" %
(len(res2), duration2))

if verbose:
logger.info('[composite/atomic] factor %3.2f\n' %
logger.info('[composite/atomic] factor %3.2f' %
(duration1 / duration2,))

# composite search must be faster than default search
assert duration2 < duration1
if not warmup:
# composite search must be roughly faster than default search
assert 0.95 * duration2 < duration1, (duration2, duration1)

# is result identical
self.assertEqual(len(res1), len(res2))
Expand All @@ -214,20 +255,30 @@ def profileSearch(query, verbose=False):
self._clearIndexes()
logger.info('************************************\n'
'indexing %s objects' % l)

for i in range(l):
name = '%s' % i
obj = RandomTestObject(name)
self._populateIndexes(i, obj)

logger.info('indexing finished\n')

self.printIndexInfo()

logger.info('\nstart queries')
for query in queries:
logger.info("query %s" % query.keys())
# warming up indexes
profileSearch(query)
# in memory measure

# warming up indexes
logger.info("warming up indexes")
for name, query in queries:
profileSearch(query, warmup=True)

# in memory measure
logger.info("in memory measure")
for name, query in queries:
logger.info("\nquery: %s" % name)
profileSearch(query, verbose=True)
logger.info('queries finished')

logger.info('\nqueries finished')

logger.info('************************************')

Expand Down Expand Up @@ -256,13 +307,28 @@ def testSearch(self):
'portal_type': {'query': ('News', 'Document')},
'is_default_page': {'query': False},
'subject': {'query': ('subject_1', 'subject_2'),
'operator': 'or'}}
'operator': 'or'}},
{'review_state': {'query': ('pending', 'visible')},
'portal_type': {'query': ('News', 'Document')},
'is_default_page': {'query': False},
'subject': {'query': ('subject_1', 'subject_2'),
'operator': 'or'}},
{'review_state': {'query': ('pending', 'visible')},
'portal_type': {'query': ('News', 'Document')},
'is_default_page': {'query': False},
'subject': {'query': ('subject_1', 'subject_2'),
'operator': 'and'}},
{'review_state': {'not': ('pending', 'visible')},
'portal_type': {'query': ('News', 'Document')},
'is_default_page': {'query': False},
'subject': {'query': ('subject_2',)}},
]

for query in queries:

res1 = self._defaultSearch(query)
res2 = self._compositeSearch(query)

self.assertEqual(res1, res2)


Expand Down

0 comments on commit 1983a37

Please sign in to comment.