Skip to content

Commit

Permalink
Optimize "not" queries (#124 #125)
Browse files Browse the repository at this point in the history
* optimize plain not query

* Further optimize excluding results in not queries (#125)

* Further optimize excluding results in not queries

Usually the number of parameters that have to be excluded in a "not" query is much lower than the number of values in the index, so it makes sense to remove them from the list of index keys rather than testing every key against the excluded values.

* duration1 appears to be lower after the optimizations

Co-authored-by: Alessandro Pisa <alessandro.pisa@gmail.com>
  • Loading branch information
jensens and ale-rt committed Oct 30, 2021
1 parent 7f614a6 commit ae1199b
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 11 deletions.
2 changes: 1 addition & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Changelog

6.1 (unreleased)
----------------

- Improve performance of simple ``not`` queries on large catalogs.
- Fix case where multiple indexes with similar names separated by ``_`` were interpreted as options.
(`#78 <https://github.com/zopefoundation/Products.ZCatalog/issues/78>`_)
- Fix reversed sorting by multiple indexes by forcing the ``_sort_iterate_resultset``
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def profileSearch(query, warmup=False, verbose=False):
# search must be roughly faster than default search
if res1 and res2:
self.assertLess(
0.5 * duration2,
0.4 * duration2,
duration1,
(duration2, duration1, query))

Expand Down
29 changes: 20 additions & 9 deletions src/Products/PluginIndexes/unindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,15 +479,6 @@ def query_index(self, record, resultset=None):

return cached

if not record.keys and not_parm:
# convert into indexed format
not_parm = list(map(self._convert, not_parm))
# we have only a 'not' query
record.keys = [k for k in index.keys() if k not in not_parm]
else:
# convert query arguments into indexed format
record.keys = list(map(self._convert, record.keys))

# Range parameter
range_parm = record.get('range', None)
if range_parm:
Expand All @@ -503,6 +494,26 @@ def query_index(self, record, resultset=None):
opr = record.usage.lower().split(':')
opr, opr_args = opr[0], opr[1:]

# not query
if not record.keys and not_parm:
# convert into indexed format
not_parm = list(map(self._convert, not_parm))
# we have only a 'not' query
# shortcut/optimization if we have no 'opr' (i.e. no range)
if resultset is not None and opr is None:
i_not_parm = self._apply_not(not_parm, resultset)
if i_not_parm:
return difference(resultset, i_not_parm)
record.keys = list(index)
for parm in not_parm:
try:
record.keys.remove(parm)
except ValueError:
pass
else:
# convert query arguments into indexed format
record.keys = list(map(self._convert, record.keys))

if opr == 'range': # range search
if 'min' in opr_args:
lo = min(record.keys)
Expand Down

0 comments on commit ae1199b

Please sign in to comment.