diff --git a/CHANGES.rst b/CHANGES.rst index 489fabc5..59172ceb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -18,10 +18,14 @@ Bug fixes - Fix rewriting of query to avoid wrong optimization of CompositeIndex. (`#59 `_) -- Consolidate code for general usage of ``UnIndex._convert`` method to - avoid unnecessary doubling of code. +- Consistent use of ``UnIndex._convert`` method to avoid unnecessary + doubling of code. (`#69 `_) +- Fix performance issue of ``valueindexes`` method for catalogs with + many indexed objects + (`#39 `_) + 4.4 (2019-03-08) ---------------- diff --git a/src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py b/src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py index f665f316..6ab2916b 100644 --- a/src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py +++ b/src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py @@ -238,6 +238,31 @@ def items(self): items.append((not bool(indexed), false)) return items + def uniqueValues(self, name=None, withLengths=0): + """returns the unique values for name + + if withLengths is true, returns a sequence of + tuples of (value, length) + """ + if name is None: + name = self.id + elif name != self.id: + return + + indexed = bool(self._index_value) + unique_values = (indexed, not indexed) + if not withLengths: + for key in unique_values: + yield key + else: + for key in unique_values: + ilen = len(self._index) + if key is indexed: + yield (key, ilen) + else: + ulen = len(self._unindex) + yield (key, ulen - ilen) + manage_addBooleanIndexForm = DTMLFile('dtml/addBooleanIndex', globals()) diff --git a/src/Products/ZCatalog/plan.py b/src/Products/ZCatalog/plan.py index fec58654..1b88046b 100644 --- a/src/Products/ZCatalog/plan.py +++ b/src/Products/ZCatalog/plan.py @@ -25,6 +25,7 @@ from zope.dottedname.resolve import resolve from Products.PluginIndexes.interfaces import IUniqueValueIndex +from Products.PluginIndexes.interfaces import IDateRangeIndex MAX_DISTINCT_VALUES = 10 @@ -191,34 +192,33 @@ def valueindexes(self): # in the report key. The number of unique values for the index needs to # be lower than the MAX_DISTINCT_VALUES watermark. - # TODO: Ideally who would only consider those indexes with a small + # Ideally who would only consider those indexes with a small # number of unique values, where the number of items for each value # differs a lot. If the number of items per value is similar, the - # duration of a query is likely similar as well. + # duration of a query is likely similar as well. However, calculating + # all the value indexes with the number of items per value is + # quite slow. Therefore, we do not make this distinction. value_indexes = PriorityMap.get_entry(self.cid, VALUE_INDEX_KEY) if isinstance(value_indexes, (frozenset, set)): - # Calculating all the value indexes is quite slow, so we do this - # once for the first query. Since this is an optimization only, - # slightly outdated results based on index changes in the running - # process can be ignored. + # Since this is an optimization only, slightly outdated results + # based on index changes in the running process can be ignored. return value_indexes value_indexes = set() for name, index in indexes.items(): if IUniqueValueIndex.providedBy(index): - values = index.uniqueValues() - i = 0 - for value in values: - # the total number of unique values might be large and - # expensive to load, so we only check if we can get - # more than MAX_DISTINCT_VALUES - if i >= MAX_DISTINCT_VALUES: - break - i += 1 - if i > 0 and i < MAX_DISTINCT_VALUES: - # Only consider indexes which actually return a number - # greater than zero - value_indexes.add(name) + + # DateRangeIndex is unsuitable for this purpose + if IDateRangeIndex.providedBy(index): + continue + + # the size of an UniqueValueIndex is typically equal to the + # number of unique values + isize = index.indexSize() + if isize >= MAX_DISTINCT_VALUES: + continue + + value_indexes.add(name) value_indexes = frozenset(value_indexes) PriorityMap.set_entry(self.cid, VALUE_INDEX_KEY, value_indexes) diff --git a/src/Products/ZCatalog/tests/test_plan.py b/src/Products/ZCatalog/tests/test_plan.py index 6c72b0a6..ba3f48f1 100644 --- a/src/Products/ZCatalog/tests/test_plan.py +++ b/src/Products/ZCatalog/tests/test_plan.py @@ -326,7 +326,7 @@ def _make_plan(self, catalog): def test_uniquevalues(self): zcat = self._make_catalog() indexes = zcat._catalog.indexes - self.assertEqual(len(list(indexes['big'].uniqueValues())), 3) + self.assertEqual(len(list(indexes['big'].uniqueValues())), 2) self.assertEqual(len(list(indexes['date'].uniqueValues())), 0) self.assertEqual(len(list(indexes['date'].uniqueValues('start'))), 9) self.assertEqual(len(list(indexes['date'].uniqueValues('end'))), 9)