From 939c8c4091bbaba8ae283f6e65a22b79de5ddefc Mon Sep 17 00:00:00 2001 From: Hanno Schlichting Date: Fri, 26 Aug 2016 22:23:14 +0200 Subject: [PATCH] Empty catalog queries now return no results. They used to return the contents of the entire catalog. --- CHANGES.rst | 2 + src/Products/ZCatalog/Catalog.py | 253 +++++++++----------- src/Products/ZCatalog/tests/test_catalog.py | 40 +--- 3 files changed, 128 insertions(+), 167 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 889d255b..773a2715 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,8 @@ Changelog 4.0 (unreleased) ---------------- +- Empty catalog queries now return no results. + - No longer special-case empty strings in catalog queries. - Add new CompositeIndex index type. diff --git a/src/Products/ZCatalog/Catalog.py b/src/Products/ZCatalog/Catalog.py index e1c4915b..01bba517 100644 --- a/src/Products/ZCatalog/Catalog.py +++ b/src/Products/ZCatalog/Catalog.py @@ -479,7 +479,7 @@ def getIndexDataForRID(self, rid): def make_query(self, request): # This is a bit of a mess, but the ZCatalog API has traditionally - # supported passing in query restrictions in almost arbitary ways + # supported passing in query restrictions in almost arbitrary ways. real_req = None if isinstance(request, dict): query = request.copy() @@ -525,6 +525,34 @@ def _get_index_query_names(self, index): return index.getIndexQueryNames() return (index.getId(),) + def _sort_limit_arguments(self, query, sort_index, reverse, limit): + b_start = int(query.get('b_start', 0)) + b_size = query.get('b_size', None) + if b_size is not None: + b_size = int(b_size) + + if b_size is not None: + limit = b_start + b_size + elif limit and b_size is None: + b_size = limit + + if sort_index is None: + sort_report_name = None + else: + if isinstance(sort_index, list): + sort_name = '-'.join(i.getId() for i in sort_index) + else: + sort_name = sort_index.getId() + if isinstance(reverse, list): + reverse_name = '-'.join( + 'desc' if r else 'asc' for r in reverse) + else: + reverse_name = 'desc' if reverse else 'asc' + sort_report_name = 'sort_on#' + sort_name + '#' + reverse_name + if limit is not None: + sort_report_name += '#limit-%s' % limit + return (b_start, b_size, limit, sort_report_name) + def _sorted_search_indexes(self, query): # Simple implementation ordering only by limited result support query_keys = query.keys() @@ -580,167 +608,118 @@ def search(self, query, for i in plan: if i not in indexes: # We can have bogus keys or the plan can contain index names - # that have been removed in the meantime + # that have been removed in the meantime. continue index = self.getIndex(i) - _apply_index = getattr(index, "_apply_index", None) + _apply_index = getattr(index, '_apply_index', None) if _apply_index is None: continue cr.start_split(i) limit_result = ILimitedResultIndex.providedBy(index) if limit_result: - r = _apply_index(query, rs) + index_result = _apply_index(query, rs) + else: + index_result = _apply_index(query) + + # Parse (resultset, used_attributes) index return value. + index_rs = None + if index_result: + index_rs, _ = index_result + + if not index_rs: + # Short circuit if empty index result. + rs = None else: - r = _apply_index(query) - - if r is not None: - r, u = r - # Short circuit if empty result - # BBB: We can remove the "r is not None" check in Zope 4 - # once we don't need to support the "return everything" case - # anymore - if r is not None and not r: - cr.stop_split(i, result=None, limit=limit_result) - return LazyCat([]) - - # provide detailed info about the pure intersection time + # Provide detailed info about the pure intersection time. intersect_id = i + '#intersection' cr.start_split(intersect_id) # weightedIntersection preserves the values from any mappings - # we get, as some indexes don't return simple sets - if hasattr(rs, 'items') or hasattr(r, 'items'): - _, rs = weightedIntersection(rs, r) + # we get, as some indexes don't return simple sets. + if hasattr(rs, 'items') or hasattr(index_rs, 'items'): + _, rs = weightedIntersection(rs, index_rs) else: - rs = intersection(rs, r) + rs = intersection(rs, index_rs) cr.stop_split(intersect_id) - # consider the time it takes to intersect the index result - # with the total result set to be part of the index time - cr.stop_split(i, result=r, limit=limit_result) - if not rs: - break - else: - cr.stop_split(i, result=None, limit=limit_result) + # Consider the time it takes to intersect the index result + # with the total result set to be part of the index time. + cr.stop_split(i, result=index_rs, limit=limit_result) - # Try to deduce the sort limit from batching arguments - b_start = int(query.get('b_start', 0)) - b_size = query.get('b_size', None) - if b_size is not None: - b_size = int(b_size) - - if b_size is not None: - limit = b_start + b_size - elif limit and b_size is None: - b_size = limit + if not rs: + break - if sort_index is None: - sort_report_name = None - else: - if isinstance(sort_index, list): - sort_name = '-'.join(i.getId() for i in sort_index) - else: - sort_name = sort_index.getId() - if isinstance(reverse, list): - reverse_name = '-'.join( - 'desc' if r else 'asc' for r in reverse) + if not rs: + # None of the indexes found anything to do with the query. + result = LazyCat([]) + cr.stop() + return result + + # Try to deduce the sort limit from batching arguments. + b_start, b_size, limit, sort_report_name = self._sort_limit_arguments( + query, sort_index, reverse, limit) + + # We got some results from the indexes, sort and convert to sequences. + rlen = len(rs) + if sort_index is None and hasattr(rs, 'items'): + # Having a 'items' means we have a data structure with + # scores. Build a new result set, sort it by score, reverse + # it, compute the normalized score, and Lazify it. + + if not merge: + # Don't bother to sort here, return a list of + # three tuples to be passed later to mergeResults. + # Note that data_record_normalized_score_ cannot be + # calculated and will always be 1 in this case. + result = [(score, (1, score, rid), self.__getitem__) + for rid, score in rs.items()] else: - reverse_name = 'desc' if reverse else 'asc' - sort_report_name = 'sort_on#' + sort_name + '#' + reverse_name - if limit is not None: - sort_report_name += '#limit-%s' % limit - - if rs is None: - # None of the indexes found anything to do with the query - # We take this to mean that the query was empty (an empty filter) - # and so we return everything in the catalog - warnings.warn('Your query %s produced no query restriction. ' - 'Currently the entire catalog content is returned. ' - 'In Zope 4 this will result in an empty LazyCat ' - 'to be returned.' % repr(cr.make_key(query)), - DeprecationWarning, stacklevel=3) + cr.start_split('sort_on#score') + + # Sort it by score. + rs = rs.byValue(0) + max = float(rs[0][0]) + + # Here we define our getter function inline so that + # we can conveniently store the max value as a default arg + # and make the normalized score computation lazy + def getScoredResult(item, max=max, self=self): + """ + Returns instances of self._v_brains, or whatever is + passed into self.useBrains. + """ + score, key = item + norm_score = int(100.0 * score / max) + return self.instantiate((key, self.data[key]), + score_data=(score, norm_score)) - rlen = len(self) - if sort_index is None: - sequence, slen = self._limit_sequence( - self.data.items(), rlen, b_start, b_size) - result = LazyMap( - self.instantiate, sequence, slen, - actual_result_count=rlen) - else: - cr.start_split(sort_report_name) - result = self.sortResults( - self.data, sort_index, reverse, limit, merge, - actual_result_count=rlen, b_start=b_start, b_size=b_size) - cr.stop_split(sort_report_name, None) - elif rs: - # We got some results from the indexes. - # Sort and convert to sequences. - # XXX: The check for 'values' is really stupid since we call - # items() and *not* values() - rlen = len(rs) - if sort_index is None and hasattr(rs, 'items'): - # having a 'items' means we have a data structure with - # scores. Build a new result set, sort it by score, reverse - # it, compute the normalized score, and Lazify it. - - if not merge: - # Don't bother to sort here, return a list of - # three tuples to be passed later to mergeResults - # note that data_record_normalized_score_ cannot be - # calculated and will always be 1 in this case - getitem = self.__getitem__ - result = [(score, (1, score, rid), getitem) - for rid, score in rs.items()] - else: - cr.start_split('sort_on#score') - - # sort it by score - rs = rs.byValue(0) - max = float(rs[0][0]) - - # Here we define our getter function inline so that - # we can conveniently store the max value as a default arg - # and make the normalized score computation lazy - def getScoredResult(item, max=max, self=self): - """ - Returns instances of self._v_brains, or whatever is - passed into self.useBrains. - """ - score, key = item - norm_score = int(100.0 * score / max) - return self.instantiate((key, self.data[key]), - score_data=(score, norm_score)) - - sequence, slen = self._limit_sequence( - rs, rlen, b_start, b_size) - result = LazyMap(getScoredResult, sequence, slen, - actual_result_count=rlen) - cr.stop_split('sort_on#score', None) - - elif sort_index is None and not hasattr(rs, 'values'): - # no scores - if hasattr(rs, 'keys'): - rs = rs.keys() sequence, slen = self._limit_sequence( rs, rlen, b_start, b_size) - result = LazyMap(self.__getitem__, sequence, slen, + result = LazyMap(getScoredResult, sequence, slen, actual_result_count=rlen) - else: - # sort. If there are scores, then this block is not - # reached, therefore 'sort-on' does not happen in the - # context of a text index query. This should probably - # sort by relevance first, then the 'sort-on' attribute. - cr.start_split(sort_report_name) - result = self.sortResults( - rs, sort_index, reverse, limit, merge, - actual_result_count=rlen, b_start=b_start, b_size=b_size) - cr.stop_split(sort_report_name, None) + cr.stop_split('sort_on#score', None) + + elif sort_index is None and not hasattr(rs, 'values'): + # no scores + if hasattr(rs, 'keys'): + rs = rs.keys() + sequence, slen = self._limit_sequence( + rs, rlen, b_start, b_size) + result = LazyMap(self.__getitem__, sequence, slen, + actual_result_count=rlen) else: - # Empty result set - result = LazyCat([]) + # Sort. If there are scores, then this block is not + # reached, therefore 'sort-on' does not happen in the + # context of a text index query. This should probably + # sort by relevance first, then the 'sort-on' attribute. + cr.start_split(sort_report_name) + result = self.sortResults( + rs, sort_index, reverse, limit, merge, + actual_result_count=rlen, b_start=b_start, b_size=b_size) + cr.stop_split(sort_report_name, None) + cr.stop() return result diff --git a/src/Products/ZCatalog/tests/test_catalog.py b/src/Products/ZCatalog/tests/test_catalog.py index becbfcad..65f08736 100644 --- a/src/Products/ZCatalog/tests/test_catalog.py +++ b/src/Products/ZCatalog/tests/test_catalog.py @@ -12,7 +12,6 @@ ############################################################################## import unittest -from Testing.ZopeTestCase.warnhook import WarningsHook from itertools import chain import random @@ -311,6 +310,16 @@ def testResultLength(self): self.assertEqual(len(a), self.upper, 'length should be %s, its %s' % (self.upper, len(a))) + def testEmptyMapping(self): + # Queries with empty mappings used to return all. + def extra(catalog): + col1 = FieldIndex('col1') + catalog.addIndex('col1', col1) + catalog = self._make_one(extra=extra) + self.assertTrue(len(catalog) > 0) + all_data = catalog({}) + self.assertEqual(len(all_data), 0) + def testMappingWithEmptyKeysDoesntReturnAll(self): # Queries with empty keys used to return all, because of a bug in the # parseIndexRequest function, mistaking a CatalogSearchArgumentsMap @@ -913,35 +922,6 @@ def test_range_search(self): "%d vs [%d,%d]" % (r.number, m, n)) -class TestCatalogReturnAll(unittest.TestCase): - - def _make_one(self): - from Products.ZCatalog.Catalog import Catalog - return Catalog() - - def setUp(self): - self.warningshook = WarningsHook() - self.warningshook.install() - - def tearDown(self): - self.warningshook.uninstall() - - def testEmptyMappingReturnsAll(self): - catalog = self._make_one() - col1 = FieldIndex('col1') - catalog.addIndex('col1', col1) - for x in range(0, 10): - catalog.catalogObject(Dummy(x), repr(x)) - self.assertEqual(len(catalog), 10) - all_data = catalog({}) - self.assertEqual(len(all_data), 10) - for rec in all_data: - self.assertEqual(rec.aq_parent, catalog) - self.assertNotEqual(rec.data_record_id_, None) - self.assertEqual(rec.data_record_score_, None) - self.assertEqual(rec.data_record_normalized_score_, None) - - class TestCatalogSearchArgumentsMap(unittest.TestCase): def _make_one(self, request=None, keywords=None):