Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Not indexed value support (MissingValue, EmptyValue) #74

Open
wants to merge 49 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 48 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
2f56826
Initial not indexed value support (MissingValue, EmptyValue)
andbag May 5, 2019
29b0bf3
Reorganize NotIndexedValue classes
andbag May 6, 2019
ce61b42
Fix BTree TypeError
andbag May 6, 2019
05c8079
Add not indexed value tests
andbag May 6, 2019
2de46a3
Fix get_object_datum for not indexed value support
andbag May 7, 2019
c61afae
Add additional tests for not indexed value support
andbag May 7, 2019
a47ebc7
Fix EmptyValue support
andbag May 7, 2019
d140ffb
Disable Missing/EmptyValue interface of CompositeIndex
andbag May 8, 2019
7f246be
Fix flake8
andbag May 8, 2019
a0c8546
Merge branch 'master' into notindexed_value_support
May 9, 2019
0f6a2ce
Merge branch 'master' into notindexed_value_support
May 10, 2019
7c9cf99
Merge branch 'master' into notindexed_value_support
May 14, 2019
a2a1ee7
New naming of methods and variables
andbag May 10, 2019
6ceb6dc
Store special values in `_unindex`
andbag May 10, 2019
7b9313c
Fix KeywordIndex tests
andbag May 10, 2019
0911a0c
Fix unindex and refactor KeywordIndex
andbag May 13, 2019
47d5b95
Return False for SpecialValues on truth value testing
andbag May 14, 2019
e305aca
Fix flake8
andbag May 14, 2019
8e47c81
Merge branch 'master' into notindexed_value_support
May 17, 2019
b529bd2
Raise Error if multiple indexed attributes are set
andbag May 22, 2019
6b7b84d
Prepare index_object for multiple indexed attributes
andbag May 23, 2019
52114da
Merge branch 'fix_indexed_attr' into notindexed_value_support
andbag May 24, 2019
b494b75
Add test for multiple indexed attributes
andbag May 24, 2019
e319a05
Replace __nonzero__ with __bool__ for py3
andbag May 24, 2019
bcbd74d
Always store keywords as OOSet
andbag May 24, 2019
5f709ac
Fix tests for OOSet
andbag May 24, 2019
5f6c287
Create debug entry if datum for attribute cannot be determined
andbag May 24, 2019
0c5493a
Fix consistent check for missing `_unindex` entry
andbag May 24, 2019
090bc9e
Remove obsolete code
andbag May 24, 2019
e7c5e07
flake8
andbag May 24, 2019
cf950e6
Fix clearing of special values of KeywordIndex
andbag May 26, 2019
78efffb
Continue to fix clearing of special values
andbag May 27, 2019
7dd55b7
Add definition map for special values
andbag May 27, 2019
0d58199
Refinement of special value support
andbag May 28, 2019
6d5fa3e
Fix test for empty value
andbag May 29, 2019
4110f65
Log then ignore keys not indexable
andbag Jun 4, 2019
2815927
Consolidate code
andbag Jun 4, 2019
2a45691
Avoid obsolete type casting
andbag Jun 4, 2019
6c5e52c
Consolidate special value handling step one
andbag Jun 4, 2019
fd0a9d2
Consolidate special value handling step two
andbag Jun 6, 2019
225df14
Consolidate code of CompositeIndex
andbag Jun 6, 2019
9bbfdfb
Completion of interfaces
andbag Jun 6, 2019
906a858
Code further generalized
andbag Jun 6, 2019
ab99560
flake8
andbag Jun 6, 2019
e595088
Continue cleaning
andbag Jun 6, 2019
d059521
Code further generalized II
andbag Jun 11, 2019
9883352
Reorganize code and complete tests
andbag Jun 11, 2019
62321b9
Fix for py2 backward compatibility
andbag Jun 11, 2019
3913a78
Merge branch 'master' into notindexed_value_support
Jun 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Products/PluginIndexes/BooleanIndex/BooleanIndex.py
Expand Up @@ -155,7 +155,7 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
returnStatus = 0

# First we need to see if there's anything interesting to look at
datum = self._get_object_datum(obj, attr)
datum = self.get_object_datum(obj, attr)

# Make it boolean, int as an optimization
if datum is not _marker:
Expand Down
101 changes: 51 additions & 50 deletions src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py
Expand Up @@ -21,16 +21,31 @@
from Acquisition import aq_parent
from Acquisition import aq_inner
from App.special_dtml import DTMLFile
from BTrees.OOBTree import difference
from BTrees.OOBTree import OOSet
from Persistence import PersistentMapping
from zope.interface import implementer

from Products.PluginIndexes.interfaces import ITransposeQuery
from zope.interface import implementer_only

from Products.PluginIndexes.interfaces import (
ILimitedResultIndex,
IQueryIndex,
ISortIndex,
IUniqueValueIndex,
IRequestCacheIndex,
ITransposeQuery,
missing,
empty,
)
from Products.PluginIndexes.util import safe_callable
from Products.PluginIndexes.KeywordIndex.KeywordIndex import KeywordIndex
from Products.PluginIndexes.unindex import _marker
from Products.ZCatalog.query import IndexQuery

try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having used similar code for Python 2/3 compatibility, I have been directed to use six instead. Consistently using six for Python 2/3 compatibility will facilitate code cleanup once Python 2 support is dropped.

basestring
except NameError:
# Python 3 compatibility
basestring = (bytes, str)

LOG = logging.getLogger('CompositeIndex')

QUERY_OPTIONS = {
Expand Down Expand Up @@ -172,7 +187,8 @@ def __repr__(self):
'attributes: {0.attributes}>').format(self)


@implementer(ITransposeQuery)
@implementer_only(ILimitedResultIndex, IQueryIndex, IUniqueValueIndex,
ISortIndex, IRequestCacheIndex, ITransposeQuery)
class CompositeIndex(KeywordIndex):

"""Index for composition of simple fields.
Expand Down Expand Up @@ -209,45 +225,8 @@ def __init__(self, id, ignore_ex=None, call_methods=None,
c_attributes)
self.clear()

def _index_object(self, documentId, obj, threshold=None, attr=''):

# get permuted keywords
newKeywords = self._get_permuted_keywords(obj)

oldKeywords = self._unindex.get(documentId, None)

if oldKeywords is None:
# we've got a new document, let's not futz around.
try:
for kw in newKeywords:
self.insertForwardIndexEntry(kw, documentId)
if newKeywords:
self._unindex[documentId] = list(newKeywords)
except TypeError:
return 0
else:
# we have an existing entry for this document, and we need
# to figure out if any of the keywords have actually changed
if type(oldKeywords) is not OOSet:
oldKeywords = OOSet(oldKeywords)
newKeywords = OOSet(newKeywords)
fdiff = difference(oldKeywords, newKeywords)
rdiff = difference(newKeywords, oldKeywords)
if fdiff or rdiff:
# if we've got forward or reverse changes
if newKeywords:
self._unindex[documentId] = list(newKeywords)
else:
del self._unindex[documentId]
if fdiff:
self.unindex_objectKeywords(documentId, fdiff)
if rdiff:
for kw in rdiff:
self.insertForwardIndexEntry(kw, documentId)
return 1

def _get_permuted_keywords(self, obj):
""" returns permutation tuple of object keywords """
def get_object_datum(self, obj, attr):
""" returns permutation of object keywords """

components = self.getIndexComponents()
kw_list = []
Expand All @@ -270,34 +249,48 @@ def _get_permuted_keywords(self, obj):
p = combinations(c, r)
pkl.extend(p)

return tuple(pkl)
return OOSet(pkl)

def _get_component_datum(self, obj, attr):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This almost looks like get_object_datum. Are you sure you need this special definition?

# self.id is the name of the index, which is also the name of the
# attribute we're interested in. If the attribute is callable,
# we'll do so.
try:
datum = getattr(obj, attr)
if safe_callable(datum):
datum = datum()
except (AttributeError, TypeError):
datum = _marker
return datum

def _get_component_keywords(self, obj, component):

if component.meta_type == 'FieldIndex':
# last attribute is the winner if value is not None
for attr in component.attributes:
datum = self._get_object_datum(obj, attr)
datum = self._get_component_datum(obj, attr)
if datum is None:
continue
if datum is None:
return ()
if isinstance(datum, list):
datum = tuple(datum)
if isinstance(datum, (list, OOSet)):
return tuple(datum)
return (datum,)

elif component.meta_type == 'KeywordIndex':
# last attribute is the winner
attr = component.attributes[-1]
datum = self._get_object_keywords(obj, attr)
datum = self._get_component_datum(obj, attr)
if isinstance(datum, basestring):
datum = (datum,)
if isinstance(datum, list):
datum = tuple(datum)
return datum

elif component.meta_type == 'BooleanIndex':
# last attribute is the winner
attr = component.attributes[-1]
datum = self._get_object_datum(obj, attr)
datum = self._get_component_datum(obj, attr)
if datum is not _marker:
datum = int(bool(datum))
return (datum,)
Expand Down Expand Up @@ -380,6 +373,14 @@ def make_query(self, query):
if c.meta_type == 'BooleanIndex':
rec.keys = [int(bool(v)) for v in rec.keys[:]]

# cannot currently support KeywordIndex's
# missing/empty feature
if c.meta_type == 'KeywordIndex':
if missing in rec.keys:
continue
if empty in rec.keys:
continue

# rec with 'not' parameter
not_parm = rec.get('not', None)
if not_parm:
Expand Down
Expand Up @@ -21,32 +21,29 @@
types = ['Document', 'News', 'File', 'Image']
default_pages = [True, False, False, False, False, False]
subjects = list(map(lambda x: 'subject_{0}'.format(x), range(6)))
keywords = list(map(lambda x: 'keyword_{0}'.format(x), range(6)))


class TestObject(object):

def __init__(self, id, portal_type, review_state,
is_default_page=False, subject=(), keyword=()):
is_default_page=False, subject=()):
self.id = id
self.portal_type = portal_type
self.review_state = review_state
self.is_default_page = is_default_page
self.subject = subject
self.keyword = keyword

def getPhysicalPath(self):
return ['', self.id, ]

def __repr__(self):
return ('< {id}, {portal_type}, {review_state},\
{is_default_page}, {subject} , {keyword}>'.format(
{is_default_page}, {subject}>'.format(
id=self.id,
portal_type=self.portal_type,
review_state=self.review_state,
is_default_page=self.is_default_page,
subject=self.subject,
keyword=self.keyword))
subject=self.subject))


class RandomTestObject(TestObject):
Expand All @@ -63,11 +60,10 @@ def __init__(self, id):
is_default_page = default_pages[i]

subject = random.sample(subjects, random.randint(1, len(subjects)))
keyword = random.sample(keywords, random.randint(1, len(keywords)))

super(RandomTestObject, self).__init__(id, portal_type,
review_state, is_default_page,
subject, keyword)
subject)


# Pseudo ContentLayer class to support quick
Expand All @@ -92,7 +88,7 @@ def setUp(self):
KeywordIndex('subject',
extra={
'indexed_attrs':
'keyword,subject'}
'subject'}
),
CompositeIndex('comp01',
extra=[{'id': 'portal_type',
Expand All @@ -107,7 +103,7 @@ def setUp(self):
{'id': 'subject',
'meta_type': 'KeywordIndex',
'attributes':
'keyword,subject'}
'subject'}
])
]

Expand Down Expand Up @@ -206,9 +202,6 @@ def testPerformance(self):
('query02_default_two_indexes',
{'portal_type': {'query': 'Document'},
'subject': {'query': 'subject_2'}}),
('query02_default_two_indexes_zero_hits',
{'portal_type': {'query': 'Document'},
'subject': {'query': ['keyword_1', 'keyword_2']}}),
('query03_default_two_indexes',
{'portal_type': {'query': 'Document'},
'subject': {'query': ['subject_1', 'subject_3']}}),
Expand Down Expand Up @@ -340,8 +333,7 @@ def testSearch(self):
subject=('subject_1', 'subject_2'))
self.populateIndexes(3, obj)
obj = TestObject('obj_4', 'Event', 'private',
subject=('subject_1', 'subject_2'),
keyword=('keyword_1', ))
subject=('subject_1', 'subject_2'))
self.populateIndexes(4, obj)

queries = [
Expand Down Expand Up @@ -379,12 +371,6 @@ def testSearch(self):
'is_default_page': {'query': False},
'subject': {'query': ('subject_1', 'subject_2'),
'operator': 'and'}},
# query on five attributes with
{'review_state': {'not': ('pending', 'visible')},
'portal_type': {'query': ('News', 'Document')},
'is_default_page': {'query': False},
'subject': {'query': ('subject_1', )},
'keyword': {'query': ('keyword_1',)}},
]

for query in queries:
Expand Down