Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
- Collector #1784: fixed handling of multiple attributes in ZCTextIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
zopyx committed May 17, 2005
1 parent 2d1508b commit eb8733f
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 14 deletions.
27 changes: 13 additions & 14 deletions ZCTextIndex.py
Expand Up @@ -159,22 +159,21 @@ def index_object(self, documentId, obj, threshold=None):
except: fields = [ self._fieldname ]

res = 0
all_texts = []
for attr in fields:
res += self._index_object(documentId, obj, threshold, attr)

return res

def _index_object(self, docid, obj, threshold=None, attr=None):
# XXX We currently ignore subtransaction threshold
text = getattr(obj, self._fieldname, None)
if text is None:
return 0
if safe_callable(text):
text = text()
if text is None:
text = getattr(obj, attr, None)
if text is None:
continue
if safe_callable(text):
text = text()
if text is None:
continue
all_texts.append(text)

if all_texts:
return self.index.index_doc(documentId, ' '.join(all_texts))
else:
return 0
count = self.index.index_doc(docid, text)
return count

def unindex_object(self, docid):
if self.index.has_doc(docid):
Expand Down
27 changes: 27 additions & 0 deletions tests/testZCTextIndex.py
Expand Up @@ -39,6 +39,11 @@ class Indexable:
def __init__(self, text):
self.text = text

class Indexable2:
    """Minimal test document carrying two separate text attributes."""

    def __init__(self, text1, text2):
        # Keep each value under its own attribute name so an index
        # configured with several field names can read them separately.
        self.text1, self.text2 = text1, text2

class LexiconHolder(Acquisition.Implicit):
def __init__(self, lexicon):
self.lexicon = lexicon
Expand Down Expand Up @@ -115,6 +120,7 @@ def setUp(self):
'lexicon')
self.index = self.zc_index.index


def parserFailure(self, query):
    """Assert that running ``query`` against the index raises ParseError."""
    run_query = self.zc_index.query
    self.assertRaises(ParseError, run_query, query)

Expand All @@ -124,6 +130,27 @@ def parserSuccess(self, query, n):
if n:
self.assertEqual(r[0][0], 1)

def testMultipleAttributes(self):
    """Check that an index configured with two field names indexes both.

    A document with ``text1`` and ``text2`` attributes is indexed through
    a ZCTextIndex whose field name is ``'text1,text2'``.  The queries
    below use words taken from either attribute, plus one word that
    appears in neither.
    """
    # Build a lexicon just for this test and actually hand it to the
    # holder; previously this object was created and then ignored in
    # favour of the fixture's self.lexicon.
    lexicon = PLexicon('lexicon', '',
                       Splitter(),
                       CaseNormalizer(),
                       StopWordRemover())
    caller = LexiconHolder(lexicon)
    zc_index = ZCTextIndex('name',
                           None,
                           caller,
                           self.IndexFactory,
                           'text1,text2',
                           'lexicon')
    doc = Indexable2('foo bar', 'alpha omega')
    zc_index.index_object(1, doc)

    # 'foo' comes from text1 only.
    nbest, total = zc_index.query('foo')
    self.assertEqual(len(nbest), 1)
    # 'foo' comes from text1 and 'alpha' from text2; one hit is expected.
    nbest, total = zc_index.query('foo alpha')
    self.assertEqual(len(nbest), 1)
    # 'gamma' is in neither attribute; no hit is expected.
    nbest, total = zc_index.query('foo alpha gamma')
    self.assertEqual(len(nbest), 0)

def testStopWords(self):
# the only non-stopword is question
text = ("to be or not to be "
Expand Down

0 comments on commit eb8733f

Please sign in to comment.