- Collector #1815: ZCTextIndex accepts (again) sequences of strings to be indexed.
zopefoundation · Jul 4, 2005 · c1c6667 · c1c6667
1 parent eb8733f
commit c1c6667
Show file tree

Hide file tree

Showing 3 changed files with 50 additions and 7 deletions.
diff --git a/IIndex.py b/IIndex.py
@@ -68,6 +68,9 @@ def index_doc(docid, text):
         """Add a document with the specified id and text to the index. If a
         document by that id already exists, replace its text with the new
         text provided
+        text  may be either a string (Unicode or otherwise) or a list
+        of strings from which to extract the terms under which to
+        index the source document.
         """
 
     def unindex_doc(docid):

diff --git a/ZCTextIndex.py b/ZCTextIndex.py
@@ -152,7 +152,14 @@ def query(self, query, nbest=10):
     ## Pluggable Index APIs ##
 
     def index_object(self, documentId, obj, threshold=None):
-        """ wrapper to handle indexing of multiple attributes """
+        """Wrapper for  index_doc()  handling indexing of multiple attributes.
+
+        Enter the document with the specified documentId in the index
+        under the terms extracted from the indexed text attributes,
+        each of which should yield either a string or a list of
+        strings (Unicode or otherwise) to be passed to index_doc().
+        """
+        # XXX We currently ignore subtransaction threshold
 
         # needed for backward compatibility
         try: fields = self._indexed_attrs
@@ -168,12 +175,22 @@ def index_object(self, documentId, obj, threshold=None):
                 text = text()
             if text is None:
                 continue
-            all_texts.append(text)
-
-        if all_texts:        
-            return self.index.index_doc(documentId, ' '.join(all_texts))
-        else:
-            return 0
+            # To index each attribute separately, we could use the
+            # following line, but we have preferred to make a single
+            # call to  index_doc()  for all attributes together.  
+            # res += self.index.index_doc(documentId, text)
+            if text:
+                if isinstance(text, (list, tuple, )):
+                    all_texts.extend(text)
+                else:
+                    all_texts.append(text)
+
+        # Check that we're sending only strings
+        all_texts = filter(lambda text: isinstance(text, basestring), \
+                           all_texts)
+        if all_texts:
+            return self.index.index_doc(documentId, all_texts)            
+        return res
 
     def unindex_object(self, docid):
         if self.index.has_doc(docid):

diff --git a/tests/testZCTextIndex.py b/tests/testZCTextIndex.py
@@ -151,6 +151,29 @@ def testMultipleAttributes(self):
         nbest, total = zc_index.query('foo alpha gamma')
         self.assertEqual(len(nbest), 0)
 
+    def testListAttributes(self):
+        lexicon = PLexicon('lexicon', '',
+                            Splitter(),
+                            CaseNormalizer(),
+                            StopWordRemover())
+        caller = LexiconHolder(self.lexicon)
+        zc_index = ZCTextIndex('name',
+                                None,
+                                caller,
+                                self.IndexFactory,
+                               'text1,text2',
+                               'lexicon')
+        doc = Indexable2('Hello Tim', \
+                         ['Now is the winter of our discontent',
+                          'Made glorious summer by this sun of York', ])
+        zc_index.index_object(1, doc)
+        nbest, total = zc_index.query('glorious')
+        self.assertEqual(len(nbest), 1)
+        nbest, total = zc_index.query('York Tim')
+        self.assertEqual(len(nbest), 1)
+        nbest, total = zc_index.query('Tuesday Tim York')
+        self.assertEqual(len(nbest), 0)
+
     def testStopWords(self):
         # the only non-stopword is question
         text = ("to be or not to be "