Okapi index now works with Zope.

Removed QueryParser as a persistent attribute of the ZCTextIndex so that the parser doesn't need to be persistent (it stores no state); a fresh QueryParser is now created for each query instead. Updated tests. Functionally tested in Zope.
zopefoundation · May 15, 2002 · 4e9ee95 · 4e9ee95
1 parent 670622b
commit 4e9ee95
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 6 deletions.
diff --git a/OkapiIndex.py b/OkapiIndex.py
@@ -27,6 +27,9 @@
 from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
                                         mass_weightedUnion
 
+import ZODB
+from Persistence import Persistent
+
 # Instead of storing floats, we generally store scaled ints.  Binary pickles
 # can store those more efficiently.  The default SCALE_FACTOR of 1024
 # is large enough to get about 3 decimal digits of fractional info, and
@@ -43,7 +46,7 @@ def scaled_int(f, scale=SCALE_FACTOR):
     # expensive.
     return int(f * scale + 0.5)
 
-class Index:
+class Index(Persistent):
 
     __implements__ = IIndex
 
@@ -78,6 +81,10 @@ def length(self):
         """Return the number of documents in the index."""
         return len(self._docwords)
 
+    def get_words(self, docid):
+        """Returns the wordids for a given docid"""
+        return WidCode.decode(self._docwords[docid])
+
     def index_doc(self, docid, text):
         wids = self._lexicon.sourceToWordIds(text)
         self._doclen[docid] = len(wids)
@@ -88,6 +95,7 @@ def index_doc(self, docid, text):
             self._add_wordinfo(wid, count, docid)
 
         self._docwords[docid] = WidCode.encode(wids)
+        return len(wids)
 
     def unindex_doc(self, docid):
         for wid in WidCode.decode(self._docwords[docid]):

diff --git a/ZCTextIndex.py b/ZCTextIndex.py
@@ -62,7 +62,6 @@ def __init__(self, id, extra, caller, index_factory=Index):
 
         self.lexicon = lexicon
         self.index = index_factory(self.lexicon)
-        self.parser = QueryParser()
 
     ## Pluggable Index APIs ##
 
@@ -89,13 +88,13 @@ def _apply_index(self, request, cid=''):
         if record.keys is None:
             return None
         query_str = ' '.join(record.keys)
-        tree = self.parser.parseQuery(query_str)
+        tree = QueryParser().parseQuery(query_str)
         results = tree.executeQuery(self.index)
         return  results, (self._fieldname,)
 
     def query(self, query, nbest=10):
         # returns a mapping from docids to scores
-        tree = self.parser.parseQuery(query)
+        tree = QueryParser().parseQuery(query)
         results = tree.executeQuery(self.index)
         chooser = NBest(nbest)
         chooser.addmany(results.items())

diff --git a/tests/testZCTextIndex.py b/tests/testZCTextIndex.py
@@ -4,6 +4,7 @@
 from Products.ZCTextIndex.Index import scaled_int, SCALE_FACTOR, Index
 from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
 from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover
+from Products.ZCTextIndex.QueryParser import QueryParser
 
 import unittest
 
@@ -110,7 +111,7 @@ def _ranking_queries(self):
                    [(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]]
         for i in range(len(queries)):
             raw = queries[i]
-            q = self.zc_index.parser.parseQuery(raw)
+            q = QueryParser().parseQuery(raw)
             wq = self.index.query_weight(q.terms())
             eq(wq, scaled_int(wqs[i]))
             r = self.zc_index.query(raw)
@@ -142,7 +143,7 @@ def setUp(self):
         caller = LexiconHolder(Lexicon(Splitter(), CaseNormalizer(),
                                StopWordRemover()))
         self.zc_index = ZCTextIndex('name', extra, caller)
-        self.p = self.parser = self.zc_index.parser
+        self.p = self.parser = QueryParser()
         self.index = self.zc_index.index
         self.add_docs()