Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Okapi index now works with Zope.
Browse files Browse the repository at this point in the history
Removed the QueryParser instance as a persistent attribute of ZCTextIndex, so that
QueryParser itself doesn't need to be persistent (it stores no state).

Updated tests. Functionally tested in Zope.
  • Loading branch information
caseman committed May 15, 2002
1 parent 670622b commit 4e9ee95
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 6 deletions.
10 changes: 9 additions & 1 deletion OkapiIndex.py
Expand Up @@ -27,6 +27,9 @@
from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
mass_weightedUnion

import ZODB
from Persistence import Persistent

# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
Expand All @@ -43,7 +46,7 @@ def scaled_int(f, scale=SCALE_FACTOR):
# expensive.
return int(f * scale + 0.5)

class Index:
class Index(Persistent):

__implements__ = IIndex

Expand Down Expand Up @@ -78,6 +81,10 @@ def length(self):
"""Return the number of documents in the index."""
return len(self._docwords)

def get_words(self, docid):
"""Return the word ids decoded from the entry stored for docid."""
return WidCode.decode(self._docwords[docid])

def index_doc(self, docid, text):
wids = self._lexicon.sourceToWordIds(text)
self._doclen[docid] = len(wids)
Expand All @@ -88,6 +95,7 @@ def index_doc(self, docid, text):
self._add_wordinfo(wid, count, docid)

self._docwords[docid] = WidCode.encode(wids)
return len(wids)

def unindex_doc(self, docid):
for wid in WidCode.decode(self._docwords[docid]):
Expand Down
5 changes: 2 additions & 3 deletions ZCTextIndex.py
Expand Up @@ -62,7 +62,6 @@ def __init__(self, id, extra, caller, index_factory=Index):

self.lexicon = lexicon
self.index = index_factory(self.lexicon)
self.parser = QueryParser()

## Pluggable Index APIs ##

Expand All @@ -89,13 +88,13 @@ def _apply_index(self, request, cid=''):
if record.keys is None:
return None
query_str = ' '.join(record.keys)
tree = self.parser.parseQuery(query_str)
tree = QueryParser().parseQuery(query_str)
results = tree.executeQuery(self.index)
return results, (self._fieldname,)

def query(self, query, nbest=10):
# returns a mapping from docids to scores
tree = self.parser.parseQuery(query)
tree = QueryParser().parseQuery(query)
results = tree.executeQuery(self.index)
chooser = NBest(nbest)
chooser.addmany(results.items())
Expand Down
5 changes: 3 additions & 2 deletions tests/testZCTextIndex.py
Expand Up @@ -4,6 +4,7 @@
from Products.ZCTextIndex.Index import scaled_int, SCALE_FACTOR, Index
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.QueryParser import QueryParser

import unittest

Expand Down Expand Up @@ -110,7 +111,7 @@ def _ranking_queries(self):
[(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]]
for i in range(len(queries)):
raw = queries[i]
q = self.zc_index.parser.parseQuery(raw)
q = QueryParser().parseQuery(raw)
wq = self.index.query_weight(q.terms())
eq(wq, scaled_int(wqs[i]))
r = self.zc_index.query(raw)
Expand Down Expand Up @@ -142,7 +143,7 @@ def setUp(self):
caller = LexiconHolder(Lexicon(Splitter(), CaseNormalizer(),
StopWordRemover()))
self.zc_index = ZCTextIndex('name', extra, caller)
self.p = self.parser = self.zc_index.parser
self.p = self.parser = QueryParser()
self.index = self.zc_index.index
self.add_docs()

Expand Down

0 comments on commit 4e9ee95

Please sign in to comment.