Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Fix queries of the form 'extension module C'.
Browse files Browse the repository at this point in the history
  • Loading branch information
gvanrossum committed May 16, 2002
1 parent 0c561da commit f94baf2
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 11 deletions.
8 changes: 8 additions & 0 deletions CosineIndex.py
Expand Up @@ -115,6 +115,10 @@ def unindex_doc(self, docid):

def search(self, term):
    """Search the index for the words of a single query term.

    The term is converted to word ids by the lexicon.  When that yields
    no word ids at all, None is returned.  Otherwise the result is the
    mass_weightedUnion of the per-word search results.
    """
    word_ids = self._lexicon.termToWordIds(term)
    if not word_ids:
        return None # All docs match
    # Drop any 0 entries before searching; only non-zero word ids are
    # passed on to _search_wids().
    if 0 in word_ids:
        word_ids = [wid for wid in word_ids if wid]
    per_word_results = self._search_wids(word_ids)
    return mass_weightedUnion(per_word_results)

def search_glob(self, pattern):
Expand All @@ -123,6 +127,8 @@ def search_glob(self, pattern):

def search_phrase(self, phrase):
wids = self._lexicon.termToWordIds(phrase)
if 0 in wids:
return IIBTree()
hits = mass_weightedIntersection(self._search_wids(wids))
if not hits:
return hits
Expand Down Expand Up @@ -157,6 +163,8 @@ def query_weight(self, terms):
N = float(len(self._docweight))
sum = 0.0
for wid in wids:
if wid == 0:
continue
wt = math.log(1.0 + N / len(self._wordinfo[wid]))
sum += wt ** 2.0
return scaled_int(math.sqrt(sum))
Expand Down
4 changes: 1 addition & 3 deletions Lexicon.py
Expand Up @@ -62,9 +62,7 @@ def termToWordIds(self, text):
last = element.process(last)
wids = []
for word in last:
wid = self._wids.get(word)
if wid is not None:
wids.append(wid)
wids.append(self._wids.get(word, 0))
return wids

def get_word(self, wid):
Expand Down
6 changes: 6 additions & 0 deletions OkapiIndex.py
Expand Up @@ -109,6 +109,10 @@ def unindex_doc(self, docid):

def search(self, term):
    """Search the index for the words of a single query term.

    The lexicon turns the term into a list of word ids.  An empty list
    makes this method return None.  Otherwise the per-word results are
    combined with mass_weightedUnion and returned.
    """
    word_ids = self._lexicon.termToWordIds(term)
    if not word_ids:
        return None # All docs match
    # Word id 0 is filtered out so that only non-zero word ids reach
    # _search_wids().
    if 0 in word_ids:
        word_ids = [wid for wid in word_ids if wid]
    per_word_results = self._search_wids(word_ids)
    return mass_weightedUnion(per_word_results)

def search_glob(self, pattern):
Expand All @@ -117,6 +121,8 @@ def search_glob(self, pattern):

def search_phrase(self, phrase):
wids = self._lexicon.termToWordIds(phrase)
if 0 in wids:
return IIBTree()
hits = mass_weightedIntersection(self._search_wids(wids))
if not hits:
return hits
Expand Down
2 changes: 1 addition & 1 deletion SetOps.py
Expand Up @@ -20,10 +20,10 @@

def mass_weightedIntersection(L):
"A list of (mapping, weight) pairs -> their weightedIntersection IIBTree."
L = [(map, weight) for (map, weight) in L if map is not None]
if not L:
return IIBTree()
# Intersect with smallest first.
L = L[:] # don't mutate the caller's L
L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
x, w = L[0]
dummy, result = weightedUnion(IIBTree(), x, 1, w)
Expand Down
2 changes: 2 additions & 0 deletions ZCTextIndex.py
Expand Up @@ -72,6 +72,8 @@ def query(self, query, nbest=10):
"""
tree = QueryParser().parseQuery(query)
results = tree.executeQuery(self.index)
if results is None:
return [], 0
chooser = NBest(nbest)
chooser.addmany(results.items())
return chooser.getbest(), len(results)
Expand Down
12 changes: 7 additions & 5 deletions tests/mhindex.py
Expand Up @@ -143,7 +143,7 @@ def interact(self, nbest=NBEST, maxlines=MAXLINES):
if not text:
continue
try:
n, results = self.timequery(text, top + nbest)
results, n = self.timequery(text, top + nbest)
except:
reportexc()
text = ""
Expand All @@ -163,7 +163,7 @@ def interact(self, nbest=NBEST, maxlines=MAXLINES):
top += nbest

def query(self, text, nbest=NBEST, maxlines=MAXLINES):
n, results = self.timequery(text, nbest)
results, n = self.timequery(text, nbest)
if not n:
print "No hits for %r." % text
return
Expand All @@ -173,11 +173,11 @@ def query(self, text, nbest=NBEST, maxlines=MAXLINES):
def timequery(self, text, nbest):
t0 = time.time()
c0 = time.clock()
n, results = self.index.query(text, nbest)
results, n = self.index.query(text, nbest)
t1 = time.time()
c1 = time.clock()
print "[Query time: %.3f real, %.3f user]" % (t1-t0, c1-c0)
return n, results
return results, n

def formatresults(self, text, results, maxlines=MAXLINES,
lo=0, hi=sys.maxint):
Expand Down Expand Up @@ -397,9 +397,11 @@ def query(self, query, nbest=10):
parser = QueryParser()
tree = parser.parseQuery(query)
results = tree.executeQuery(self.index)
if results is None:
return [], 0
chooser = NBest(nbest)
chooser.addmany(results.items())
return len(results), chooser.getbest()
return chooser.getbest(), len(results)

def query_weight(self, query):
parser = QueryParser()
Expand Down
4 changes: 2 additions & 2 deletions tests/testLexicon.py
Expand Up @@ -76,7 +76,7 @@ def testMissingTermToWordIds(self):
lexicon = Lexicon(Splitter())
wids = lexicon.sourceToWordIds('cats and dogs')
wids = lexicon.termToWordIds('boxes')
self.assertEqual(wids, [])
self.assertEqual(wids, [0])

def testOnePipelineElement(self):
lexicon = Lexicon(Splitter(), StupidPipelineElement('dogs', 'fish'))
Expand All @@ -94,7 +94,7 @@ def testSplitterAdaptorNofold(self):
lexicon = Lexicon(Splitter())
wids = lexicon.sourceToWordIds('CATS and dogs')
wids = lexicon.termToWordIds('cats and dogs')
self.assertEqual(wids, [2, 3])
self.assertEqual(wids, [0, 2, 3])

def testTwoElementPipeline(self):
lexicon = Lexicon(Splitter(),
Expand Down

0 comments on commit f94baf2

Please sign in to comment.