From 3f6f48f3f9223677b9b53ec3317051dc1eb3116a Mon Sep 17 00:00:00 2001
From: Tim Peters <tim.one@comcast.net>
Date: Fri, 17 May 2002 05:44:31 +0000
Subject: [PATCH] Some simplifications unique to the cosine index.

---
 CosineIndex.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/CosineIndex.py b/CosineIndex.py
index a1acece..1b9b5c4 100644
--- a/CosineIndex.py
+++ b/CosineIndex.py
@@ -54,11 +54,11 @@ def __init__(self, lexicon):
         # ._wordinfo for cosine is wid -> {docid -> weight};
         # t -> D -> w(d, t)/W(d)
 
-        # ._docweight for Okapi is
+        # ._docweight for cosine is
         # docid -> W(docid)
 
     # Most of the computation for computing a relevance score for the
-    # document occurs in the search() method.  The code currently
+    # document occurs in the _search_wids() method.  The code currently
     # implements the cosine similarity function described in Managing
     # Gigabytes, eq. 4.3, p. 187.  The index_object() method
     # precomputes some values that are independent of the particular
@@ -109,17 +109,13 @@ def _search_wids(self, wids):
         L = []
         DictType = type({})
         for wid in wids:
-            d2w = self._wordinfo.get(wid) # maps docid to w(docid, wid)
-            if d2w is None:
-                # Need a test case to cover this
-                L.append((IIBucket(), scaled_int(1)))
-                continue
+            assert self._wordinfo.has_key(wid)  # caller responsible for OOV
+            d2w = self._wordinfo[wid] # maps docid to w(docid, wid)
             idf = query_term_weight(len(d2w), N)  # this is an unscaled float
             #print "idf = %.3f" % idf
             if isinstance(d2w, DictType):
                 d2w = IIBucket(d2w)
             L.append((d2w, scaled_int(idf)))
-        L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
         return L
 
     def query_weight(self, terms):