Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Two changes and a question posing as a comment.
Browse files Browse the repository at this point in the history
In unindex_doc() call _del_wordinfo() for each unique wid in the doc,
not for each wid.  Before we had WidCode and phrase searching,
_docwords stored a list of the unique wids.  The unindex code wasn't
updated when _docwords started storing all the wids, even duplicates.

Replace the try/except around __getitem__ in _add_wordinfo() with a
.get() call.

Add XXX comment about the purpose of the try/except(s) in
_del_wordinfo().  I suspect they only existed because _del_wordinfo()
was called repeatedly when a wid existed more than once.
  • Loading branch information
Jeremy Hylton committed May 17, 2002
1 parent 61fa4de commit 6d9ac11
Showing 1 changed file with 19 additions and 4 deletions.
23 changes: 19 additions & 4 deletions BaseIndex.py
Expand Up @@ -44,6 +44,13 @@ def scaled_int(f, scale=SCALE_FACTOR):
# expensive.
return int(f * scale + 0.5)

def unique(l):
    """Return a list of the unique elements in l.

    Each distinct element of l appears exactly once in the result.
    Elements are used as dictionary keys, so they must be hashable.
    Callers should not rely on the order of the returned list.
    """
    d = {}
    for elt in l:
        d[elt] = 1
    # Use list(d) rather than d.keys(): keys() is only a real list on
    # Python 2; on Python 3 it is a view object, which would contradict
    # the documented return type.
    return list(d)

class BaseIndex(Persistent):

__implements__ = IIndex
Expand Down Expand Up @@ -108,7 +115,7 @@ def _get_frequencies(self, wids):

# A subclass may wish to extend or override this.
def unindex_doc(self, docid):
for wid in self.get_words(docid):
for wid in unique(self.get_words(docid)):
self._del_wordinfo(wid, docid)
del self._docwords[docid]
del self._docweight[docid]
Expand Down Expand Up @@ -184,9 +191,8 @@ def _add_wordinfo(self, wid, f, docid):
# space when it is live in memory. An IIBTree stores two C
# arrays of ints, one for the keys and one for the values. It
# holds up to 120 key-value pairs in a single bucket.
try:
map = self._wordinfo[wid]
except KeyError:
map = self._wordinfo.get(wid)
if map is None:
map = {}
else:
# _add_wordinfo() is called for each update. If the map
Expand All @@ -197,10 +203,19 @@ def _add_wordinfo(self, wid, f, docid):
self._wordinfo[wid] = map # Not redundant, because of Persistency!

def _del_wordinfo(self, wid, docid):
# XXX Not clear if the try/excepts here are guarding against
# corrupt data structures or if it is possible for the index
# to get in a state where it thinks an entry exists for the
# wid, docid pair and it doesn't.
try:
map = self._wordinfo[wid]
except KeyError:
## print "No info for wid", wid
return
try:
del map[docid]
except KeyError:
## print "doc %s does not use %s" % (docid, wid)
return
if len(map) == 0:
del self._wordinfo[wid]
Expand Down

0 comments on commit 6d9ac11

Please sign in to comment.