diff --git a/BaseIndex.py b/BaseIndex.py index a4485b3..70a2496 100644 --- a/BaseIndex.py +++ b/BaseIndex.py @@ -84,7 +84,9 @@ def get_words(self, docid): # A subclass may wish to extend or override this. def index_doc(self, docid, text): - # XXX If docid is already known, do something smart. + if self._docwords.has_key(docid): + # XXX Do something smarter than this. + self.unindex_doc(docid) wids = self._lexicon.sourceToWordIds(text) wid2weight, docweight = self._get_frequencies(wids) for wid, weight in wid2weight.items(): diff --git a/tests/testZCTextIndex.py b/tests/testZCTextIndex.py index a3afc39..8faaf0d 100644 --- a/tests/testZCTextIndex.py +++ b/tests/testZCTextIndex.py @@ -96,7 +96,7 @@ def testStopWords(self): self.assertEqual(len(self.index.get_words(1)), 1) def testDocUpdate(self): - docid = 1 + docid = 1 # doesn't change -- we index the same doc repeatedly N = len(text) stop = get_stopdict() @@ -131,9 +131,6 @@ def testDocUpdate(self): for w in v: nbest, total = self.zc_index.query(w) self.assertEqual(total, 0, "did not expect to find %s" % w) - # XXX The next line is necessary because we're not yet reindexing - # XXX docs correctly. - self.zc_index.unindex_object(docid) class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):