From 6e7888bd0c902edf6bfb475e559302098be65613 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Fri, 17 May 2002 08:02:01 +0000 Subject: [PATCH] Implement correct (albeit inefficient) reindexing, and stop cheating in the reindexing test. --- BaseIndex.py | 4 +++- tests/testZCTextIndex.py | 5 +---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/BaseIndex.py b/BaseIndex.py index a4485b3..70a2496 100644 --- a/BaseIndex.py +++ b/BaseIndex.py @@ -84,7 +84,9 @@ def get_words(self, docid): # A subclass may wish to extend or override this. def index_doc(self, docid, text): - # XXX If docid is already known, do something smart. + if self._docwords.has_key(docid): + # XXX Do something smarter than this. + self.unindex_doc(docid) wids = self._lexicon.sourceToWordIds(text) wid2weight, docweight = self._get_frequencies(wids) for wid, weight in wid2weight.items(): diff --git a/tests/testZCTextIndex.py b/tests/testZCTextIndex.py index a3afc39..8faaf0d 100644 --- a/tests/testZCTextIndex.py +++ b/tests/testZCTextIndex.py @@ -96,7 +96,7 @@ def testStopWords(self): self.assertEqual(len(self.index.get_words(1)), 1) def testDocUpdate(self): - docid = 1 + docid = 1 # doesn't change -- we index the same doc repeatedly N = len(text) stop = get_stopdict() @@ -131,9 +131,6 @@ def testDocUpdate(self): for w in v: nbest, total = self.zc_index.query(w) self.assertEqual(total, 0, "did not expect to find %s" % w) - # XXX The next line is necessary because we're not yet reindexing - # XXX docs correctly. - self.zc_index.unindex_object(docid) class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):