Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Special-case None search() results in AND, AND NOT, and OR contexts, and
uncomment the test cases that were failing in these contexts.
Browse files Browse the repository at this point in the history

Read it and weep <wink>:  In an AND context, None is treated like the
universal set, which jibes with the convenient fiction that stop words
appear in every doc.  However, in AND NOT and OR contexts, None is
treated like the empty set, which doesn't jibe with anything except that
we want

    real_word AND NOT stop_word

and

    real_word OR stop_word

to act like

    real_word

If we treated None as if it were the universal set, these results would
be (respectively) the empty set and the universal set instead.

At a higher level, we *are* consistent with the notion that a query with
a stop word acts the same as if the clause with the stop word weren't
present.  That's what really drives this seemingly inconsistent
(context-dependent) treatment of None.
  • Loading branch information
Tim Peters committed May 17, 2002
1 parent 3deedb7 commit 18f21df
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 13 deletions.
26 changes: 21 additions & 5 deletions ParseTree.py
Expand Up @@ -69,10 +69,19 @@ def executeQuery(self, index):
Nots = []
for subnode in self.getValue():
if subnode.nodeType() == "NOT":
Nots.append((subnode.getValue().executeQuery(index), 1))
r = subnode.getValue().executeQuery(index)
# If None, technically it matches every doc, but we treat
# it as if it matched none (we want
# real_word AND NOT stop_word
# to act like plain real_word).
if r is not None:
Nots.append((r, 1))
else:
L.append((subnode.executeQuery(index), 1))
assert L
r = subnode.executeQuery(index)
# If None, technically it matches every doc, so needn't be
# included.
if r is not None:
L.append((r, 1))
set = mass_weightedIntersection(L)
if Nots:
notset = mass_weightedUnion(Nots)
Expand All @@ -84,8 +93,15 @@ class OrNode(ParseTreeNode):
_nodeType = "OR"

def executeQuery(self, index):
weighted = [(node.executeQuery(index), 1)
for node in self.getValue()]
weighted = []
for node in self.getValue():
r = node.executeQuery(index)
# If None, technically it matches every doc, but we treat
# it as if it matched none (we want
# real_word OR stop_word
# to act like plain real_word).
if r is not None:
weighted.append((r, 1))
return mass_weightedUnion(weighted)

class AtomNode(ParseTreeNode):
Expand Down
16 changes: 8 additions & 8 deletions tests/testZCTextIndex.py
Expand Up @@ -127,13 +127,13 @@ def testStopWords(self):
self.assertEqual(num, 1)
self.assertEqual(r[0][0], 1)

# r, num = self.zc_index.query('question AND NOT to AND NOT be')
# self.assertEqual(num, 1)
# self.assertEqual(r[0][0], 1)
r, num = self.zc_index.query('question AND NOT to AND NOT be')
self.assertEqual(num, 1)
self.assertEqual(r[0][0], 1)

# r, num = self.zc_index.query('question OR to OR be')
# self.assertEqual(num, 1)
# self.assertEqual(r[0][0], 1)
r, num = self.zc_index.query('question OR to OR be')
self.assertEqual(num, 1)
self.assertEqual(r[0][0], 1)

r, num = self.zc_index.query('question to be')
self.assertEqual(num, 1)
Expand All @@ -145,8 +145,8 @@ def testStopWords(self):
r, num = self.zc_index.query('to AND be')
self.assertEqual(num, 0)

# r, num = self.zc_index.query('to OR be')
# self.assertEqual(num, 0)
r, num = self.zc_index.query('to OR be')
self.assertEqual(num, 0)

r, num = self.zc_index.query('to AND NOT be')
self.assertEqual(num, 0)
Expand Down

0 comments on commit 18f21df

Please sign in to comment.