Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Squash bug duplication by moving the clever mass-union and mass-intersection
gimmicks into their own functions with their own test suite.
Browse files Browse the repository at this point in the history

This turned up two bugs:

1. The mass weighted union gimmick was incorrect when passed a list with
   a single mapping.  In that case, it neglected to multiply the mapping
   by the given weight.

2. The underlying weighted{Intersection, Union} code does something crazy
   if you pass it weights less than 0.  I had vaguely hoped to be able
   to subtract scores by passing 1 and -1 as weights, but this doesn't
   work.  It's hard to say exactly what it does then.  The line
       weightedUnion(IIBTree(), mapping, 1, -2)
   seems to return a mapping with the same keys, but *all* of whose
   values are -2, regardless of the original mapping's values.
  • Loading branch information
Tim Peters committed May 15, 2002
1 parent d0e498d commit 6aadf5a
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 0 deletions.
56 changes: 56 additions & 0 deletions SetOps.py
@@ -0,0 +1,56 @@
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################

"""SetOps -- Weighted intersections and unions applied to many inputs."""

from BTrees.IIBTree import IIBTree, weightedIntersection, weightedUnion

from Products.ZCTextIndex.NBest import NBest

def mass_weightedIntersection(L):
    """A list of (mapping, weight) pairs -> their weightedIntersection IIBTree.

    Each mapping's values are multiplied by its weight, and the result
    contains only the keys present in every mapping.  An empty list
    yields an empty IIBTree.  The caller's list is never mutated.

    NOTE: weights less than 0 are not supported; the underlying
    weightedUnion/weightedIntersection code is known to return
    nonsensical values for them.
    """
    if not L:
        return IIBTree()
    # Intersect with smallest first.  sorted() works on a copy, and the
    # key function measures each mapping exactly once instead of on every
    # comparison (len() can be costly for BTree inputs).
    ordered = sorted(L, key=lambda pair: len(pair[0]))
    x, w = ordered[0]
    # A union with an empty tree is what applies the first weight to the
    # first mapping's values.
    dummy, result = weightedUnion(IIBTree(), x, 1, w)
    for x, w in ordered[1:]:
        # The running result is already weighted, so it is carried with
        # weight 1.
        dummy, result = weightedIntersection(result, x, 1, w)
    return result

def mass_weightedUnion(L):
    """A list of (mapping, weight) pairs -> their weightedUnion IIBTree.

    An empty list yields an empty IIBTree.  A single pair is still run
    through weightedUnion so that its values come back multiplied by the
    weight.  Longer lists are merged pairwise, always combining the two
    smallest mappings currently in the queue.
    """
    if not L:
        return IIBTree()
    if len(L) == 1:
        # Union against an empty tree: the only way to get the lone
        # mapping's values scaled by its weight.
        mapping, scale = L[0]
        dummy, scaled = weightedUnion(IIBTree(), mapping, 1, scale)
        return scaled
    # Balance unions as closely as possible, smallest to largest.
    assert len(L) > 1
    queue = NBest(len(L))
    for mapping, scale in L:
        queue.add((mapping, scale), len(mapping))
    while len(queue) > 1:
        # Pull out the two smallest entries, union them, and queue the
        # merged mapping again.  Its weights have already been applied,
        # so it goes back with weight 1.
        (left, left_weight), dummy = queue.pop_smallest()
        (right, right_weight), dummy = queue.pop_smallest()
        dummy, merged = weightedUnion(left, right, left_weight, right_weight)
        queue.add((merged, 1), len(merged))
    # Exactly one entry is left; its mapping is the answer.
    (final, scale), dummy = queue.pop_smallest()
    return final
90 changes: 90 additions & 0 deletions tests/testSetOps.py
@@ -0,0 +1,90 @@
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################

from unittest import TestCase, TestSuite, main, makeSuite

from BTrees.IIBTree import IIBTree, IIBucket

from Products.ZCTextIndex.SetOps import mass_weightedIntersection
from Products.ZCTextIndex.SetOps import mass_weightedUnion

class TestSetOps(TestCase):
    """Tests for mass_weightedUnion and mass_weightedIntersection."""

    def testEmptyLists(self):
        # With no inputs at all, both operations return an empty mapping.
        for func in mass_weightedIntersection, mass_weightedUnion:
            self.assertEqual(len(func([])), 0)

    def testIdentity(self):
        # A lone mapping with weight 1 must come back with unchanged items,
        # whether it is a tree or a bucket.
        tree = IIBTree([(1, 2)])
        bucket = IIBucket([(1, 2)])
        for mapping in tree, bucket:
            for func in mass_weightedUnion, mass_weightedIntersection:
                result = func([(mapping, 1)])
                self.assertEqual(len(result), 1)
                self.assertEqual(list(result.items()),
                                 list(mapping.items()))

    def testScalarMultiply(self):
        # A lone mapping with weight `factor` must keep its keys and have
        # every value multiplied by `factor`.
        tree = IIBTree([(1, 2), (2, 3), (3, 4)])
        allkeys = [1, 2, 3]
        bucket = IIBucket(tree)
        for mapping in tree, bucket:
            self.assertEqual(list(mapping.keys()), allkeys)
            for func in mass_weightedUnion, mass_weightedIntersection:
                for factor in 0, 1, 5, 10:
                    result = func([(mapping, factor)])
                    self.assertEqual(allkeys, list(result.keys()))
                    for key, value in mapping.items():
                        self.assertEqual(value * factor, result[key])

    def _checkBothOrders(self, func, first, second, expected):
        # The outcome must not depend on the order of the input pairs.
        got = func([first, second])
        self.assertEqual(expected, list(got.items()))
        got = func([second, first])
        self.assertEqual(expected, list(got.items()))

    def testPairs(self):
        # Two mappings that overlap on keys 3 and 7; key 7 carries a
        # different value in each (70 vs. 7).
        t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
        t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
        allkeys = [1, 3, 5, 7, 9]
        b1 = IIBucket(t1)
        b2 = IIBucket(t2)
        mappings = (t1, t2, b1, b2)
        weightings = ((0, 0), (1, 10), (10, 1), (2, 3))
        for x in mappings:
            for key in x.keys():
                self.assertEqual(key in allkeys, 1)
            for y in mappings:
                for w1, w2 in weightings:
                    # Test the union: every key found in either input,
                    # scored as the weighted sum (a missing key counts 0).
                    in_either = [
                        (key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                        for key in allkeys
                        if x.has_key(key) or y.has_key(key)]
                    in_either.sort()
                    self._checkBothOrders(mass_weightedUnion,
                                          (x, w1), (y, w2), in_either)

                    # Test the intersection: only keys found in both
                    # inputs, scored the same way.
                    in_both = [
                        (key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                        for key in allkeys
                        if x.has_key(key) and y.has_key(key)]
                    in_both.sort()
                    self._checkBothOrders(mass_weightedIntersection,
                                          (x, w1), (y, w2), in_both)

def test_suite():
    """Return a TestSuite holding the test methods of TestSetOps."""
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in
    # 3.13.  The default loader builds the equivalent suite and is
    # available on old and new Python versions alike.
    from unittest import defaultTestLoader
    return defaultTestLoader.loadTestsFromTestCase(TestSetOps)

# When executed directly as a script, run the suite built by test_suite().
if __name__ == "__main__":
    main(defaultTest='test_suite')

0 comments on commit 6aadf5a

Please sign in to comment.