This repository has been archived by the owner on May 13, 2020. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Squash bug duplication by moving the clever mass-union and mass-intersection gimmicks into their own functions with their own test suite. This turned up two bugs:
1. The mass weighted union gimmick was incorrect when passed a list with a single mapping. In that case, it neglected to multiply the mapping by the given weight.
2. The underlying weighted{Intersection, Union} code does something crazy if you pass it weights less than 0. I had vaguely hoped to be able to subtract scores by passing 1 and -1 as weights, but this doesn't work. It's hard to say exactly what it does then. The line weightedUnion(IIBTree(), mapping, 1, -2) seems to return a mapping with the same keys, but *all* of whose values are -2, regardless of the original mapping's values.
- Loading branch information
Tim Peters
committed
May 15, 2002
1 parent
d0e498d
commit 6aadf5a
Showing
2 changed files
with
146 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
############################################################################## | ||
# | ||
# Copyright (c) 2001, 2002 Zope Corporation and Contributors. | ||
# All Rights Reserved. | ||
# | ||
# This software is subject to the provisions of the Zope Public License, | ||
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. | ||
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED | ||
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS | ||
# FOR A PARTICULAR PURPOSE | ||
# | ||
############################################################################## | ||
|
||
"""SetOps -- Weighted intersections and unions applied to many inputs.""" | ||
|
||
from BTrees.IIBTree import IIBTree, weightedIntersection, weightedUnion | ||
|
||
from Products.ZCTextIndex.NBest import NBest | ||
|
||
def mass_weightedIntersection(L):
    """A list of (mapping, weight) pairs -> their weightedIntersection IIBTree.

    L is a sequence of (mapping, weight) pairs; it is not mutated.  An
    empty L yields a new, empty IIBTree.  Otherwise the pairs are folded
    together with weightedIntersection, smallest mapping first, so the
    result holds the keys common to all mappings, each value being the
    sum of weight * value over the inputs.

    Weights below 0 are not handled sensibly by the underlying BTrees
    operations, so callers should not pass them.
    """
    if not L:
        return IIBTree()
    # Intersect with smallest first.  sorted() builds a new list, so the
    # caller's L is left alone, and the key= form replaces the cmp-function
    # form of list.sort(), which no longer exists in Python 3.
    ordered = sorted(L, key=lambda pair: len(pair[0]))
    x, w = ordered[0]
    # Union with an empty tree is the trick that gets x's values multiplied
    # by its weight before any intersecting happens.
    dummy, result = weightedUnion(IIBTree(), x, 1, w)
    for x, w in ordered[1:]:
        # The running result is already weighted, hence the weight of 1.
        dummy, result = weightedIntersection(result, x, 1, w)
    return result
|
||
def mass_weightedUnion(L):
    """A list of (mapping, weight) pairs -> their weightedUnion IIBTree.

    An empty L yields a new, empty IIBTree.  A single pair is unioned
    with an empty IIBTree so its values still get multiplied by its
    weight.  Two or more pairs are combined pairwise, always merging the
    two smallest mappings available.
    """
    if not L:
        return IIBTree()
    if len(L) == 1:
        # A lone input still has to pass through weightedUnion, otherwise
        # its values would never be scaled by the weight.
        mapping, factor = L[0]
        unused, scaled = weightedUnion(IIBTree(), mapping, 1, factor)
        return scaled
    assert len(L) > 1
    # Keep the unions as balanced as possible: queue every input keyed by
    # its size and repeatedly combine the two smallest entries.
    queue = NBest(len(L))
    for mapping, factor in L:
        queue.add((mapping, factor), len(mapping))
    while len(queue) > 1:
        (left, left_weight), unused = queue.pop_smallest()
        (right, right_weight), unused = queue.pop_smallest()
        unused, combined = weightedUnion(left, right,
                                         left_weight, right_weight)
        # The combined mapping is already weighted, so it re-enters the
        # queue with a weight of 1.
        queue.add((combined, 1), len(combined))
    (final, final_weight), unused = queue.pop_smallest()
    return final
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
############################################################################## | ||
# | ||
# Copyright (c) 2001, 2002 Zope Corporation and Contributors. | ||
# All Rights Reserved. | ||
# | ||
# This software is subject to the provisions of the Zope Public License, | ||
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. | ||
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED | ||
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS | ||
# FOR A PARTICULAR PURPOSE. | ||
# | ||
############################################################################## | ||
|
||
from unittest import TestCase, TestSuite, main, makeSuite | ||
|
||
from BTrees.IIBTree import IIBTree, IIBucket | ||
|
||
from Products.ZCTextIndex.SetOps import mass_weightedIntersection | ||
from Products.ZCTextIndex.SetOps import mass_weightedUnion | ||
|
||
class TestSetOps(TestCase):
    # Exercises mass_weightedUnion and mass_weightedIntersection with both
    # IIBTree and IIBucket inputs.

    def testEmptyLists(self):
        # With no inputs at all, each operation returns an empty mapping.
        for operation in mass_weightedIntersection, mass_weightedUnion:
            self.assertEqual(len(operation([])), 0)

    def testIdentity(self):
        # A single input with weight 1 must come back with the same items.
        tree = IIBTree([(1, 2)])
        bucket = IIBucket([(1, 2)])
        operations = (mass_weightedUnion, mass_weightedIntersection)
        for mapping in (tree, bucket):
            for operation in operations:
                outcome = operation([(mapping, 1)])
                self.assertEqual(len(outcome), 1)
                self.assertEqual(list(outcome.items()),
                                 list(mapping.items()))

    def testScalarMultiply(self):
        # A single input with weight w keeps its keys and has every value
        # multiplied by w.
        tree = IIBTree([(1, 2), (2, 3), (3, 4)])
        allkeys = [1, 2, 3]
        bucket = IIBucket(tree)
        operations = (mass_weightedUnion, mass_weightedIntersection)
        factors = (0, 1, 5, 10)
        for mapping in (tree, bucket):
            self.assertEqual(list(mapping.keys()), allkeys)
            for operation in operations:
                for factor in factors:
                    outcome = operation([(mapping, factor)])
                    self.assertEqual(allkeys, list(outcome.keys()))
                    for key in mapping.keys():
                        self.assertEqual(mapping[key] * factor,
                                         outcome[key])

    def testPairs(self):
        # Construct a pair of inputs that overlap on keys 3 and 7 only.
        t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
        t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
        allkeys = [1, 3, 5, 7, 9]
        b1 = IIBucket(t1)
        b2 = IIBucket(t2)
        inputs = (t1, t2, b1, b2)
        weight_pairs = ((0, 0), (1, 10), (10, 1), (2, 3))
        for x in inputs:
            for key in x.keys():
                self.assertEqual(key in allkeys, 1)
            for y in inputs:
                for w1, w2 in weight_pairs:
                    # Both argument orders must give the same answer.
                    forward = [(x, w1), (y, w2)]
                    backward = [(y, w2), (x, w1)]

                    # Test the union: every key found in either input.
                    expected = [
                        (key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                        for key in allkeys
                        if x.has_key(key) or y.has_key(key)
                    ]
                    expected.sort()
                    for arguments in (forward, backward):
                        got = mass_weightedUnion(arguments)
                        self.assertEqual(expected, list(got.items()))

                    # Test the intersection: only keys found in both.
                    expected = [
                        (key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                        for key in allkeys
                        if x.has_key(key) and y.has_key(key)
                    ]
                    expected.sort()
                    for arguments in (forward, backward):
                        got = mass_weightedIntersection(arguments)
                        self.assertEqual(expected, list(got.items()))
|
||
def test_suite():
    """Return a suite containing every test method of TestSetOps."""
    # makeSuite() is deprecated since Python 3.11 and removed in 3.13.  The
    # default loader's loadTestsFromTestCase() gathers the same test methods.
    # Imported locally so this function does not rely on a new module-level
    # name.
    from unittest import defaultTestLoader
    return defaultTestLoader.loadTestsFromTestCase(TestSetOps)
|
||
# When executed as a script, hand control to unittest and run test_suite().
if __name__ == "__main__":
    main(defaultTest="test_suite")