Skip to content

Commit

Permalink
fix canopy index
Browse files: browse the repository at this point in the history
  • Loading branch information
GreatYYX committed Nov 25, 2019
1 parent 62abc8c commit 8ddb3d2
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
6 changes: 3 additions & 3 deletions rltk/blocking/canopy_block_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,17 @@ def generate(self, block1: Block, block2: Block, output_block: Block = None):

clusters = self._run_canopy_clustering(dataset, self._t1, self._t2, self._distance_metric)

for c in clusters:
for cid, c in enumerate(clusters):
for vec in c:
key = self._encode_key(vec)
set_ = block1.get(key)
if set_:
for ds_id, rid in set_:
output_block.add(key, ds_id, rid)
output_block.add(cid, ds_id, rid)
set_ = block2.get(key)
if set_:
for ds_id, rid in set_:
output_block.add(key, ds_id, rid)
output_block.add(cid, ds_id, rid)
return output_block

@staticmethod
Expand Down
14 changes: 11 additions & 3 deletions rltk/tests/test_blocking.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import random

from rltk.record import Record
from rltk.dataset import Dataset
Expand Down Expand Up @@ -77,12 +78,19 @@ def test_token_block_generator():


# Checks that CanopyBlockGenerator.generate groups record ids into the expected
# blocks, with blocks keyed by cluster index rather than by the encoded vector.
def test_canopy_block_generator():
# Fix the RNG seed so the expected groups below are reproducible.
# NOTE(review): presumably the canopy clustering picks centers at random -- confirm.
random.seed(0)
# Distance between two vectors is the absolute difference of their first components.
bg = CanopyBlockGenerator(t1=5, t2=1, distance_metric=lambda x, y: abs(x[0] - y[0]))
# Each record maps to a one-element vector: the zero-based alphabet offset of the
# lower-cased first letter of its name (0x61 == ord('a')).
block = bg.block(ds, function_=lambda r: [ord(r.name[0].lower()) - 0x61])
# The same block is passed as both inputs to generate().
output_block = bg.generate(block, block)
# NOTE(review): the next two lines look like the pre-commit assertion (string keys
# such as '[1]') that this diff removes; the +/- markers are not visible in this
# view. generate() now adds entries under the integer cluster index, so confirm
# these lines are not part of the final test.
for k, _ in output_block.key_set_adapter:
assert k in ('[1]', '[2]', '[0]', '[15]')

# Expected record ids per block; the list position is used as the block key below.
result = [
['4', '5'],
['1', '2', '3', '6'],
['2', '6'],
['6']
]
for k, v in output_block.key_set_adapter:
# r[1] is the second field of each entry -- presumably the record id of a
# (dataset_id, record_id) pair; verify against Block.
ids = [r[1] for r in v]
# Compare order-insensitively; k indexes directly into the expected list.
assert sorted(ids) == sorted(result[k])

def test_sorted_neighbourhood_block_generator():
class SNConcreteRecord1(Record):
Expand Down

0 comments on commit 8ddb3d2

Please sign in to comment.