In [1]:
import numpy as np
from operator import itemgetter

In [2]:
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
def similarity(cls, link="single"):
    """Rank every pair of clusters by a cosine-similarity linkage score.

    Parameters
    ----------
    cls : iterable of clusters
        Each cluster is handed unchanged to ``cosine_similarity`` as one
        sample matrix (in this notebook: a list of 1-D numpy vectors).
    link : {"single", "complete"}, default "single"
        ``"single"`` scores a pair by the maximum pairwise similarity of its
        members, ``"complete"`` by the minimum.

    Returns
    -------
    list of ((i, j), score)
        One entry per pair of positions ``i < j`` in ``cls``, sorted by
        ``score`` in descending order. Empty when there are fewer than two
        clusters.

    Raises
    ------
    NotImplementedError
        If ``link`` is not a supported linkage name.
    """
    # Validate before doing any work so an unsupported linkage fails fast
    # with a clear error, even when there are no pairs to score.
    if link not in ('single', 'complete'):
        raise NotImplementedError(
            "unsupported link {!r}; expected 'single' or 'complete'".format(link)
        )
    use_max = link == 'single'

    clusters = list(cls)
    scores = []

    for i in range(len(clusters)):
        # Start at i + 1: each unordered pair is scored exactly once.
        for j in range(i + 1, len(clusters)):
            sim = cosine_similarity(clusters[i], clusters[j])
            scores.append(((i, j), sim.max() if use_max else sim.min()))

    # Most similar pair first; ties keep their (i, j) generation order.
    scores.sort(key=itemgetter(1), reverse=True)
    return scores

In [4]:
def print_sims(sims, limit=3):
    """Print the leading entries of a similarity ranking, one per line.

    Parameters
    ----------
    sims : iterable of (cls, score)
        Entries to print; ``cls`` is itself an iterable whose items are
        printed as separate tab-separated fields, followed by ``score``.
    limit : int, default 3
        Maximum number of entries to print.
    """
    for position, (members, value) in enumerate(sims):
        # Stop as soon as `limit` entries have been written.
        if limit <= position:
            return
        print(*members, value, sep='\t')

In [5]:
def merge(cls, link='single'):
    """Agglomerate ``cls`` in place until a single cluster remains.

    Each round re-ranks all cluster pairs with ``similarity(cls, link)``,
    takes the top-ranked pair, prints a tab-separated line
    ``step, i, j, score`` and folds cluster ``j`` into cluster ``i``.
    The printed indices are positions in the list as it stands at that
    step, so they shift as the list shrinks.

    Parameters
    ----------
    cls : list of list
        Clusters to merge. Mutated in place: the inner lists are extended
        and the outer list is shortened.
    link : str, default 'single'
        Linkage name forwarded to ``similarity``.
    """
    step = 0
    while len(cls) > 1:
        step += 1
        (keep, drop), score = similarity(cls, link)[0]
        print(step, keep, drop, score, sep='\t')
        # Move the members of the absorbed cluster into the surviving one.
        cls[keep].extend(cls.pop(drop))

In [6]:
# Eight 2-D sample points used as the initial singleton clusters.
a, b, c, d, e, f, g, h = map(np.asarray, [
    [0.6, 1.9],
    [1.8, 1.6],
    [2.7, 2.0],
    [3.0, 2.1],
    [3.0, 2.6],
    [3.1, 4.5],
    [3.8, 0.6],
    [4.2, 2.7],
])

In [7]:
# Fresh singleton clusters (merge mutates its argument), merged with the 'complete' link.
cluster = [[point] for point in (a, b, c, d, e, f, g, h)]
merge(cluster, link='complete')

1	1	4	0.9999212291748408
2	2	3	0.9996402893621831
3	2	5	0.9978088313354032
4	1	2	0.9879644204140065
5	0	2	0.9561159747598555
6	1	2	0.8418791389638738
7	0	1	0.44616961942071937


```
iter 0: [[a], [b], [c], [d], [e], [f], [g], [h]]
iter 1: [[a], [b, e], [c], [d], [f], [g], [h]]
iter 2: [[a], [b, e], [c, d], [f], [g], [h]]
iter 3: [[a], [b, e], [c, d, h], [f], [g]]
iter 4: [[a], [b, e, c, d, h], [f], [g]]
iter 5: [[a, f], [b, e, c, d, h], [g]]
iter 6: [[a, f], [b, e, c, d, h, g]]
iter 7: [[a, f, b, e, c, d, h, g]]
```

In [8]:
# Fresh singleton clusters (merge mutates its argument), merged with the 'single' link.
cluster = [[point] for point in (a, b, c, d, e, f, g, h)]
merge(cluster, link='single')

1	1	4	0.9999212291748408
2	2	3	0.9996402893621831
3	2	5	0.9992243739289179
4	1	2	0.9970720986202984
5	1	2	0.9711178912251817
6	0	1	0.9561159747598555
7	0	1	0.9152228907752498


```
iter 0: [[a], [b], [c], [d], [e], [f], [g], [h]]
iter 1: [[a], [b, e], [c], [d], [f], [g], [h]]
iter 2: [[a], [b, e], [c, d], [f], [g], [h]]
iter 3: [[a], [b, e], [c, d, h], [f], [g]]
iter 4: [[a], [b, e, c, d, h], [f], [g]]
iter 5: [[a], [b, e, c, d, h, f], [g]]
iter 6: [[a, b, e, c, d, h, f], [g]]
iter 7: [[a, b, e, c, d, h, f, g]]
```