# COMP5331 Group 6 Project: Resilient k-Clustering

In [9]:
import numpy as np
import math

In [None]:
# https://dev.to/theramoliya/python-kruskals-algorithm-for-minimum-spanning-trees-2bmb
class Graph:
    """Weighted graph stored as a flat edge list, with Kruskal's algorithm.

    Vertices are the integers ``0 .. vertices - 1``; each edge is kept as a
    ``[u, v, w]`` list in ``self.graph``.
    """

    def __init__(self, vertices):
        """Create a graph with ``vertices`` vertices and no edges."""
        self.V = vertices
        self.graph = []

    def add_edge(self, u, v, w):
        """Append the edge ``[u, v, w]`` (endpoints ``u``/``v``, weight ``w``)."""
        self.graph.append([u, v, w])

    def remove_edge(self, u, v, w):
        """Remove the first stored edge equal to ``[u, v, w]``.

        Raises:
            ValueError: if no such edge is stored (from ``list.remove``).
        """
        self.graph.remove([u, v, w])

    def kruskal_mst(self):
        """Return a minimum spanning tree as a list of ``[u, v, w]`` edges.

        Edges are considered in non-decreasing weight order (ties keep their
        insertion order) and an edge is kept when it joins two different
        components. If the graph is disconnected, the result is a minimum
        spanning forest with fewer than ``V - 1`` edges instead of an error.

        Side effect: ``self.graph`` is replaced by its weight-sorted copy.
        """

        def find(parent, i):
            # Walk up to the representative of i's component.
            root = i
            while parent[root] != root:
                root = parent[root]
            # Path compression: point every node on the walk at the root.
            while parent[i] != root:
                next_node = parent[i]
                parent[i] = root
                i = next_node
            return root

        def union(parent, rank, x, y):
            # Union by rank: hang the shallower tree under the deeper one.
            x_root = find(parent, x)
            y_root = find(parent, y)

            if rank[x_root] < rank[y_root]:
                parent[x_root] = y_root
            elif rank[x_root] > rank[y_root]:
                parent[y_root] = x_root
            else:
                parent[y_root] = x_root
                rank[x_root] += 1

        result = []
        target_edge_count = self.V - 1

        self.graph = sorted(self.graph, key=lambda item: item[2])
        parent = list(range(self.V))
        rank = [0] * self.V

        for u, v, w in self.graph:
            # A spanning tree is complete once it has V - 1 edges.
            if len(result) >= target_edge_count:
                break
            x = find(parent, u)
            y = find(parent, v)

            if x != y:
                result.append([u, v, w])
                union(parent, rank, x, y)

        return result


In [None]:
class resilient_k_center():
    """Work-in-progress resilient k-center clustering over a NumPy dataset.

    The "(line N)" markers in the comments refer to the pseudo-code listing
    this notebook follows. Only the center selection, graph construction and
    spanning-tree steps are implemented; assigning non-center points to
    clusters is still a TODO.

    Attributes are set from the constructor arguments:
        dataset: 2-D array indexed by row, one point per row.
        k: number of clusters requested.
        epsilon: parameter used to size the random center sample.
        lamb: base of the randomly shifted powers used to round distances.
        number_of_centers: ``int(2 * k * ln(1 / epsilon))``.
    """

    def __init__(self, dataset, k, epsilon, lamb=0.1):
        self.dataset = dataset
        self.k = k
        self.epsilon = epsilon
        self.lamb = lamb
        self.number_of_centers = int(2 * self.k * np.log(1 / self.epsilon))

    def _build_edges_and_weights(self, is_center):
        """Build the candidate edge list and the pair-weight table.

        Args:
            is_center: boolean sequence, one entry per dataset row, marking
                which rows were sampled as centers.

        Returns:
            ``(E, w)`` where ``E`` lists every index pair ``(p, q)``, ``p < q``,
            with at least one center endpoint, and ``w`` maps every pair
            ``(p, q)``, ``p < q``, to its weight: 0 between two centers,
            otherwise ``lamb ** (i - alpha)`` with ``alpha`` uniform in [0, 1)
            and ``i = ceil(alpha + log_lamb(distance))``.
        """
        n = self.dataset.shape[0]
        E = []
        w = {}
        for index_p in range(n):
            p = self.dataset[index_p]
            p_is_center = bool(is_center[index_p])
            for index_q in range(index_p + 1, n):
                q = self.dataset[index_q]
                q_is_center = bool(is_center[index_q])

                # Membership is decided by row index; comparing coordinate
                # values would misclassify any point that merely shares a
                # coordinate with a center.
                if p_is_center or q_is_center:
                    E.append((index_p, index_q))

                if p_is_center and q_is_center:
                    w[(index_p, index_q)] = 0
                    continue

                distance = np.linalg.norm(p - q)
                if distance == 0:
                    # Coincident points: log(0) is undefined, so use weight 0.
                    # NOTE(review): confirm this matches the intended rounding.
                    w[(index_p, index_q)] = 0.0
                    continue

                alpha = np.random.rand()
                i = math.ceil(alpha + math.log(distance, self.lamb))
                w[(index_p, index_q)] = self.lamb ** (i - alpha)
        return E, w

    def resilient_k_center(self, verbose=True):
        """Sample centers, build the weighted graph and its MST.

        Args:
            verbose: when true (the default), print the sampled centers, the
                edge list, the weight table, the graph's edges and the MST.

        Returns:
            A list of ``(p, p)`` tuples, one per sampled center, in dataset
            order. Non-center points are not assigned yet.

        Raises:
            ValueError: if the configured number of centers is below 1.
        """
        n = self.dataset.shape[0]
        # Sampling without replacement cannot draw more rows than exist.
        sample_size = min(self.number_of_centers, n)
        if sample_size < 1:
            raise ValueError(
                "number_of_centers must be at least 1 and the dataset must be "
                "non-empty; got number_of_centers="
                f"{self.number_of_centers}, dataset size={n}"
            )

        # randomly assign centers (line 1)
        center_indices = np.random.choice(n, sample_size, replace=False)
        centers = self.dataset[center_indices]
        is_center = np.zeros(n, dtype=bool)
        is_center[center_indices] = True
        if verbose:
            print(centers)

        # construct edges and weights (line 2-4)
        E, w = self._build_edges_and_weights(is_center)
        if verbose:
            print(E)
            print(w)

        # construct weighted graph (line 5); each edge is stored in both
        # directions with the same weight
        g = Graph(n)
        for index_p, index_q in E:
            weight = w[(index_p, index_q)]
            g.add_edge(index_p, index_q, weight)
            g.add_edge(index_q, index_p, weight)
        if verbose:
            print(g.graph)

        # construct MST (line 6)
        T = g.kruskal_mst()
        if verbose:
            print(T)

        # assign clusters (line 7-8): every center is its own representative
        cluster = [
            (p, p)
            for index_p, p in enumerate(self.dataset)
            if is_center[index_p]
        ]

        # TODO: assign non-center points using T (line 7-8) and implement
        # the remaining steps (line 9-11).
        return cluster
        

In [49]:
# Smoke run on five 2-D points with k = 1 and epsilon = 0.3; `lamb` keeps
# its default. The sampled centers are random, so the output varies per run.
test = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
k, epilson = 1, 0.3  # `epilson` spelling kept: the name is module-level state
model = resilient_k_center(dataset=test, k=k, epsilon=epilson)
cluster = model.resilient_k_center()

[[5 6]
 [1 2]]
[(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (2, 3), (2, 4)]
{(0, 1): 1.009710164223724, (0, 2): 0, (0, 3): 7.42961412118717, (0, 4): 9.933503899362243, (1, 2): 0.9277562474936696, (1, 3): 5.154571545931092, (1, 4): 3.079539843060638, (2, 3): 0.5901621113601702, (2, 4): 3.934687437433978, (3, 4): 0.7260155046611325}
[[0, 1, 1.009710164223724], [1, 0, 1.009710164223724], [0, 2, 0], [2, 0, 0], [0, 3, 7.42961412118717], [3, 0, 7.42961412118717], [0, 4, 9.933503899362243], [4, 0, 9.933503899362243], [1, 2, 0.9277562474936696], [2, 1, 0.9277562474936696], [2, 3, 0.5901621113601702], [3, 2, 0.5901621113601702], [2, 4, 3.934687437433978], [4, 2, 3.934687437433978]]
[[0, 2, 0], [2, 3, 0.5901621113601702], [1, 2, 0.9277562474936696], [2, 4, 3.934687437433978]]


## Evaluation