# COMP5331 Group 6 Project: Resilient k-Clustering

In [1]:
import numpy as np
import math
import copy

In [3]:
# https://dev.to/theramoliya/python-kruskals-algorithm-for-minimum-spanning-trees-2bmb
class Graph:
    """Weighted graph stored as an edge list, with Kruskal's algorithm.

    Vertices are the integers ``0 .. vertices - 1`` and every edge is a
    ``[u, v, w]`` list (endpoint, endpoint, weight).
    """

    def __init__(self, vertices, graph=None):
        """Create a graph on ``vertices`` nodes.

        Args:
            vertices: Number of vertices.
            graph: Optional initial edge list of ``[u, v, w]`` entries. It is
                deep-copied, so the caller's list is never shared.
        """
        self.V = vertices
        self.graph = [] if graph is None else copy.deepcopy(graph)

    def copy(self):
        """Return an independent copy (the edge list is deep-copied)."""
        return Graph(self.V, self.graph)

    def add_edge(self, u, v, w):
        """Append the edge ``[u, v, w]`` to the edge list."""
        self.graph.append([u, v, w])

    def remove_edge(self, u, v, w):
        """Remove the first edge equal to ``[u, v, w]``.

        Raises:
            ValueError: If no such edge is stored.
        """
        self.graph.remove([u, v, w])

    def kruskal_mst(self):
        """Compute a minimum spanning tree with Kruskal's algorithm.

        As a side effect ``self.graph`` is rebound to the edge list sorted by
        weight (stable, so equal-weight edges keep their insertion order).

        Returns:
            A list of ``[u, v, w]`` edges in the order they were accepted.
            For a connected graph this is a spanning tree with ``V - 1``
            edges. If the graph is disconnected (or has no edges) the
            minimum spanning forest is returned instead of failing.
        """
        parent = list(range(self.V))
        rank = [0] * self.V

        def find(node):
            # Iterative find with path compression: avoids hitting the
            # recursion limit on long parent chains.
            root = node
            while parent[root] != root:
                root = parent[root]
            while parent[node] != root:
                next_node = parent[node]
                parent[node] = root
                node = next_node
            return root

        def union(x_root, y_root):
            # Union by rank; on a tie the first root wins and its rank grows.
            if rank[x_root] < rank[y_root]:
                parent[x_root] = y_root
            elif rank[x_root] > rank[y_root]:
                parent[y_root] = x_root
            else:
                parent[y_root] = x_root
                rank[x_root] += 1

        self.graph = sorted(self.graph, key=lambda item: item[2])

        result = []
        edges_needed = self.V - 1
        for u, v, w in self.graph:
            if len(result) >= edges_needed:
                break  # the tree is complete; remaining edges are irrelevant
            x = find(u)
            y = find(v)
            if x == y:
                continue  # the edge would close a cycle
            result.append([u, v, w])
            union(x, y)

        return result


In [4]:
class resilient_k_center():
    """Cluster points around randomly sampled centers using a rounded-weight MST.

    Only the first stages of the algorithm are implemented (see the TODO in
    ``resilient_k_center``).
    """

    def __init__(self, dataset, k, epsilon, lamb=0.1):
        """Store the parameters and derive how many centers get sampled.

        Args:
            dataset: NumPy array with one point per row (rows are indexed and
                ``shape[0]`` is read).
            k: The ``k`` of k-clustering; here it only scales the sample size.
            epsilon: Used as ``log(1 / epsilon)`` when sizing the sample.
            lamb: Base of the randomized geometric rounding of edge weights.
        """
        self.dataset = dataset
        self.k = k
        self.epsilon = epsilon
        self.lamb = lamb
        self.number_of_centers = int(2 * self.k * np.log(1 / self.epsilon))

    def _rounded_weight(self, p, q):
        """Return the randomly rounded weight of the edge between ``p`` and ``q``.

        Coincident points get weight 0, because the logarithm of a zero
        distance is undefined.
        """
        distance = np.linalg.norm(p - q)
        if distance == 0:
            return 0
        alpha = np.random.rand()
        i = math.ceil(alpha + math.log(distance, self.lamb))
        return self.lamb ** (i - alpha)

    def resilient_k_center(self):
        """Sample centers, build the rounded-weight MST and assign clusters.

        Centers are tracked by row index, so a point that merely shares a
        coordinate value with a center is not mistaken for one.

        Returns:
            A list with one ``(point, center)`` tuple per dataset row, in row
            order. A sampled center is paired with itself.

        Raises:
            ValueError: If ``number_of_centers`` is below 1, since there is
                then nothing to attach points to. ``np.random.choice`` also
                raises ``ValueError`` if it exceeds the number of rows.
        """
        n = self.dataset.shape[0]
        if self.number_of_centers < 1:
            raise ValueError(
                "number_of_centers must be at least 1, got "
                f"{self.number_of_centers} (k={self.k}, epsilon={self.epsilon})"
            )

        # randomly assign centers (line 1)
        center_indices = np.random.choice(n,
                                          self.number_of_centers,
                                          replace=False)
        centers = self.dataset[center_indices]
        center_set = set(center_indices.tolist())
        print(centers)

        # construct edges and weights (line 2-4)
        E = []
        w = {}
        for index_p in range(n):
            p_is_center = index_p in center_set
            for index_q in range(index_p + 1, n):
                q_is_center = index_q in center_set
                if not (p_is_center or q_is_center):
                    continue  # only edges touching a center exist
                E.append((index_p, index_q))

                # Integrated line 1-7 of algorithm Resilient-MST in the weight update below
                if p_is_center and q_is_center:
                    w[(index_p, index_q)] = 0
                else:
                    w[(index_p, index_q)] = self._rounded_weight(
                        self.dataset[index_p], self.dataset[index_q])
        print(E)
        print(w)

        # construct weighted graph (line 5)
        # Both directions are stored; the reverse copy never changes the MST
        # and is kept so the printed edge list stays the same.
        g = Graph(len(self.dataset))
        for index_p, index_q in E:
            g.add_edge(index_p, index_q, w[(index_p, index_q)])
            g.add_edge(index_q, index_p, w[(index_p, index_q)])
        print("weighted graph: \n", g.graph)

        # construct MST (line 6)
        T = g.kruskal_mst()
        print("resilient MST: \n", T)

        # assign clusters (line 7-8)
        # One pass over the tree: each non-center is attached to the center at
        # the other end of its tree edge (the first such edge wins).
        assigned_center = {}
        for index_u, index_v, _ in T:
            u_is_center = index_u in center_set
            v_is_center = index_v in center_set
            if v_is_center and not u_is_center:
                assigned_center.setdefault(index_u, index_v)
            elif u_is_center and not v_is_center:
                assigned_center.setdefault(index_v, index_u)

        cluster = []
        for index_p, p in enumerate(self.dataset):
            if index_p in center_set:
                cluster.append((p, p))
            else:
                cluster.append((p, self.dataset[assigned_center[index_p]]))

        # (line 9-11)
        # TODO

        print(cluster)
        return cluster
        

In [None]:
# class resilient_k_center():
#     def __init__(self, dataset, k, epsilon, lamb=0.1):
#         self.dataset = dataset
#         self.k = k
#         self.epsilon = epsilon
#         self.lamb = lamb
#         self.number_of_centers = int(2 * self.k * np.log(1 / self.epsilon))

#     def resilient_k_center(self):
#         # randomly assign centers (line 1)
#         centers = self.dataset[np.random.choice(self.dataset.shape[0],
#                                                 self.number_of_centers,
#                                                 replace=False)]
#         print("Random Centers:", centers)

#         # construct edges and weights (line 2-4)
#         E = []
#         w = {}
#         for index_p, p in enumerate(self.dataset):
#             for index_q, q in enumerate(self.dataset):
#                 if index_p < index_q:
#                     if len(np.intersect1d([p, q], centers)) != 0:
#                         E.append((index_p, index_q))
#                     if (p in centers) and (q in centers):
#                         w[(index_p, index_q)] = 0
#                     else:
#                         alpha = np.random.rand()
#                         i = math.ceil(alpha + math.log(np.linalg.norm(p - q), self.lamb))
#                         w[(index_p, index_q)] = self.lamb ** (i - alpha)
        
#         # construct weighted graph (line 5)
#         g = Graph(len(self.dataset))
#         for edge in E:
#             index_p, index_q = edge
#             g.add_edge(index_p, index_q, w[(index_p, index_q)])
#             g.add_edge(index_q, index_p, w[(index_p, index_q)])

#         # construct MST (line 6)
#         T = g.kruskal_mst()

#         # assign clusters (line 7-8)
#         cluster = []
#         for index_p, p in enumerate(self.dataset):
#             if p in centers:
#                 cluster.append((p, p))
#             else:
#                 for edge in T:
#                     index_u, index_v, _ = edge
#                     if index_p == index_u and self.dataset[index_v] in centers:
#                         cluster.append((p, self.dataset[index_v]))
#                     elif index_p == index_v and self.dataset[index_u] in centers:
#                         cluster.append((p, self.dataset[index_u]))

#         # (line 9-11)
#         # Identify the vertices in P \ C incident to the εn heaviest edges of T
#         n = len(self.dataset)
#         heaviest_edges = sorted(T, key=lambda item: item[2], reverse=True)[:int(self.epsilon * n)]
#         L = set()
#         for edge in heaviest_edges:
#             L.add(edge[0])
#             L.add(edge[1])

#         # Select centers using [27] on P with k
#         C_prime = self.dataset[np.random.choice(list(L), self.k, replace=False)]

#         # Assign the closest center of C' to each point in L
#         for p in L:
#             min_dist = float('inf')
#             closest_center = None
#             for c in C_prime:
#                 dist = np.linalg.norm(p - c)
#                 if dist < min_dist:
#                     min_dist = dist
#                     closest_center = c
#             cluster.append((p, closest_center))

#         print("Final Clusters:", cluster)
#         return cluster

In [5]:
# Smoke run: five 2-D points clustered with k = 1 and epsilon = 0.3.
test = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
    [9, 10],
])
k = 1
# NOTE(review): "epilson" is a misspelling of "epsilon"; the name is kept
# because other cells may refer to it.
epilson = 0.3
model = resilient_k_center(dataset=test, k=k, epsilon=epilson)
cluster = model.resilient_k_center()

[[7 8]
 [1 2]]
[(0, 1), (0, 2), (0, 3), (0, 4), (1, 3), (2, 3), (3, 4)]
{(0, 1): 1.0842375655621663, (0, 2): 0.5752409154347412, (0, 3): 0, (0, 4): 7.39503661152927, (1, 3): 2.4349454195736207, (2, 3): 0.7720691575952507, (3, 4): 0.6274459048921588}
weighted graph: 
 [[0, 1, 1.0842375655621663], [1, 0, 1.0842375655621663], [0, 2, 0.5752409154347412], [2, 0, 0.5752409154347412], [0, 3, 0], [3, 0, 0], [0, 4, 7.39503661152927], [4, 0, 7.39503661152927], [1, 3, 2.4349454195736207], [3, 1, 2.4349454195736207], [2, 3, 0.7720691575952507], [3, 2, 0.7720691575952507], [3, 4, 0.6274459048921588], [4, 3, 0.6274459048921588]]
resilient MST: 
 [[0, 3, 0], [0, 2, 0.5752409154347412], [3, 4, 0.6274459048921588], [0, 1, 1.0842375655621663]]
[(array([1, 2]), array([1, 2])), (array([3, 4]), array([1, 2])), (array([5, 6]), array([1, 2])), (array([7, 8]), array([7, 8])), (array([ 9, 10]), array([7, 8]))]


## Evaluation