In [1]:
# Import necessary libraries
import numpy as np
import random

In [4]:
# Define the Cluster class (one group of points plus its head)
class Cluster:
    """A group of points together with one designated representative.

    Attributes:
        elements: list of the points currently assigned to this cluster.
        head: the point acting as the cluster's representative, or None
            while no representative has been chosen.
    """

    def __init__(self, head=None, elements=None):
        """Create a cluster, empty and headless unless told otherwise.

        Args:
            head: optional representative point (default None).
            elements: optional iterable of initial points. It is copied into
                a fresh list so clusters never share a list with the caller
                or with each other.
        """
        # Build a new list per instance; a shared mutable default would leak
        # points from one cluster into another.
        self.elements = [] if elements is None else list(elements)
        self.head = head

In [17]:
def distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Both arguments are converted to NumPy arrays, so lists, tuples and
    arrays of matching shape are accepted.
    """
    offset = np.asarray(point1) - np.asarray(point2)
    return np.linalg.norm(offset)

def initialize_clusters(data, seed = None):
    """Build the starting state: one cluster holding every point.

    Args:
        data: the points to cluster. Any array-like that NumPy can convert
            is accepted (ndarray, list of lists, list of tuples, ...).
        seed: optional seed. When given, Python's global `random` generator
            is reseeded with it before the head is drawn, which makes the
            choice reproducible.

    Returns:
        A one-element list containing the initial cluster. Its `elements`
        are the points as plain Python lists and its `head` is one of those
        points picked at random.

    Raises:
        IndexError: if `data` contains no points (raised by `random.choice`).
    """
    cluster = Cluster()
    # np.asarray lets plain Python sequences through as well as ndarrays;
    # tolist() then yields independent nested lists for the cluster to own.
    cluster.elements = np.asarray(data).tolist()
    if seed is not None:
        random.seed(seed)
    cluster.head = random.choice(cluster.elements)
    return [cluster]

def expand_clusters(clusters, j):
    """Split off one new cluster around the point farthest from its head.

    Only the first `j - 1` clusters of `clusters` are examined. Among their
    elements, the one with the greatest distance to its own cluster's head
    becomes the head of a new cluster. Every examined element that is at
    least as close to the new head as to its current head is then moved into
    the new cluster (ties go to the new cluster).

    Args:
        clusters: list of clusters; the examined clusters are modified in
            place and the new cluster is appended to this same list.
        j: 1-based index of the cluster being created, i.e. the number of
            clusters to examine plus one.

    Returns:
        The same `clusters` list, now one cluster longer.

    Raises:
        IndexError: if `clusters` holds fewer than `j - 1` clusters.
    """
    examined = [clusters[i] for i in range(j - 1)]

    # Locate the element that is farthest from the head of its own cluster.
    max_distance = -1
    v_i = None
    for current_cluster in examined:
        for point in current_cluster.elements:
            dist = distance(point, current_cluster.head)
            if dist > max_distance:
                max_distance = dist
                v_i = point

    # That element becomes the head of the new cluster.
    new_cluster = Cluster()
    new_cluster.head = v_i
    new_cluster.elements = []

    # Partition each examined cluster in a single pass. Deciding per element
    # (rather than filtering afterwards by value membership) keeps the step
    # linear and guarantees that an element is removed from its cluster only
    # if that very element was added to the new one.
    for current_cluster in examined:
        kept = []
        for point in current_cluster.elements:
            if distance(point, v_i) <= distance(point, current_cluster.head):
                new_cluster.elements.append(point)
            else:
                kept.append(point)
        current_cluster.elements = kept

    clusters.append(new_cluster)

    return clusters

def get_heads(clusters):
    """Return the head of every cluster, in the order the clusters are given."""
    return [member.head for member in clusters]


In [22]:
# Demo: partition six 2-D points into k clusters, one new cluster per step.
data = np.array([[1, 2], [2, 3], [5, 5], [8, 8], [1, 0], [0, 1]])  
k = 3  # Number of clusters
seed = 5331
clusters = initialize_clusters(data, seed)

# The first cluster already exists, so clusters 2..k are still to be created.
# A dedicated loop variable is used so that `k` keeps its meaning afterwards.
for j in range(2, k + 1):
    clusters = expand_clusters(clusters, j)

# Get the heads of the clusters
heads = get_heads(clusters)

# Print resulting cluster heads
for i, head in enumerate(heads):
    print(f"Head of Cluster B_{i + 1}: {head}")

# Print each resulting cluster: its head followed by its elements
for i in range(k):
    current_cluster = clusters[i]
    print(f"Head of Cluster B_{i + 1}: {current_cluster.head}")
    print(f"Cluster elements: {current_cluster.elements}")

Head of Cluster B_1: [1, 0]
Head of Cluster B_2: [8, 8]
Head of Cluster B_3: [5, 5]
Head of Cluster B_1: [1, 0]
Cluster elements: [[1, 2], [2, 3], [1, 0], [0, 1]]
Head of Cluster B_2: [8, 8]
Cluster elements: [[8, 8]]
Head of Cluster B_3: [5, 5]
Cluster elements: [[5, 5]]
