In [1]:
# K-Means Clustering on the Iris Dataset (K = 3 and K = 4)
# ---------------------------------------------------------
# - Uses Euclidean distance
# - Random initialization from existing data points
# - Runs for a fixed 10 iterations (no convergence check)
# - Prints final cluster means

import pandas as pd
import numpy as np
import os

In [2]:
# Load the dataset from the CSV file located in the working directory.
df = pd.read_csv(filepath_or_buffer="IRIS.csv")

In [3]:
# Build the feature matrix: keep only the numeric columns of `df`
# and convert them to a float NumPy array.
X = (
    df.select_dtypes(include='number')
    .to_numpy()
    .astype(float)
)

In [4]:
# Euclidean distance
def euclidean_distances(X, centers):
    """Return the Euclidean distance from every point to every center.

    For 2-D inputs ``X`` of shape (n, d) and ``centers`` of shape (k, d),
    the result has shape (n, k): entry ``[i, j]`` is the distance between
    ``X[i]`` and ``centers[j]``.
    """
    # Insert singleton axes so the subtraction broadcasts to (n, k, d).
    points = X[:, np.newaxis, :]
    anchors = centers[np.newaxis, :, :]
    offsets = points - anchors
    # Collapse the feature axis into a single distance per (point, center).
    return np.linalg.norm(offsets, axis=2)


In [5]:
# K-means implementation
def kmeans(X, k, iterations=10, seed=None):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    Centers are initialized from ``k`` distinct rows of ``X`` chosen at
    random. Each iteration assigns every point to its nearest center
    (Euclidean distance) and then moves each center to the mean of the
    points assigned to it. No convergence test is performed: exactly
    ``iterations`` update steps are run.

    Parameters
    ----------
    X : array-like of shape (n, d)
        Data points, one per row. Converted to a float array internally so
        that integer input does not truncate the computed means.
    k : int
        Number of clusters; must satisfy ``1 <= k <= n``.
    iterations : int, default 10
        Number of assign/update steps to run; must be non-negative.
    seed : optional
        Passed to ``np.random.default_rng`` to make the run reproducible.

    Returns
    -------
    centers : ndarray of shape (k, d)
        Final cluster centers.
    labels : ndarray of shape (n,)
        Index of the nearest returned center for every row of ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, ``k`` is outside ``[1, n]``, or ``iterations``
        is negative.
    """
    # Work in floating point: with an integer array the center buffer would
    # inherit the integer dtype and silently truncate every mean.
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n, d = X.shape
    if not 1 <= k <= n:
        raise ValueError(f"k must be between 1 and the number of samples ({n}), got {k}")
    if iterations < 0:
        raise ValueError(f"iterations must be non-negative, got {iterations}")

    rng = np.random.default_rng(seed)

    # initialize random centers (distinct rows of X)
    init_idx = rng.choice(n, size=k, replace=False)
    centers = X[init_idx].copy()

    # iterate
    for _ in range(iterations):
        dists = euclidean_distances(X, centers)
        labels = np.argmin(dists, axis=1)

        new_centers = np.zeros_like(centers)
        for j in range(k):
            cluster_points = X[labels == j]
            if len(cluster_points) == 0:
                # Empty cluster: re-seed its center at a random data point.
                new_centers[j] = X[rng.integers(0, n)]
            else:
                new_centers[j] = cluster_points.mean(axis=0)

        centers = new_centers

    # Final assignment against the centers actually returned. Without this
    # the labels would refer to the centers from before the last update
    # (and would not exist at all when iterations == 0).
    labels = np.argmin(euclidean_distances(X, centers), axis=1)

    return centers, labels


In [6]:
# Run K = 3
# A fixed seed makes the random initialization reproducible, so the cluster
# numbering and means are the same on every Restart & Run All.
# Re-run this cell to refresh any output saved from an earlier, unseeded run.
centers_k3, labels_k3 = kmeans(X, k=3, iterations=10, seed=42)

# Print final centers
print("\nFinal Cluster Means for K = 3:")
cols = df.select_dtypes(include='number').columns
print(pd.DataFrame(centers_k3, columns=cols))


Final Cluster Means for K = 3:
   sepal_length  sepal_width  petal_length  petal_width
0      5.006000     3.418000      1.464000     0.244000
1      6.853846     3.076923      5.715385     2.053846
2      5.883607     2.740984      4.388525     1.434426


In [7]:
# Run K = 4
# A fixed seed makes the random initialization reproducible, so the cluster
# numbering and means are the same on every Restart & Run All.
# Re-run this cell to refresh any output saved from an earlier, unseeded run.
centers_k4, labels_k4 = kmeans(X, k=4, iterations=10, seed=42)

# Print final centers
print("\nFinal Cluster Means for K = 4:")
cols = df.select_dtypes(include='number').columns
print(pd.DataFrame(centers_k4, columns=cols))


Final Cluster Means for K = 4:
   sepal_length  sepal_width  petal_length  petal_width
0      6.965517     3.110345      5.879310     2.165517
1      6.262791     2.865116      4.865116     1.637209
2      5.532143     2.635714      3.960714     1.228571
3      5.006000     3.418000      1.464000     0.244000
