In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_rand_score, homogeneity_score, completeness_score
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances
import numpy as np

In [2]:
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(iris_df)

In [3]:
kmeans_default = KMeans(n_clusters=3, random_state=42, n_init='auto')
clusters_default = kmeans_default.fit_predict(scaled_features)



In [4]:
def manhattan_kmeans(data, n_clusters, random_state=42):
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state, init='random', n_init=1) #manual init, n_init=1
    kmeans.fit(data)
    distances = manhattan_distances(data, kmeans.cluster_centers_)
    clusters = np.argmin(distances, axis=1)
    return clusters, kmeans.cluster_centers_

In [8]:
clusters_manhattan, centroids_manhattan = manhattan_kmeans(scaled_features, 3)



In [9]:
kmeans_random_init = KMeans(n_clusters=3, random_state=42, init='random', n_init='auto')
clusters_random_init = kmeans_random_init.fit_predict(scaled_features)



In [10]:
def evaluate_clusters(true_labels, predicted_clusters, label):
    ari = adjusted_rand_score(true_labels, predicted_clusters)
    homogeneity = homogeneity_score(true_labels, predicted_clusters)
    completeness = completeness_score(true_labels, predicted_clusters)
    print(f"\nEvaluation Metrics for {label}:")
    print(f"  Adjusted Rand Index (ARI): {ari:.4f}")
    print(f"  Homogeneity Score: {homogeneity:.4f}")
    print(f"  Completeness Score: {completeness:.4f}")

In [11]:
evaluate_clusters(iris.target, clusters_default, "Default K-means")
evaluate_clusters(iris.target, clusters_manhattan, "Manhattan K-means")
evaluate_clusters(iris.target, clusters_random_init, "Random Initialization K-means")


Evaluation Metrics for Default K-means:
  Adjusted Rand Index (ARI): 0.4328
  Homogeneity Score: 0.5347
  Completeness Score: 0.6570

Evaluation Metrics for Manhattan K-means:
  Adjusted Rand Index (ARI): 0.6530
  Homogeneity Score: 0.6823
  Completeness Score: 0.6853

Evaluation Metrics for Random Initialization K-means:
  Adjusted Rand Index (ARI): 0.6101
  Homogeneity Score: 0.6524
  Completeness Score: 0.6527
