# K-Means Clustering and Principal Component Analysis from Scratch
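This notebook implements K-Means and PCA from scratch using only NumPy, Pandas, and Matplotlib. No machine-learning library is used for the algorithms themselves; scikit-learn is imported solely to load the Iris dataset.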

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Fetch the bundled iris data set and wrap its measurements in a DataFrame
# whose columns carry the feature names supplied with the data set.
iris = load_iris()
species = iris.target
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Preview the first rows of the feature table
df.head()

In [None]:
# K-Means Clustering Implementation
def k_means(X, k=3, max_iters=100, seed=42):
    """Cluster the rows of ``X`` into ``k`` groups with Lloyd's algorithm.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to cluster; converted to a float ndarray internally.
    k : int, default 3
        Number of clusters. Must satisfy ``1 <= k <= n_samples``.
    max_iters : int, default 100
        Upper bound on the number of assign/update rounds.
    seed : int or None, default 42
        Seed for the private random generator that picks the initial
        centroids. The default keeps the previous fixed-seed behaviour.

    Returns
    -------
    labels : ndarray of shape (n_samples,)
        Index of the nearest returned centroid for every row of ``X``.
    centroids : ndarray of shape (k, n_features)
        Final cluster centres.

    Raises
    ------
    ValueError
        If ``k`` is outside ``1 .. n_samples``.
    """
    X = np.asarray(X, dtype=float)
    n_samples = X.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )

    # A private legacy generator avoids reseeding NumPy's global RNG as a
    # side effect of calling this function.
    rng = np.random.RandomState(seed)
    centroids = X[rng.choice(n_samples, k, replace=False)]

    def assign(centers):
        # Pairwise Euclidean distances, shape (n_samples, k), then nearest centre.
        distances = np.linalg.norm(X[:, np.newaxis] - centers, axis=2)
        return np.argmin(distances, axis=1)

    for _ in range(max_iters):
        labels = assign(centroids)

        # Start from the current centres so that a cluster which received no
        # points keeps its previous centroid instead of becoming NaN.
        new_centroids = centroids.copy()
        for j in range(k):
            members = X[labels == j]
            if len(members):
                new_centroids[j] = members.mean(axis=0)

        if np.allclose(centroids, new_centroids):
            break
        centroids = new_centroids
    else:
        # Loop ended without converging (or max_iters == 0): recompute the
        # labels so they correspond to the centroids that are returned.
        labels = assign(centroids)

    return labels, centroids

# Cluster the raw feature matrix into three groups
X = df.to_numpy()
kmeans_labels, kmeans_centroids = k_means(X, 3)

In [None]:
# Principal Component Analysis (PCA) Implementation
def pca(X, n_components=2):
    """Project ``X`` onto its leading principal components.

    The data is mean-centred, the sample covariance of the features is
    eigendecomposed, and the centred data is projected onto the eigenvectors
    with the largest eigenvalues.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Input data, one observation per row.
    n_components : int, default 2
        Number of components to keep. If it exceeds the number of features,
        all available components are returned.

    Returns
    -------
    ndarray of shape (n_samples, min(n_components, n_features))
        Coordinates of every observation in the principal-component basis,
        ordered by decreasing eigenvalue. The sign of each column is whatever
        ``np.linalg.eigh`` returns for the eigenvector and is not normalised.
    """
    X_centered = X - X.mean(axis=0)

    # np.cov squeezes a single-feature result to a 0-d array, which eigh
    # rejects; atleast_2d restores the (1, 1) matrix for that case.
    covariance_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))

    # eigh is appropriate because a covariance matrix is symmetric.
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # Order components from largest to smallest eigenvalue and keep the top ones.
    sorted_idx = np.argsort(eigenvalues)[::-1]
    eigenvectors = eigenvectors[:, sorted_idx[:n_components]]
    return X_centered @ eigenvectors

# Apply PCA
X_pca = pca(X, n_components=2)

In [None]:
# Plot PCA results with K-Means clusters
fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=kmeans_labels, cmap='viridis')
ax.set_title('PCA projection with K-Means Clustering')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.grid(True)
# Build one legend entry per cluster id so each colour in the plot is
# identified; a single label on a colour-mapped scatter yields only one handle.
ax.legend(*scatter.legend_elements(), title='K-Means clusters')
plt.show()