In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# ----------------------------
# 1. Load Dataset
# ----------------------------
# Locations checked for the CSV, in order. The original hard-coded path is
# tried first so existing setups behave exactly as before; the alternates
# cover the same file name without the "(1)" suffix.
DATA_CANDIDATES = (
    Path('/content/Mall_Customers (1).csv'),
    Path('/content/Mall_Customers.csv'),
    Path('Mall_Customers.csv'),
)

data_path = next((p for p in DATA_CANDIDATES if p.is_file()), None)
if data_path is None:
    # Fail early with every location that was tried instead of only the
    # first one, so the fix (upload or rename the file) is obvious.
    raise FileNotFoundError(
        "Mall customers CSV not found; looked in: "
        + ", ".join(str(p) for p in DATA_CANDIDATES)
    )

df = pd.read_csv(data_path)

# Select features: cluster on income and spending score only.
X = df[['Annual Income (k$)', 'Spending Score (1-100)']]

# ----------------------------
# 2. Scaling (Important for K-Means)
# ----------------------------
# K-Means is distance based, so both features are standardized to keep one
# from dominating purely because of its units. The fitted scaler is kept so
# results can be mapped back to the original units later.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ----------------------------
# 3. Elbow Method + Silhouette Score
# ----------------------------
# For every candidate cluster count, record the within-cluster sum of
# squares (inertia) and the mean silhouette score of the resulting labels.
wcss = []
silhouette_scores = []

# Silhouette is undefined for a single cluster, so the sweep starts at 2.
K_range = range(2, 11)

for k in K_range:
    # n_init is set explicitly: its default differs between scikit-learn
    # releases, which would otherwise make the number of restarts (and
    # therefore the scores compared below) depend on the installed version.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    labels = kmeans.fit_predict(X_scaled)

    wcss.append(kmeans.inertia_)
    silhouette_scores.append(silhouette_score(X_scaled, labels))

# Plot the two diagnostics, one figure each: the elbow curve (WCSS) first,
# then the silhouette scores, both against the number of clusters.
for metric_values, y_label, plot_title in (
    (wcss, 'WCSS', 'Elbow Method'),
    (silhouette_scores, 'Silhouette Score', 'Silhouette Analysis'),
):
    plt.figure()
    plt.plot(K_range, metric_values, marker='o')
    plt.xlabel('Number of Clusters')
    plt.ylabel(y_label)
    plt.title(plot_title)
    plt.show()

# ----------------------------
# 4. Choose Optimal K
# ----------------------------
# The scores were appended in K_range order, so the position of the best
# silhouette score is also the position of its cluster count in K_range.
best_position = np.argmax(silhouette_scores)
optimal_k = K_range[best_position]
print("Optimal K based on Silhouette Score:", optimal_k)

# ----------------------------
# 5. Final Model
# ----------------------------
# Fit K-Means with the selected cluster count on the standardized features
# and store each row's cluster id on the DataFrame.
# n_init is set explicitly because its default differs between scikit-learn
# releases; pinning it keeps the final clustering independent of the
# installed version.
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=42)
df['Cluster'] = kmeans.fit_predict(X_scaled)

# ----------------------------
# 6. Visualization (Original Scale)
# ----------------------------
plt.figure()

# Customers in their original units, colored by assigned cluster.
plt.scatter(
    df['Annual Income (k$)'],
    df['Spending Score (1-100)'],
    c=df['Cluster'],
)

# The model was fit on standardized features, so the cluster centers are
# mapped back to the original units before being drawn on the same axes.
centroids = scaler.inverse_transform(kmeans.cluster_centers_)

# Draw the centers with a distinct marker, color and legend entry so they
# cannot be mistaken for data points.
plt.scatter(
    centroids[:, 0],
    centroids[:, 1],
    s=200,
    c='red',
    marker='X',
    edgecolors='black',
    label='Centroids',
)

plt.xlabel('Annual Income (k$)')
# Use the full column name, matching the x-axis label.
plt.ylabel('Spending Score (1-100)')
plt.title('Customer Segmentation using K-Means')
plt.legend()
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: '/content/Mall_Customers (1).csv'