**Create a K-means clustering algorithm to group customers of a retail store based on their purchase history.**

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv("Mall_Customers.csv")
print(df.head())

   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   19                  15                      39
1           2    Male   21                  15                      81
2           3  Female   20                  16                       6
3           4  Female   23                  16                      77
4           5  Female   31                  17                      40


In [3]:
X = df[['Annual Income (k$)', 'Spending Score (1-100)']]

In [4]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
inertia = []
k_range = range(1, 11)

In [5]:
for k in k_range:
  km = KMeans(n_clusters=k, random_state=42)
  km.fit(X_scaled)
  inertia.append(km.inertia_)

In [6]:
optimal_k = 5
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
clusters = kmeans.fit_predict(X_scaled)
df['Cluster'] = clusters


In [7]:
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(X_scaled)


In [8]:
cluster_labels = kmeans.predict(X_scaled)
df['Cluster'] = cluster_labels
print(df.head())

   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)  \
0           1    Male   19                  15                      39   
1           2    Male   21                  15                      81   
2           3  Female   20                  16                       6   
3           4  Female   23                  16                      77   
4           5  Female   31                  17                      40   

   Cluster  
0        8  
1        3  
2        4  
3        3  
4        8  


In [9]:
print(df)

     CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)  \
0             1    Male   19                  15                      39   
1             2    Male   21                  15                      81   
2             3  Female   20                  16                       6   
3             4  Female   23                  16                      77   
4             5  Female   31                  17                      40   
..          ...     ...  ...                 ...                     ...   
195         196  Female   35                 120                      79   
196         197  Female   45                 126                      28   
197         198    Male   32                 126                      74   
198         199    Male   32                 137                      18   
199         200    Male   30                 137                      83   

     Cluster  
0          8  
1          3  
2          4  
3          3  
4          8

In [10]:
print("Cluster Centers (scaled):")
print(kmeans.cluster_centers_)

Cluster Centers (scaled):
[[ 0.07722948 -0.11290604]
 [ 1.39436395  1.39334111]
 [ 0.73205646 -1.42897101]
 [-1.35921337  1.19570407]
 [-1.37320883 -1.57680239]
 [ 0.62597864  1.18793976]
 [-0.55987331  0.08456805]
 [ 1.87564575 -1.09476801]
 [-1.23585672 -0.65714315]
 [ 2.56142316  1.10512043]]


In [11]:
original_centers = scaler.inverse_transform(kmeans.cluster_centers_)
print("Cluster Centers (original scale):")
print(original_centers)

Cluster Centers (original scale):
[[ 62.58333333  47.29166667]
 [ 97.09090909  86.09090909]
 [ 79.73913043  13.39130435]
 [ 24.95        81.        ]
 [ 24.58333333   9.58333333]
 [ 76.96        80.8       ]
 [ 45.89189189  52.37837838]
 [109.7         22.        ]
 [ 28.18181818  33.27272727]
 [127.66666667  78.66666667]]


In [12]:

# Add cluster labels to DataFrame
df['Cluster'] = clusters

# Save the new DataFrame with clusters to a CSV file
df.to_csv("Mall_Customers_with_Clusters.csv", index=False)