# 🧠 Customer Segmentation using K-Means Clustering

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the customer data from the project's data directory.
df = pd.read_csv(filepath_or_buffer='../data/customers.csv')

# Preview the first rows (rendered as the cell output).
df.head()


In [None]:
# Show pandas' summary statistics for the loaded DataFrame (cell output).
df.describe()


In [None]:
# Pairwise scatter plots of three columns, with kernel density estimates on
# the diagonal.
pairplot_data = df[['AnnualIncome', 'SpendingScore', 'Age']]
sns.pairplot(data=pairplot_data, diag_kind='kde')
plt.show()


In [None]:
from sklearn.preprocessing import StandardScaler

# Columns used as inputs to the clustering step.
features = [
    'Age',
    'AnnualIncome',
    'SpendingScore',
    'Recency',
    'Frequency',
    'TotalSpent',
]
X = df[features]

# Standardize the selected columns so that no single feature dominates the
# distance computations because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)


In [None]:
from sklearn.cluster import KMeans

# Elbow method: fit K-Means for each candidate number of clusters and record
# the within-cluster sum of squares (the model's `inertia_`).
k_values = range(2, 11)  # single source of truth for both the loop and the plot

wcss = []
for k in k_values:
    # n_init is passed explicitly because scikit-learn changed its default
    # (10 -> 'auto'); pinning it keeps results consistent across versions and
    # avoids the deprecation warning some versions emit.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

# Plot WCSS against k; the "elbow" of the curve suggests a reasonable k.
plt.figure(figsize=(8, 4))
plt.plot(k_values, wcss, marker='o')
plt.title('Elbow Method for Optimal k')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.grid(True)
plt.show()


In [None]:
# Number of clusters for the final model.
# NOTE(review): presumably chosen by inspecting the elbow plot — confirm.
optimal_k = 4

# n_init is passed explicitly because scikit-learn changed its default
# (10 -> 'auto'); pinning it keeps the segmentation consistent across versions.
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=42)

# Fit on the scaled features and attach each row's cluster label to df.
clusters = kmeans.fit_predict(X_scaled)
df['Cluster'] = clusters

# Preview the labelled data (rendered as the cell output).
df.head()


In [None]:
import seaborn as sns

# Scatter plot of income against spending score, coloured by assigned cluster.
ax = sns.scatterplot(
    data=df,
    x='AnnualIncome',
    y='SpendingScore',
    hue='Cluster',
    palette='Set2',
)
ax.set_title('Customer Segments by Income vs Spending Score')
plt.show()


In [None]:
from pathlib import Path

# Destination for the labelled customer table.
output_file = '../outputs/clustered_customers.csv'

# to_csv does not create missing directories, so make sure the target folder
# exists first; otherwise a fresh checkout fails with an OSError.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

# Write the DataFrame (including the 'Cluster' column) without the index.
df.to_csv(output_file, index=False)
print(f"File saved to {output_file}")
