In [None]:
# Title: Customer Segmentation at LG Corporation

# 1. Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

In [None]:
# 2. Load the Dataset
# The workbook name is relative, so it is looked up from the current
# working directory of the kernel.
dataset_path = "LG_Customer_Segmentation_Dataset.xlsx"
df = pd.read_excel(dataset_path)

In [None]:
# 3. Quick Look at the Data
print("First 5 Rows:")
print(df.head())

print("\nSummary Info:")
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called on its own; wrapping it in print() appended a stray
# "None" line after the report.
df.info()

In [None]:
# 4. Preprocess the Data
# Every step below is guarded so the cell can be re-run safely: a second
# run used to map the already-encoded Gender column to NaN and then fail
# on the (already consumed) ProductCategory column.

# Convert Gender to numeric
gender_codes = {'Male': 0, 'Female': 1}
if not pd.api.types.is_numeric_dtype(df['Gender']):
    gender_encoded = df['Gender'].map(gender_codes)
    # Series.map turns every label that is missing from the mapping into
    # NaN; report such labels here instead of letting NaN reach the model.
    unmapped = df.loc[gender_encoded.isna() & df['Gender'].notna(), 'Gender']
    if not unmapped.empty:
        raise ValueError(
            f"Unexpected Gender values: {sorted(unmapped.astype(str).unique())}"
        )
    df['Gender'] = gender_encoded

# One-hot encode the ProductCategory column
if 'ProductCategory' in df.columns:
    df = pd.get_dummies(df, columns=['ProductCategory'])
elif not any(str(col).startswith('ProductCategory_') for col in df.columns):
    # Neither the raw column nor its dummy columns exist: keep the original
    # failure mode rather than silently skipping the encoding.
    raise KeyError('ProductCategory')

# Drop CustomerID as it's not a useful feature for modeling.
# Cluster / PCA1 / PCA2 are written back into df by later cells; exclude
# them as well so a re-run never feeds previous results into the model.
df_model = (
    df.drop(columns='CustomerID')
      .drop(columns=['Cluster', 'PCA1', 'PCA2'], errors='ignore')
)

In [None]:
# 5. Standardize the Features
# Learn the per-column scaling statistics from the model frame, then apply
# them to that same frame.
scaler = StandardScaler().fit(df_model)
scaled_features = scaler.transform(df_model)

In [None]:
# 6. Use Elbow Method to Find Optimal Number of Clusters
# Candidate cluster counts, defined once so the fitting loop and the plot
# cannot drift apart. Capped at the number of rows because KMeans cannot
# fit more clusters than there are samples.
k_values = range(1, min(10, len(scaled_features)) + 1)

inertia = []
for k in k_values:
    # n_init is pinned explicitly: its default differs between scikit-learn
    # releases (10 in older ones, 'auto' in newer ones), which would change
    # the curve and emit a FutureWarning on the releases in between.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(scaled_features)
    inertia.append(kmeans.inertia_)

plt.figure(figsize=(8, 5))
plt.plot(k_values, inertia, marker='o')
plt.title('Elbow Method to Determine Optimal k')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.xticks(k_values)  # k is an integer count; avoid fractional tick labels
plt.grid(True)
plt.show()

In [None]:
# 7. Apply K-Means with Chosen Number of Clusters
# NOTE(review): 4 is presumably the elbow read off the plot above — confirm
# against the curve whenever the dataset changes.
n_clusters = 4

# n_init is pinned explicitly because its default differs between
# scikit-learn releases (10 in older ones, 'auto' in newer ones); leaving it
# implicit makes the cluster labels depend on the installed version.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
df['Cluster'] = kmeans.fit_predict(scaled_features)

In [None]:
# 8. Visualize Clusters using PCA
# Reduce the scaled features to two components and keep them on df so the
# scatter plot can colour each point by its cluster label.
pca = PCA(n_components=2)
pca_components = pca.fit_transform(scaled_features)
df['PCA1'], df['PCA2'] = pca_components.T

fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='PCA1',
    y='PCA2',
    hue='Cluster',
    palette='Set2',
    ax=ax,
)
ax.set_title('Customer Segmentation by PCA')
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
ax.grid(True)
ax.legend(title='Cluster')
fig.tight_layout()
plt.show()

In [None]:
# 9. Summary of Each Cluster
# Average only the modelling features per cluster. CustomerID is an
# identifier and PCA1/PCA2 are plotting helpers, so their means carry no
# meaning. Since pandas 2.0, GroupBy.mean() also raises TypeError when a
# non-numeric column (e.g. a text CustomerID) is present.
# errors='ignore' keeps the cell usable when the PCA cell has not been run.
helper_columns = ['CustomerID', 'PCA1', 'PCA2']
cluster_summary = (
    df.drop(columns=helper_columns, errors='ignore')
      .groupby('Cluster')
      .mean()
)
print("\nCluster Summary:")
print(cluster_summary)

In [None]:
# 10. Save the Dataset with Clusters
# Write df to a new workbook without the index column; the workbook name is
# relative, so it lands in the kernel's current working directory.
output_path = "LG_Customer_Segmentation_with_Clusters.xlsx"
df.to_excel(output_path, index=False)
print("\nSegmented dataset saved as 'LG_Customer_Segmentation_with_Clusters.xlsx'")