# Import necessary libraries

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score, silhouette_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load datasets

In [None]:
# Read the three source tables from CSV files in the working directory.
# The files are read in the order customers, transactions, products.
customers, transactions, products = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Transactions.csv', 'Products.csv')
)

# Merge datasets for analysis

In [None]:
# Attach customer attributes to every transaction (joined on CustomerID),
# then attach product attributes (joined on ProductID). Both joins rely on
# pandas' default merge settings.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)


# Feature Engineering: Aggregate data by CustomerID

In [None]:
# Collapse the merged transaction rows into one feature row per customer:
#   TotalSpending       - sum of TotalValue
#   AvgTransactionValue - mean of TotalValue
#   NumTransactions     - count of TransactionID
#   Region              - 'first' aggregation of the Region column
feature_aggregations = {
    'TotalSpending': pd.NamedAgg(column='TotalValue', aggfunc='sum'),
    'AvgTransactionValue': pd.NamedAgg(column='TotalValue', aggfunc='mean'),
    'NumTransactions': pd.NamedAgg(column='TransactionID', aggfunc='count'),
    'Region': pd.NamedAgg(column='Region', aggfunc='first'),
}
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(**feature_aggregations)
    .reset_index()  # turn the CustomerID group key back into a regular column
)

# Normalize numerical features

In [None]:
# Rescale the numeric per-customer features with a MinMaxScaler so that the
# three columns share a common scale before clustering.
numerical_features = ['TotalSpending', 'AvgTransactionValue', 'NumTransactions']
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(customer_features[numerical_features])
# Overwrite the raw columns with their scaled counterparts.
customer_features[numerical_features] = scaled_values

# Encode categorical features (Region)

In [None]:
# One-hot encode Region; drop_first=True omits one of the generated
# indicator columns.
customer_features = pd.get_dummies(
    customer_features,
    columns=['Region'],
    drop_first=True,
)

# Feature matrix for clustering: every column except the customer identifier.
X = customer_features.drop(columns=['CustomerID'])

# Apply K-Means Clustering

In [None]:
# Partition the customers into segments with K-Means.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' (a single run for the default k-means++ init) in version 1.4, and
# versions 1.2/1.3 emit a FutureWarning when it is left unset. Without the
# pin, the segments depend on which scikit-learn version is installed.
# random_state fixes the centroid initialisation so reruns are reproducible.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)  # You can change n_clusters to experiment
kmeans.fit(X)

# Store each customer's assigned cluster label next to its features.
customer_features['Cluster'] = kmeans.labels_


# Evaluate Clustering: Davies-Bouldin Index and Silhouette Score

In [None]:
# Score the fitted clustering on the feature matrix with two internal
# validity metrics, then report both values.
cluster_labels = kmeans.labels_
db_index = davies_bouldin_score(X, cluster_labels)
silhouette_avg = silhouette_score(X, cluster_labels)

print(f"Davies-Bouldin Index: {db_index}")
print(f"Silhouette Score: {silhouette_avg}")

# Visualize Clusters using PCA (Dimensionality Reduction)

In [None]:
from sklearn.decomposition import PCA

# Project the feature matrix onto two principal components so the clusters
# can be drawn on a 2-D scatter plot.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Rows of the transposed projection are the individual components.
customer_features['PCA1'], customer_features['PCA2'] = X_pca.T

# Draw one point per customer, coloured by its cluster label.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=customer_features,
    x='PCA1',
    y='PCA2',
    hue='Cluster',
    palette='viridis',
    s=100,
    ax=ax,
)
ax.set_title('Customer Segmentation (PCA Visualization)')
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
ax.legend(title='Cluster')
plt.show()