In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

from kmodes.kprototypes import KPrototypes
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
import seaborn as sns

from warnings import filterwarnings
filterwarnings('ignore')

In [None]:
# Load the pre-cleaned dataset (comma-separated) and preview the first rows.
clean_data_path = "../assets/clean_data.csv"
df = pd.read_csv(clean_data_path, sep=",")
df.head()

In [None]:
# Positional indices of the categorical columns; this list is what gets passed
# as `categorical=` to every KPrototypes fit in this notebook.
# NOTE(review): assumes the column at position 1 of df is the only categorical
# feature — confirm against the columns of clean_data.csv.
catColumnsPos = [1]

In [None]:
# Numeric feature columns that get standardised before clustering.
num_cols = [
    'Contacts_Count_12_mon',
    'Credit_Limit',
    'Total_Revolving_Bal',
    'Avg_Utilization_Ratio',
]

In [None]:
# Standardise the numeric feature columns and write the scaled values back
# into df in place of the raw ones.
numeric_block = df[num_cols]
scaled_col = StandardScaler().fit(numeric_block).transform(numeric_block)
df[num_cols] = scaled_col

In [None]:
# Convert the whole dataframe (every column, not only num_cols) to a NumPy
# array; this matrix is the input to all KPrototypes fits below.
# NOTE: the snapshot is taken before the 'clusters' column is added to df
# further down — re-running this cell afterwards would feed the cluster
# labels back in as a feature.
dfMatrix = df.to_numpy()

In [None]:
# Elbow method to detect number of K:
# fit K-Prototypes for k = 1..9 and record each fit's clustering cost
# (kprototype.cost_) so the elbow of the cost curve can be inspected.

cost = []
for cluster in range(1, 10):
    try:
        kprototype = KPrototypes(n_jobs = -1, n_clusters = cluster, init = 'Huang', random_state = 0)
        kprototype.fit_predict(dfMatrix, categorical = catColumnsPos)
    except Exception as err:
        # Keep the best-effort behaviour (stop the sweep and plot what was
        # collected) but report the failure instead of hiding it. A bare
        # `except:` would also swallow KeyboardInterrupt and leave a silently
        # truncated cost curve.
        print('Stopped at k = {}: {!r}'.format(cluster, err))
        break
    cost.append(kprototype.cost_)
    print('Cluster initiation: {}'.format(cluster))

# cost[i] belongs to k = i + 1, so plot against the real k values; plotting
# the bare list would put k = 1 at x = 0 and mislabel the whole axis.
k_values = range(1, len(cost) + 1)
plt.plot(k_values, cost)
plt.xticks(k_values)
plt.xlabel('K')
plt.ylabel('cost')
plt.show()  # call it — a bare `plt.show` only echoes the function object

In [None]:
# cost (sum distance): confirm visual clue of elbow plot
# KneeLocator class will detect elbows if curve is convex; if concave, will detect knees
from kneed import KneeLocator

# Derive x from the number of costs actually collected so that x and y always
# have the same length, even if the elbow sweep stopped before k = 9.
cost_knee_c3 = KneeLocator(
        x=range(1, len(cost) + 1),
        y=cost,
        S=0.1, curve="convex", direction="decreasing", online=True)

K_cost_c3 = cost_knee_c3.elbow
# KneeLocator reports None when it finds no elbow; formatting None with
# ':.0f' would raise a TypeError, so handle that case explicitly.
if K_cost_c3 is None:
    print("no elbow detected in the cost curve")
else:
    print("elbow at k =", f'{K_cost_c3:.0f} clusters')

In [None]:
# Build the final K-Prototypes model with 4 clusters and store each row's
# cluster label in df['clusters'].
# The cluster count is set explicitly: the elbow loop's leftover `cluster`
# variable holds the last k that loop tried, not the chosen number of clusters.
# NOTE(review): 4 is taken from the original comment — confirm it matches the
# elbow reported by the KneeLocator cell.
n_clusters_final = 4
kprototype = KPrototypes(n_jobs = -1, n_clusters = n_clusters_final, init = 'Huang', random_state = 0)
df['clusters']= kprototype.fit_predict(dfMatrix, categorical = catColumnsPos)

In [None]:
# Predict the cluster label of every row with the model fitted in the previous
# cell. predict() only assigns rows to the existing centroids, so the (slow)
# clustering is not run a second time as it would be with another fit_predict().
label = kprototype.predict(dfMatrix, categorical = catColumnsPos)
print(label)

In [None]:
# Per-cluster median and mean of every numeric (and boolean) feature,
# transposed so that clusters are columns and feature/statistic pairs are rows.
# Non-numeric columns are excluded explicitly: pandas >= 2.0 raises a TypeError
# when asked for the mean/median of an object column instead of dropping it.
summary_cols = (
    df.select_dtypes(include=['number', 'bool'])
      .columns
      .drop('clusters', errors='ignore')
)
df.groupby('clusters')[list(summary_cols)].agg(['median', 'mean']).T