In [1]:
!pip install pycaret &> /dev/null
print ("Pycaret installed sucessfully!!")

Pycaret installed sucessfully!!


In [2]:
# Show which PyCaret release is active in this kernel.
import pycaret.utils

pycaret.utils.version()

'3.2.0'

In [3]:
import pandas as pd
import numpy as np
from sklearn import datasets
from pycaret.clustering import *

from pycaret.datasets import get_data
# Load the "wholesale" sample dataset through PyCaret's get_data helper; every
# setup() call in this notebook is given this frame as its data.
wholesale = get_data("wholesale") 

Unnamed: 0,Channel,Region,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen
0,2,3,12669,9656,7561,214,2674,1338
1,2,3,7057,9810,9568,1762,3293,1776
2,2,3,6353,8808,7684,2405,3516,7844
3,1,3,13265,1196,4221,6404,507,1788
4,2,3,22615,5410,7198,3915,1777,5185


# **1) Using K-Means Clustering**

# Model Performance for "no data processing"

In [4]:
# Baseline k-means run: no normalization, transformation or PCA is requested.
# session_id pins the random state; without it setup() picks a new seed on each
# run, so this cell's k-means scores would not be repeatable.
kMeanClusteringParameters = setup(data=wholesale, session_id=42, verbose=False)

# Cluster counts to compare (c = 3, 4, 5). The column labels further down are
# derived from this list so the two cannot drift apart.
num_clusters_values = [3, 4, 5]
results = []
for num_clusters in num_clusters_values:
    # Create and evaluate the KMeans clustering model
    kmeans = create_model('kmeans', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
no_data_processing = pd.concat(results, axis=0).T
columnname = [f'C={k}' for k in num_clusters_values]
no_data_processing.columns = columnname
print("No Data Processing\n", no_data_processing)
   

No Data Processing
                         C=3       C=4       C=5
Silhouette           0.4764    0.3997    0.3737
Calinski-Harabasz  210.1005  207.8159  214.4368
Davies-Bouldin       0.8935    0.9014    0.9100


# Model Performance using "Normalization"

In [5]:
# K-means on z-score-normalized features.
# session_id pins the random state; without it setup() picks a new seed on each
# run, so this cell's k-means scores would not be repeatable.
kMeanClusteringParameters = setup(
    data=wholesale,
    normalize=True,
    normalize_method='zscore',
    session_id=42,
    verbose=False,
)

# Cluster counts to compare; the column labels are derived from this list.
num_clusters_values = [3, 4, 5]
results = []
for num_clusters in num_clusters_values:
    kmeans = create_model('kmeans', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
norm = pd.concat(results, axis=0).T
columnname = [f'C={k}' for k in num_clusters_values]
norm.columns = columnname
print("Using normalization\n", norm)

Using normalization
                         C=3       C=4       C=5
Silhouette           0.3568    0.3482    0.3572
Calinski-Harabasz  139.3494  130.9179  136.7115
Davies-Bouldin       1.1736    1.2359    1.1506


# Model Performance using "Transformation"

In [6]:
# K-means on Yeo-Johnson-transformed features.
# session_id pins the random state; without it setup() picks a new seed on each
# run, so this cell's k-means scores would not be repeatable.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    transformation_method='yeo-johnson',
    session_id=42,
    verbose=False,
)

# Cluster counts to compare; the column labels are derived from this list.
num_clusters_values = [3, 4, 5]
results = []
for num_clusters in num_clusters_values:
    kmeans = create_model('kmeans', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
trans = pd.concat(results, axis=0).T
columnname = [f'C={k}' for k in num_clusters_values]
trans.columns = columnname
print("Using Transformation\n", trans)

Using Transformation
                           C=3         C=4         C=5
Silhouette             0.8554      0.5181      0.4620
Calinski-Harabasz  13039.7633  15162.6315  14221.6229
Davies-Bouldin         0.3389      0.6203      0.7017


# Model Performance using "PCA"

In [7]:
# K-means on features passed through linear PCA.
# session_id pins the random state; without it setup() picks a new seed on each
# run, so this cell's k-means scores would not be repeatable.
kMeanClusteringParameters = setup(
    data=wholesale,
    pca=True,
    pca_method='linear',
    session_id=42,
    verbose=False,
)

# Cluster counts to compare; the column labels are derived from this list.
num_clusters_values = [3, 4, 5]
results = []
for num_clusters in num_clusters_values:
    kmeans = create_model('kmeans', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
pca = pd.concat(results, axis=0).T
columnname = [f'C={k}' for k in num_clusters_values]
pca.columns = columnname
print("Using pca\n", pca)

Using pca
                         C=3       C=4       C=5
Silhouette           0.4770    0.3866    0.3698
Calinski-Harabasz  210.1456  207.8197  214.5078
Davies-Bouldin       0.8905    0.9543    0.9102


# Model Performance using "Transformation" + "Normalization"

In [8]:
# K-means with both the Yeo-Johnson transformation and z-score normalization enabled.
# session_id pins the random state; without it setup() picks a new seed on each
# run, so this cell's k-means scores would not be repeatable.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    normalize=True,
    normalize_method='zscore',
    transformation_method='yeo-johnson',
    session_id=42,
    verbose=False,
)

# Cluster counts to compare; the column labels are derived from this list.
num_clusters_values = [3, 4, 5]
results = []
for num_clusters in num_clusters_values:
    kmeans = create_model('kmeans', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
trans_norm = pd.concat(results, axis=0).T
columnname = [f'C={k}' for k in num_clusters_values]
trans_norm.columns = columnname
print("Using Transformation and Normalization\n", trans_norm)

Using Transformation and Normalization
                         C=3       C=4       C=5
Silhouette           0.2228    0.2472    0.2184
Calinski-Harabasz  153.5484  136.3349  121.2396
Davies-Bouldin       1.6406    1.4208    1.5850


#  Model Performance using "Transformation" + "Normalization" + "PCA"

In [9]:
# K-means with Yeo-Johnson transformation, z-score normalization and linear PCA all enabled.
# session_id pins the random state; without it setup() picks a new seed on each
# run, so this cell's k-means scores would not be repeatable.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    normalize=True,
    pca=True,
    normalize_method='zscore',
    transformation_method='yeo-johnson',
    pca_method='linear',
    session_id=42,
    verbose=False,
)

# Cluster counts to compare; the column labels are derived from this list.
num_clusters_values = [3, 4, 5]
results = []
for num_clusters in num_clusters_values:
    kmeans = create_model('kmeans', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
trans_norm_pca = pd.concat(results, axis=0).T
columnname = [f'C={k}' for k in num_clusters_values]
trans_norm_pca.columns = columnname
print("Using Transformation, PCA and Normalization\n", trans_norm_pca)


Using Transformation, PCA and Normalization
                         C=3       C=4       C=5
Silhouette           0.2229    0.2472    0.2184
Calinski-Harabasz  153.5594  136.3349  121.2396
Davies-Bouldin       1.6370    1.4208    1.5850


In [10]:
# Side-by-side k-means summary: one column group per preprocessing variant,
# each group holding that variant's C=3/4/5 scores.
# The 'PCA' group key no longer carries a stray leading space, so the group can
# be selected as table['PCA'].
table = pd.concat(
    [no_data_processing, norm, trans, pca, trans_norm, trans_norm_pca],
    axis=1,
    keys=['No Data Preprocessing', 'Normalize', 'Transformation', 'PCA', 'Using T+N', 'Using T+N+PCA'],
)


In [11]:
from IPython.display import display, Markdown

# Give every header (th) cell of the k-means summary a 1px solid black border.
styled_table = table.style.set_table_styles(
    [{'selector': 'th', 'props': [('border', '1px solid black')]}]
)

# Render the section heading first, then the bordered table.
display(Markdown('### K Means Clustering'), styled_table)

### K Means Clustering

Unnamed: 0_level_0,No Data Preprocessing,No Data Preprocessing,No Data Preprocessing,Normalize,Normalize,Normalize,Transformation,Transformation,Transformation,PCA,PCA,PCA,Using T+N,Using T+N,Using T+N,Using T+N+PCA,Using T+N+PCA,Using T+N+PCA
Unnamed: 0_level_1,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5
Silhouette,0.4764,0.3997,0.3737,0.3568,0.3482,0.3572,0.8554,0.5181,0.462,0.477,0.3866,0.3698,0.2228,0.2472,0.2184,0.2229,0.2472,0.2184
Calinski-Harabasz,210.1005,207.8159,214.4368,139.3494,130.9179,136.7115,13039.7633,15162.6315,14221.6229,210.1456,207.8197,214.5078,153.5484,136.3349,121.2396,153.5594,136.3349,121.2396
Davies-Bouldin,0.8935,0.9014,0.91,1.1736,1.2359,1.1506,0.3389,0.6203,0.7017,0.8905,0.9543,0.9102,1.6406,1.4208,1.585,1.637,1.4208,1.585


# **Using Hierarchical Clustering**

# Model Performance for "no data processing"

In [12]:
# Complete-linkage hierarchical clustering with no normalization,
# transformation or PCA requested.
kMeanClusteringParameters = setup(data=wholesale, verbose=False)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts to compare (c = 3, 4, 5)
for num_clusters in num_clusters_values:
    hierarchical = create_model(
        'hclust', num_clusters=num_clusters, linkage='complete', verbose=False
    )
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
columnname = ['C=3', 'C=4', 'C=5']
no_data_processing2 = pd.concat(results, axis=0).T
no_data_processing2.columns = columnname
print("No Data Processing\n", no_data_processing2)
   

No Data Processing
                        C=3      C=4      C=5
Silhouette          0.7130   0.6701   0.6663
Calinski-Harabasz  82.1065  68.8474  57.5564
Davies-Bouldin      0.6958   0.8860   0.8838


# Model Performance using "Normalization"

In [13]:
# Complete-linkage hierarchical clustering on z-score-normalized features.
kMeanClusteringParameters = setup(
    data=wholesale,
    normalize=True,
    normalize_method='zscore',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts to compare
for num_clusters in num_clusters_values:
    hierarchical = create_model(
        'hclust', num_clusters=num_clusters, linkage='complete', verbose=False
    )
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
columnname = ['C=3', 'C=4', 'C=5']
norm2 = pd.concat(results, axis=0).T
norm2.columns = columnname
print("Using normalization\n", norm2)

Using normalization
                        C=3      C=4      C=5
Silhouette          0.7038   0.6717   0.5191
Calinski-Harabasz  67.2849  58.2046  53.7536
Davies-Bouldin      0.4877   0.5032   0.6071


# Model Performance using "Transformation"

In [14]:
# Complete-linkage hierarchical clustering on Yeo-Johnson-transformed features.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    transformation_method='yeo-johnson',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts to compare
for num_clusters in num_clusters_values:
    hierarchical = create_model(
        'hclust', num_clusters=num_clusters, linkage='complete', verbose=False
    )
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
columnname = ['C=3', 'C=4', 'C=5']
trans2 = pd.concat(results, axis=0).T
trans2.columns = columnname
print("Using Transformation\n", trans2)

Using Transformation
                           C=3         C=4         C=5
Silhouette             0.8554      0.4695      0.4088
Calinski-Harabasz  13039.7633  12321.1770  10135.4548
Davies-Bouldin         0.3389      0.6151      0.7129


# Model Performance using "PCA"

In [15]:
# Complete-linkage hierarchical clustering on features passed through linear PCA.
kMeanClusteringParameters = setup(
    data=wholesale,
    pca=True,
    pca_method='linear',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts to compare
for num_clusters in num_clusters_values:
    hierarchical = create_model(
        'hclust', num_clusters=num_clusters, linkage='complete', verbose=False
    )
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
columnname = ['C=3', 'C=4', 'C=5']
pca2 = pd.concat(results, axis=0).T
pca2.columns = columnname
print("Using pca\n", pca2)

Using pca
                        C=3      C=4      C=5
Silhouette          0.7130   0.6701   0.6663
Calinski-Harabasz  82.1065  68.8474  57.5564
Davies-Bouldin      0.6958   0.8860   0.8838


# Model Performance using "Transformation" + "Normalization"

In [16]:
# Complete-linkage hierarchical clustering with both the Yeo-Johnson
# transformation and z-score normalization enabled.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    normalize=True,
    normalize_method='zscore',
    transformation_method='yeo-johnson',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts to compare
for num_clusters in num_clusters_values:
    hierarchical = create_model(
        'hclust', num_clusters=num_clusters, linkage='complete', verbose=False
    )
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
trans_norm2 = pd.concat(results, axis=0).T
# Label the columns with the list defined in this cell. The cell used to define
# columnname2 but assign `columnname`, a leftover global from an earlier cell,
# so it only worked when those cells had already been run.
columnname2 = ['C=3', 'C=4', 'C=5']
trans_norm2.columns = columnname2
print("Using Transformation and Normalization\n", trans_norm2)

Using Transformation and Normalization
                        C=3      C=4      C=5
Silhouette          0.0983   0.0694   0.0923
Calinski-Harabasz  45.7682  48.8332  65.8734
Davies-Bouldin      1.7890   1.5421   1.7017


#  Model Performance using "Transformation" + "Normalization" + "PCA"

In [17]:
# Complete-linkage hierarchical clustering with Yeo-Johnson transformation,
# z-score normalization and linear PCA all enabled.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    normalize=True,
    pca=True,
    normalize_method='zscore',
    transformation_method='yeo-johnson',
    pca_method='linear',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts to compare
for num_clusters in num_clusters_values:
    hierarchical = create_model(
        'hclust', num_clusters=num_clusters, linkage='complete', verbose=False
    )
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and cluster counts are columns.
columnname = ['C=3', 'C=4', 'C=5']
trans_norm_pca2 = pd.concat(results, axis=0).T
trans_norm_pca2.columns = columnname
print("Using Transformation, PCA and Normalization\n", trans_norm_pca2)


Using Transformation, PCA and Normalization
                        C=3      C=4      C=5
Silhouette          0.0983   0.0694   0.0923
Calinski-Harabasz  45.7682  48.8332  65.8734
Davies-Bouldin      1.7890   1.5421   1.7017


In [18]:
# Side-by-side hierarchical-clustering summary: one column group per
# preprocessing variant, each group holding that variant's C=3/4/5 scores.
# The 'PCA' group key no longer carries a stray leading space, so the group can
# be selected as table2['PCA'].
table2 = pd.concat(
    [no_data_processing2, norm2, trans2, pca2, trans_norm2, trans_norm_pca2],
    axis=1,
    keys=['No Data Preprocessing', 'Normalize', 'Transformation', 'PCA', 'Using T+N', 'Using T+N+PCA'],
)


In [19]:
# Give every header (th) cell of the hierarchical summary a 1px solid black border.
styled_table2 = table2.style.set_table_styles(
    [{'selector': 'th', 'props': [('border', '1px solid black')]}]
)

# Render the section heading first, then the bordered table.
display(Markdown('### Hierarchical Clustering'), styled_table2)

### Hierarchical Clustering

Unnamed: 0_level_0,No Data Preprocessing,No Data Preprocessing,No Data Preprocessing,Normalize,Normalize,Normalize,Transformation,Transformation,Transformation,PCA,PCA,PCA,Using T+N,Using T+N,Using T+N,Using T+N+PCA,Using T+N+PCA,Using T+N+PCA
Unnamed: 0_level_1,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5
Silhouette,0.713,0.6701,0.6663,0.7038,0.6717,0.5191,0.8554,0.4695,0.4088,0.713,0.6701,0.6663,0.0983,0.0694,0.0923,0.0983,0.0694,0.0923
Calinski-Harabasz,82.1065,68.8474,57.5564,67.2849,58.2046,53.7536,13039.7633,12321.177,10135.4548,82.1065,68.8474,57.5564,45.7682,48.8332,65.8734,45.7682,48.8332,65.8734
Davies-Bouldin,0.6958,0.886,0.8838,0.4877,0.5032,0.6071,0.3389,0.6151,0.7129,0.6958,0.886,0.8838,1.789,1.5421,1.7017,1.789,1.5421,1.7017


# **Using Mean Shift Clustering**

# Model Performance for "no data processing"

In [20]:
# Mean shift with no normalization, transformation or PCA requested.
kMeanClusteringParameters = setup(data=wholesale, verbose=False)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts passed to create_model (c = 3, 4, 5)
# NOTE(review): the recorded scores are identical for C=3, C=4 and C=5, which
# suggests num_clusters has no effect on 'meanshift' - confirm before reading
# the columns as different cluster counts.
for num_clusters in num_clusters_values:
    mean_shift = create_model('meanshift', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and runs are columns.
columnname = ['C=3', 'C=4', 'C=5']
no_data_processing3 = pd.concat(results, axis=0).T
no_data_processing3.columns = columnname
print("No Data Processing\n", no_data_processing3)
   

No Data Processing
                        C=3      C=4      C=5
Silhouette          0.3644   0.3644   0.3644
Calinski-Harabasz  54.7541  54.7541  54.7541
Davies-Bouldin      0.5765   0.5765   0.5765


# Model Performance using "Normalization"

In [21]:
# Mean shift on z-score-normalized features.
kMeanClusteringParameters = setup(
    data=wholesale,
    normalize=True,
    normalize_method='zscore',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts passed to create_model
# NOTE(review): the recorded scores are identical for C=3, C=4 and C=5, which
# suggests num_clusters has no effect on 'meanshift' - confirm before reading
# the columns as different cluster counts.
for num_clusters in num_clusters_values:
    mean_shift = create_model('meanshift', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and runs are columns.
columnname = ['C=3', 'C=4', 'C=5']
norm3 = pd.concat(results, axis=0).T
norm3.columns = columnname
print("Using normalization\n", norm3)

Using normalization
                        C=3      C=4      C=5
Silhouette          0.4054   0.4054   0.4054
Calinski-Harabasz  30.8599  30.8599  30.8599
Davies-Bouldin      0.4229   0.4229   0.4229


# Model Performance using "Transformation"

In [22]:
# Mean shift on Yeo-Johnson-transformed features.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    transformation_method='yeo-johnson',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts passed to create_model
# NOTE(review): the recorded scores are identical for C=3, C=4 and C=5, which
# suggests num_clusters has no effect on 'meanshift' - confirm before reading
# the columns as different cluster counts.
for num_clusters in num_clusters_values:
    mean_shift = create_model('meanshift', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and runs are columns.
columnname = ['C=3', 'C=4', 'C=5']
trans3 = pd.concat(results, axis=0).T
trans3.columns = columnname
print("Using Transformation\n", trans3)

Using Transformation
                           C=3         C=4         C=5
Silhouette             0.9076      0.9076      0.9076
Calinski-Harabasz  14114.2429  14114.2429  14114.2429
Davies-Bouldin         0.1625      0.1625      0.1625


# Model Performance using "PCA"

In [23]:
# Mean shift on features passed through linear PCA.
kMeanClusteringParameters = setup(
    data=wholesale,
    pca=True,
    pca_method='linear',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts passed to create_model
# NOTE(review): the recorded scores are identical for C=3, C=4 and C=5, which
# suggests num_clusters has no effect on 'meanshift' - confirm before reading
# the columns as different cluster counts.
for num_clusters in num_clusters_values:
    mean_shift = create_model('meanshift', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and runs are columns.
columnname = ['C=3', 'C=4', 'C=5']
pca3 = pd.concat(results, axis=0).T
pca3.columns = columnname
print("Using pca\n", pca3)

Using pca
                        C=3      C=4      C=5
Silhouette          0.3644   0.3644   0.3644
Calinski-Harabasz  54.7541  54.7541  54.7541
Davies-Bouldin      0.5765   0.5765   0.5765


# Model Performance using "Transformation" + "Normalization"

In [24]:
# Mean shift with both the Yeo-Johnson transformation and z-score normalization enabled.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    normalize=True,
    normalize_method='zscore',
    transformation_method='yeo-johnson',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts passed to create_model
# NOTE(review): the recorded scores are identical for C=3, C=4 and C=5, which
# suggests num_clusters has no effect on 'meanshift' - confirm before reading
# the columns as different cluster counts.
for num_clusters in num_clusters_values:
    mean_shift = create_model('meanshift', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and runs are columns.
trans_norm3 = pd.concat(results, axis=0).T
# Label the columns with the list defined in this cell. The cell used to define
# columnname3 but assign `columnname`, a leftover global from an earlier cell,
# so it only worked when those cells had already been run.
columnname3 = ['C=3', 'C=4', 'C=5']
trans_norm3.columns = columnname3
print("Using Transformation and Normalization\n", trans_norm3)

Using Transformation and Normalization
                        C=3      C=4      C=5
Silhouette          0.3461   0.3461   0.3461
Calinski-Harabasz  17.4754  17.4754  17.4754
Davies-Bouldin      1.0571   1.0571   1.0571


#  Model Performance using "Transformation" + "Normalization" + "PCA"

In [25]:
# Mean shift with Yeo-Johnson transformation, z-score normalization and linear
# PCA all enabled.
kMeanClusteringParameters = setup(
    data=wholesale,
    transformation=True,
    normalize=True,
    pca=True,
    normalize_method='zscore',
    transformation_method='yeo-johnson',
    pca_method='linear',
    verbose=False,
)

results = []
num_clusters_values = [3, 4, 5]  # cluster counts passed to create_model
# NOTE(review): the recorded scores are identical for C=3, C=4 and C=5, which
# suggests num_clusters has no effect on 'meanshift' - confirm before reading
# the columns as different cluster counts.
for num_clusters in num_clusters_values:
    mean_shift = create_model('meanshift', num_clusters=num_clusters, verbose=False)
    # Keep only Silhouette, Calinski-Harabasz and Davies-Bouldin from the score grid.
    results.append(pull().drop(columns=['Homogeneity', 'Rand Index', 'Completeness']))

# One row per run -> transpose so metrics are rows and runs are columns.
columnname = ['C=3', 'C=4', 'C=5']
trans_norm_pca3 = pd.concat(results, axis=0).T
trans_norm_pca3.columns = columnname
print("Using Transformation, PCA and Normalization\n", trans_norm_pca3)


Using Transformation, PCA and Normalization
                        C=3      C=4      C=5
Silhouette          0.3461   0.3461   0.3461
Calinski-Harabasz  17.4754  17.4754  17.4754
Davies-Bouldin      1.0571   1.0571   1.0571


In [26]:
# Side-by-side mean-shift summary: one column group per preprocessing variant,
# each group holding that variant's C=3/4/5 columns.
# The 'PCA' group key no longer carries a stray leading space, so the group can
# be selected as table3['PCA'].
table3 = pd.concat(
    [no_data_processing3, norm3, trans3, pca3, trans_norm3, trans_norm_pca3],
    axis=1,
    keys=['No Data Preprocessing', 'Normalize', 'Transformation', 'PCA', 'Using T+N', 'Using T+N+PCA'],
)


In [27]:
# Give every header (th) cell of the mean-shift summary a 1px solid black border.
styled_table3 = table3.style.set_table_styles(
    [{'selector': 'th', 'props': [('border', '1px solid black')]}]
)

# Render the section heading first, then the bordered table.
display(Markdown('### K-mean shift clustering'), styled_table3)

### K-mean shift clustering

Unnamed: 0_level_0,No Data Preprocessing,No Data Preprocessing,No Data Preprocessing,Normalize,Normalize,Normalize,Transformation,Transformation,Transformation,PCA,PCA,PCA,Using T+N,Using T+N,Using T+N,Using T+N+PCA,Using T+N+PCA,Using T+N+PCA
Unnamed: 0_level_1,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5
Silhouette,0.3644,0.3644,0.3644,0.4054,0.4054,0.4054,0.9076,0.9076,0.9076,0.3644,0.3644,0.3644,0.3461,0.3461,0.3461,0.3461,0.3461,0.3461
Calinski-Harabasz,54.7541,54.7541,54.7541,30.8599,30.8599,30.8599,14114.2429,14114.2429,14114.2429,54.7541,54.7541,54.7541,17.4754,17.4754,17.4754,17.4754,17.4754,17.4754
Davies-Bouldin,0.5765,0.5765,0.5765,0.4229,0.4229,0.4229,0.1625,0.1625,0.1625,0.5765,0.5765,0.5765,1.0571,1.0571,1.0571,1.0571,1.0571,1.0571


In [28]:
from IPython.display import display, Markdown

# Final recap: show the three styled summary tables one after another, each
# under its own markdown heading (k-means, hierarchical, mean shift).
summary_sections = [
    ('### Using K-mean Clustering', styled_table),
    ('### Using Hierarchical Clustering', styled_table2),
    ('### Using k-mean Shift Clustering', styled_table3),
]
for heading, styled in summary_sections:
    display(Markdown(heading))
    display(styled)


### Using K-mean Clustering

Unnamed: 0_level_0,No Data Preprocessing,No Data Preprocessing,No Data Preprocessing,Normalize,Normalize,Normalize,Transformation,Transformation,Transformation,PCA,PCA,PCA,Using T+N,Using T+N,Using T+N,Using T+N+PCA,Using T+N+PCA,Using T+N+PCA
Unnamed: 0_level_1,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5
Silhouette,0.4764,0.3997,0.3737,0.3568,0.3482,0.3572,0.8554,0.5181,0.462,0.477,0.3866,0.3698,0.2228,0.2472,0.2184,0.2229,0.2472,0.2184
Calinski-Harabasz,210.1005,207.8159,214.4368,139.3494,130.9179,136.7115,13039.7633,15162.6315,14221.6229,210.1456,207.8197,214.5078,153.5484,136.3349,121.2396,153.5594,136.3349,121.2396
Davies-Bouldin,0.8935,0.9014,0.91,1.1736,1.2359,1.1506,0.3389,0.6203,0.7017,0.8905,0.9543,0.9102,1.6406,1.4208,1.585,1.637,1.4208,1.585


### Using Hierarchical Clustering

Unnamed: 0_level_0,No Data Preprocessing,No Data Preprocessing,No Data Preprocessing,Normalize,Normalize,Normalize,Transformation,Transformation,Transformation,PCA,PCA,PCA,Using T+N,Using T+N,Using T+N,Using T+N+PCA,Using T+N+PCA,Using T+N+PCA
Unnamed: 0_level_1,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5
Silhouette,0.713,0.6701,0.6663,0.7038,0.6717,0.5191,0.8554,0.4695,0.4088,0.713,0.6701,0.6663,0.0983,0.0694,0.0923,0.0983,0.0694,0.0923
Calinski-Harabasz,82.1065,68.8474,57.5564,67.2849,58.2046,53.7536,13039.7633,12321.177,10135.4548,82.1065,68.8474,57.5564,45.7682,48.8332,65.8734,45.7682,48.8332,65.8734
Davies-Bouldin,0.6958,0.886,0.8838,0.4877,0.5032,0.6071,0.3389,0.6151,0.7129,0.6958,0.886,0.8838,1.789,1.5421,1.7017,1.789,1.5421,1.7017


### Using k-mean Shift Clustering

Unnamed: 0_level_0,No Data Preprocessing,No Data Preprocessing,No Data Preprocessing,Normalize,Normalize,Normalize,Transformation,Transformation,Transformation,PCA,PCA,PCA,Using T+N,Using T+N,Using T+N,Using T+N+PCA,Using T+N+PCA,Using T+N+PCA
Unnamed: 0_level_1,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5,C=3,C=4,C=5
Silhouette,0.3644,0.3644,0.3644,0.4054,0.4054,0.4054,0.9076,0.9076,0.9076,0.3644,0.3644,0.3644,0.3461,0.3461,0.3461,0.3461,0.3461,0.3461
Calinski-Harabasz,54.7541,54.7541,54.7541,30.8599,30.8599,30.8599,14114.2429,14114.2429,14114.2429,54.7541,54.7541,54.7541,17.4754,17.4754,17.4754,17.4754,17.4754,17.4754
Davies-Bouldin,0.5765,0.5765,0.5765,0.4229,0.4229,0.4229,0.1625,0.1625,0.1625,0.5765,0.5765,0.5765,1.0571,1.0571,1.0571,1.0571,1.0571,1.0571
