In [10]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import SpectralClustering
from tabulate import tabulate
import warnings
# Suppress every warning for the remainder of the kernel session.
# NOTE(review): this blanket filter also hides library warnings (for example
# convergence or deprecation notices) — consider narrowing it to specific
# categories once the analysis is stable.
warnings.filterwarnings("ignore")

In [2]:
# Load the breast cancer dataset bundled with scikit-learn and keep its
# `data` field as `X`, the matrix every clustering cell in this notebook uses.
cancer = load_breast_cancer()
X = cancer["data"]

## KMeans

#### N = 3

In [3]:
# KMeans (k = 3): compare cluster quality across preprocessing pipelines.
#
# A fixed seed keeps the table reproducible across "Restart & Run All":
# KMeans centroid initialisation is random, and PCA may select a randomized
# SVD solver depending on the scikit-learn version and the data shape.
RANDOM_STATE = 42
n_clusters = 3

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit KMeans on the preprocessed matrix and get one label per sample.
    clustering = KMeans(n_clusters=n_clusters, n_init='auto', random_state=RANDOM_STATE)
    labels = clustering.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.675258 ┃                  1251.53  ┃               0.55185  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.330546 ┃                   253.174 ┃               1.45537  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.659914 ┃                  1262.2   ┃               0.603631 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 4

In [4]:
# KMeans (k = 4): compare cluster quality across preprocessing pipelines.
#
# A fixed seed keeps the table reproducible across "Restart & Run All":
# KMeans centroid initialisation is random, and PCA may select a randomized
# SVD solver depending on the scikit-learn version and the data shape.
RANDOM_STATE = 42
n_clusters = 4

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit KMeans on the preprocessed matrix and get one label per sample.
    clustering = KMeans(n_clusters=n_clusters, n_init='auto', random_state=RANDOM_STATE)
    labels = clustering.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.543859 ┃                  1444.52  ┃               0.605756 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.152268 ┃                   200.321 ┃               1.80108  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.546657 ┃                  1464.72  ┃               0.599408 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 5

In [5]:
# KMeans (k = 5): compare cluster quality across preprocessing pipelines.
#
# A fixed seed keeps the table reproducible across "Restart & Run All":
# KMeans centroid initialisation is random, and PCA may select a randomized
# SVD solver depending on the scikit-learn version and the data shape.
RANDOM_STATE = 42
n_clusters = 5

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit KMeans on the preprocessed matrix and get one label per sample.
    clustering = KMeans(n_clusters=n_clusters, n_init='auto', random_state=RANDOM_STATE)
    labels = clustering.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.51571  ┃                  1604.84  ┃               0.643389 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.169664 ┃                   173.215 ┃               1.65202  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.519798 ┃                  1635.56  ┃               0.635832 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

## Hierarchical Clustering

#### N = 3

In [6]:
# Hierarchical (agglomerative) clustering with 3 clusters: compare cluster
# quality across preprocessing pipelines.
#
# AgglomerativeClustering takes no seed, but PCA may select a randomized SVD
# solver depending on the scikit-learn version and the data shape, so the PCA
# steps are seeded to keep the table reproducible.
RANDOM_STATE = 42
n_clusters = 3

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit the hierarchical model on the preprocessed matrix and get one
    # label per sample.
    clustering = AgglomerativeClustering(n_clusters=n_clusters)
    labels = clustering.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.508254 ┃                  1089.93  ┃               0.631429 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.335346 ┃                   235.843 ┃               1.46615  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.518864 ┃                  1089.94  ┃               0.615082 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 4

In [7]:
# Hierarchical (agglomerative) clustering with 4 clusters: compare cluster
# quality across preprocessing pipelines.
#
# AgglomerativeClustering takes no seed, but PCA may select a randomized SVD
# solver depending on the scikit-learn version and the data shape, so the PCA
# steps are seeded to keep the table reproducible.
RANDOM_STATE = 42
n_clusters = 4

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit the hierarchical model on the preprocessed matrix and get one
    # label per sample.
    clustering = AgglomerativeClustering(n_clusters=n_clusters)
    labels = clustering.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.509003 ┃                  1245.57  ┃               0.609063 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.136605 ┃                   185.186 ┃               1.91465  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.52739  ┃                  1369.42  ┃               0.586194 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 5

In [8]:
# Hierarchical (agglomerative) clustering with 5 clusters: compare cluster
# quality across preprocessing pipelines.
#
# AgglomerativeClustering takes no seed, but PCA may select a randomized SVD
# solver depending on the scikit-learn version and the data shape, so the PCA
# steps are seeded to keep the table reproducible.
RANDOM_STATE = 42
n_clusters = 5

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit the hierarchical model on the preprocessed matrix and get one
    # label per sample.
    clustering = AgglomerativeClustering(n_clusters=n_clusters)
    labels = clustering.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.511445 ┃                  1541.86  ┃               0.611407 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.128007 ┃                   158.21  ┃               1.85584  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.478405 ┃                  1421.91  ┃               0.592308 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

## Gaussian Mixture Model (GMM) Clustering

#### N = 3

In [11]:
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from tabulate import tabulate

# Gaussian Mixture Model with 3 components: compare cluster quality across
# preprocessing pipelines.
#
# A fixed seed keeps the table reproducible across "Restart & Run All":
# GaussianMixture initialisation is random, and PCA may select a randomized
# SVD solver depending on the scikit-learn version and the data shape.
RANDOM_STATE = 42

# Specify the number of clusters (mixture components)
n_clusters = 3

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit the mixture model on the preprocessed matrix and assign each
    # sample to a component.
    gmm = GaussianMixture(n_components=n_clusters, random_state=RANDOM_STATE)
    labels = gmm.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))


┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.517158 ┃                   771.414 ┃               0.641682 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.308106 ┃                   230.161 ┃               1.5942   ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.490939 ┃                   757.978 ┃               0.776605 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 4

In [13]:
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from tabulate import tabulate

# Gaussian Mixture Model with 4 components: compare cluster quality across
# preprocessing pipelines.
#
# A fixed seed keeps the table reproducible across "Restart & Run All":
# GaussianMixture initialisation is random, and PCA may select a randomized
# SVD solver depending on the scikit-learn version and the data shape.
RANDOM_STATE = 42

# Specify the number of clusters (mixture components)
n_clusters = 4

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit the mixture model on the preprocessed matrix and assign each
    # sample to a component.
    gmm = GaussianMixture(n_components=n_clusters, random_state=RANDOM_STATE)
    labels = gmm.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))


┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.496918 ┃                  1386.58  ┃               0.634767 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.294796 ┃                   188.707 ┃               1.44973  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.456981 ┃                  1097.52  ┃               0.747762 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor

#### N = 5

In [14]:
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from tabulate import tabulate

# Gaussian Mixture Model with 5 components: compare cluster quality across
# preprocessing pipelines.
#
# A fixed seed keeps the table reproducible across "Restart & Run All":
# GaussianMixture initialisation is random, and PCA may select a randomized
# SVD solver depending on the scikit-learn version and the data shape.
RANDOM_STATE = 42

# Specify the number of clusters (mixture components)
n_clusters = 5

# Label -> matrix that will be clustered.
# About the labels: "Transform + Normalization" is PCA followed by min-max
# scaling, and "Transform + Normalization + PCA" is min-max scaling followed
# by PCA. The labels are kept unchanged so existing tables stay comparable.
preprocessing_techniques = {
    "No Preprocessing": X,
    "Normalization": MinMaxScaler().fit_transform(X),
    "PCA": PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X),
    "Transform + Normalization": MinMaxScaler().fit_transform(
        PCA(n_components=2, random_state=RANDOM_STATE).fit_transform(X)
    ),
    "Transform + Normalization + PCA": PCA(
        n_components=2, random_state=RANDOM_STATE
    ).fit_transform(MinMaxScaler().fit_transform(X)),
}

results_data = []

for technique, data in preprocessing_techniques.items():
    # Fit the mixture model on the preprocessed matrix and assign each
    # sample to a component.
    gmm = GaussianMixture(n_components=n_clusters, random_state=RANDOM_STATE)
    labels = gmm.fit_predict(data)

    # Internal validity metrics, evaluated on the same matrix that was
    # clustered. Higher is better for silhouette and Calinski-Harabasz;
    # lower is better for Davies-Bouldin.
    silhouette = silhouette_score(data, labels)
    calinski_harabasz = calinski_harabasz_score(data, labels)
    davies_bouldin = davies_bouldin_score(data, labels)

    results_data.append([technique, silhouette, calinski_harabasz, davies_bouldin])

# Column titles, in the same order as the values appended to each row.
headers = ["Preprocessing Technique", "Silhouette Score", "Calinski-Harabasz Score", "Davies-Bouldin Score"]

# Render the comparison table.
print(tabulate(results_data, headers=headers, tablefmt='heavy_grid'))

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Preprocessing Technique         ┃   Silhouette Score ┃   Calinski-Harabasz Score ┃   Davies-Bouldin Score ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ No Preprocessing                ┃           0.430748 ┃                  1457.25  ┃               0.655471 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Normalization                   ┃           0.139274 ┃                   162.504 ┃               1.83887  ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ PCA                             ┃           0.399896 ┃                  1128.7   ┃               0.748835 ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━━━━┫
┃ Transfor