# Clustering Activity

In [2]:
!pip install -U scikit-fuzzy

Collecting scikit-fuzzy
  Downloading scikit-fuzzy-0.4.2.tar.gz (993 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m994.0/994.0 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-fuzzy
  Building wheel for scikit-fuzzy (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-fuzzy: filename=scikit_fuzzy-0.4.2-py3-none-any.whl size=894077 sha256=beb1ee10558f2b9266dcc6e22f624769637ef32305d4d56d46b5ef8fe111eb75
  Stored in directory: /root/.cache/pip/wheels/4f/86/1b/dfd97134a2c8313e519bcebd95d3fedc7be7944db022094bc8
Successfully built scikit-fuzzy
Installing collected packages: scikit-fuzzy
Successfully installed scikit-fuzzy-0.4.2


In [17]:
# Import necessary libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics import adjusted_rand_score, silhouette_score
import skfuzzy.cluster as fuzz
import numpy as np

# Synthetic benchmark data: 300 samples drawn around 4 blob centres.
# X holds the sample coordinates; y_true holds the labels returned by
# make_blobs and serves as ground truth for the Adjusted Rand index.
X, y_true = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.0,
    random_state=42,
)

Exercise: Evaluate four clustering algorithms (K-Means, Fuzzy C-Means, Spectral, and Agglomerative)
1. Apply each clustering algorithm to the dataset where:
   - number of clusters = 4
   - random_state = 42 (where applicable)
   - leave `affinity`, `metric`, `algorithm`, `init`, and `linkage` at their defaults (where applicable)
2. Use the **Adjusted Rand index** and **Silhouette Score** to evaluate the clustering performance.
3. Compare the performance of the algorithms based on the aforementioned metrics and decide which algorithm is best suited for the data given.

In [18]:
# K-Means: fit a 4-cluster model with a fixed seed, then score the partition.
kmeans = KMeans(random_state=42, n_clusters=4)
kmeans_labels = kmeans.fit(X).labels_

# ARI compares against the known generating labels; silhouette uses only X.
kmeans_ari = adjusted_rand_score(labels_true=y_true, labels_pred=kmeans_labels)
kmeans_silhouette = silhouette_score(X, labels=kmeans_labels)




In [19]:
# Fuzzy C-Means clustering
# cmeans is given the transposed data (X.T), i.e. one column per sample.
# seed=42 fixes the random initial partition so the run is reproducible;
# cmeans has no random_state argument, seed plays that role.
cmeans_result = fuzz.cmeans(X.T, c=4, m=2, error=0.005, maxiter=1000, seed=42)

# Only the first two returned values are needed: the cluster centres and the
# membership matrix. The remaining diagnostics are ignored without binding
# `_`, which IPython reserves for the last cell output.
fuzzy_cmeans_centers, fuzzy_cmeans_membership = cmeans_result[:2]

# Harden the soft assignment: each sample takes the cluster (row index) in
# which its membership is largest.
fuzzy_cmeans_labels = np.argmax(fuzzy_cmeans_membership, axis=0)

fuzzy_cmeans_ari = adjusted_rand_score(y_true, fuzzy_cmeans_labels)
fuzzy_cmeans_silhouette = silhouette_score(X, fuzzy_cmeans_labels)


In [20]:
# Spectral clustering: 4 clusters, fixed seed, every other option left at its default.
spectral = SpectralClustering(random_state=42, n_clusters=4)
spectral_labels = spectral.fit_predict(X)

# Score the partition: agreement with the true labels, then cluster cohesion/separation.
spectral_ari, spectral_silhouette = (
    adjusted_rand_score(y_true, spectral_labels),
    silhouette_score(X, spectral_labels),
)

In [21]:
# Agglomerative (hierarchical) clustering cut at 4 clusters.
# No random_state is passed to this estimator here; linkage and metric stay at defaults.
agglomerative = AgglomerativeClustering(
    n_clusters=4,
)
agglomerative_labels = agglomerative.fit_predict(X)

agglomerative_ari = adjusted_rand_score(
    labels_true=y_true,
    labels_pred=agglomerative_labels,
)
agglomerative_silhouette = silhouette_score(
    X,
    labels=agglomerative_labels,
)


In [22]:
# Report both metrics for each algorithm, one line per algorithm.
results = [
    ("K-Means - ARI:", kmeans_ari, kmeans_silhouette),
    ("Fuzzy C-Means - ARI:", fuzzy_cmeans_ari, fuzzy_cmeans_silhouette),
    ("Spectral Clustering - ARI:", spectral_ari, spectral_silhouette),
    ("Agglomerative Clustering - ARI:", agglomerative_ari, agglomerative_silhouette),
]
for header, ari, silhouette in results:
    print(header, ari, "Silhouette:", silhouette)


K-Means - ARI: 0.9910811504997546 Silhouette: 0.7915830011443039
Fuzzy C-Means - ARI: 0.9910811504997546 Silhouette: 0.7915830011443039
Spectral Clustering - ARI: 0.9910811504997546 Silhouette: 0.7915830011443039
Agglomerative Clustering - ARI: 0.9910811504997546 Silhouette: 0.7915830011443039
