# In [None]:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import sklearn.metrics as sm
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
import os
# NOTE(review): presumably meant to silence the KMeans/MKL thread warning on
# Windows. It runs after scikit-learn has already been imported, which may be
# too late for the OpenMP runtime to pick it up -- confirm, and move it above
# the imports if needed.
os.environ['OMP_NUM_THREADS'] = '1'

# KMeans and GaussianMixture both start from a random initialisation; a fixed
# seed makes the printed scores and the final observation reproducible.
RANDOM_STATE = 42
N_CLUSTERS = 3


def align_cluster_labels(true_labels, cluster_labels):
    """Relabel clusters so that their ids line up with the true class ids.

    Cluster ids produced by an unsupervised model are arbitrary: cluster 0 may
    correspond to class 2, and so on. Comparing them to the true labels
    directly (accuracy, confusion matrix, colour coding) is therefore
    meaningless. This helper finds the one-to-one cluster -> class matching
    that maximises the number of agreeing samples and returns the cluster
    labels translated through that matching.

    Args:
        true_labels: 1-D array-like of ground-truth class ids.
        cluster_labels: 1-D array-like of cluster ids, same length.

    Returns:
        numpy.ndarray of relabelled cluster ids, same length as the input.
    """
    # Local import keeps this block self-contained; SciPy is a required
    # dependency of scikit-learn, so it is always available here.
    from scipy.optimize import linear_sum_assignment

    true_labels = np.asarray(true_labels).ravel()
    cluster_labels = np.asarray(cluster_labels).ravel()

    # Use one shared, sorted label set so the contingency table is square.
    label_ids = np.unique(np.concatenate([true_labels, cluster_labels]))
    contingency = sm.confusion_matrix(
        true_labels, cluster_labels, labels=label_ids
    )

    # linear_sum_assignment minimises cost, so negate to maximise agreement.
    class_idx, cluster_idx = linear_sum_assignment(-contingency)
    cluster_to_class = dict(zip(label_ids[cluster_idx], label_ids[class_idx]))
    return np.array([cluster_to_class[label] for label in cluster_labels])


def plot_petal_scatter(position, point_labels, title):
    """Draw one petal-length/petal-width scatter panel in the 2x2 grid.

    Args:
        position: 1-based subplot index inside the 2x2 grid.
        point_labels: integer label per sample, used to pick the colour.
        title: panel title.
    """
    plt.subplot(2, 2, position)
    plt.scatter(X.petal_length, X.petal_width, c=colormap[point_labels], s=40)
    plt.title(title)
    plt.xlabel('Petal Length')
    plt.ylabel('Petal Width')


# --- Data -------------------------------------------------------------------
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
y = pd.DataFrame(iris.target)
y.columns = ['target']
true_labels = y.target.to_numpy()

# --- KMeans on the raw features ---------------------------------------------
model = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_STATE)
model.fit(X)
kmeans_labels = align_cluster_labels(true_labels, model.labels_)

plt.figure(figsize=(18, 14))
colormap = np.array(['red', 'lime', 'black'])
plot_petal_scatter(1, true_labels, 'Real Clusters')
# Aligned labels make a colour mean the same species in every panel.
plot_petal_scatter(2, kmeans_labels, 'KMeans Clustering')

kmeans_accuracy = sm.accuracy_score(true_labels, kmeans_labels)
print("KMeans Accuracy:", kmeans_accuracy)
print("KMeans Confusion Matrix:\n", sm.confusion_matrix(true_labels, kmeans_labels))

# --- Gaussian mixture on standardised features ------------------------------
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)
gmm = GaussianMixture(n_components=N_CLUSTERS, random_state=RANDOM_STATE)
gmm.fit(xs)
gmm_y = gmm.predict(xs)
gmm_labels = align_cluster_labels(true_labels, gmm_y)

plot_petal_scatter(3, gmm_labels, 'GMM Clustering')

gmm_accuracy = sm.accuracy_score(true_labels, gmm_labels)
print("GMM Accuracy:", gmm_accuracy)
print("GMM Confusion Matrix:\n", sm.confusion_matrix(true_labels, gmm_labels))
plt.show()

# Report what the numbers actually show instead of a fixed conclusion.
if gmm_accuracy > kmeans_accuracy:
    print("Observation: GMM-based clustering matched the true labels more closely than KMeans.")
elif gmm_accuracy < kmeans_accuracy:
    print("Observation: KMeans clustering matched the true labels more closely than GMM.")
else:
    print("Observation: KMeans and GMM-based clustering matched the true labels equally well.")
