# In [6]:
import numpy as np
from sklearn.datasets import load_iris 
from sklearn.metrics import silhouette_score  


class KMeans:
    """Minimal K-means clustering (Lloyd's algorithm) on NumPy arrays.

    Parameters
    ----------
    n_clusters : int, default 3
        Number of clusters, i.e. number of centroids to compute.
    max_iter : int, default 300
        Upper bound on assignment/update rounds performed by ``fit``.
    random_state : int or None, default None
        Seed for centroid initialisation. With ``None`` the draws come from
        NumPy's global random state, so ``np.random.seed`` still applies.

    Attributes
    ----------
    centroids : ndarray or None
        One row per cluster after ``fit``; ``None`` before fitting.
    """

    def __init__(self, n_clusters=3, max_iter=300, random_state=None):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state
        # Filled in by fit(); None marks the model as not fitted yet.
        self.centroids = None

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two points."""
        return np.sqrt(np.sum((x1 - x2) ** 2))

    @staticmethod
    def _as_float_array(X):
        """Convert X to an ndarray with a floating dtype.

        Integer (or boolean) data is promoted to float so that centroid means
        are not truncated when stored; existing float dtypes are kept as-is.
        """
        X = np.asarray(X)
        if X.ndim == 0:
            raise ValueError("X must contain at least one dimension of samples.")
        if not np.issubdtype(X.dtype, np.floating):
            X = X.astype(float)
        return X

    def _nearest_centroid(self, X):
        """Return, for every sample in X, the index of its closest centroid."""
        # Reduce over every axis except the sample axis, so 1-D inputs
        # (scalar samples) keep working like they did with the per-sample loop.
        feature_axes = tuple(range(1, X.ndim))
        distances = np.empty((X.shape[0], len(self.centroids)))
        # Loop over the (few) centroids, vectorise over the (many) samples.
        for j, centroid in enumerate(self.centroids):
            distances[:, j] = np.sqrt(
                np.sum((X - centroid) ** 2, axis=feature_axes))
        # argmin returns the first minimum, i.e. ties go to the lower index.
        return np.argmin(distances, axis=1)

    def fit(self, X):
        """Compute the centroids for the samples in X.

        Parameters
        ----------
        X : array-like
            Samples along the first axis.

        Raises
        ------
        ValueError
            If X has no sample axis, or ``n_clusters`` is not between 1 and
            the number of samples.
        """
        X = self._as_float_array(X)
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters={self.n_clusters} must be between 1 and the "
                f"number of samples ({n_samples}).")

        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Start from distinct, randomly chosen samples. Fancy indexing copies,
        # so later centroid updates never write into X.
        self.centroids = X[rng.choice(
            n_samples, self.n_clusters, replace=False)]

        for _ in range(self.max_iter):
            prev_centroids = self.centroids.copy()
            labels = self._nearest_centroid(X)

            for i in range(self.n_clusters):
                members = X[labels == i]
                if members.shape[0] > 0:
                    self.centroids[i] = members.mean(axis=0)
                else:
                    # An empty cluster is re-seeded with a random sample.
                    self.centroids[i] = X[rng.choice(n_samples)]

            # Converged once an update no longer moves the centroids.
            if np.allclose(prev_centroids, self.centroids):
                break

    def predict(self, X):
        """Return the index of the nearest centroid for each sample in X.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.centroids is None:
            raise RuntimeError(
                "This KMeans instance is not fitted yet; call fit() first.")
        X = self._as_float_array(X)
        if X.shape[0] == 0:
            return np.empty(0, dtype=np.intp)
        return self._nearest_centroid(X)


# Load the iris dataset, cluster it with KMeans and report the silhouette
# coefficient. Each stage runs only when the previous one succeeded, so a
# failure prints a single, accurate message instead of a cascade of NameErrors
# from later stages that depend on names that were never assigned.
try:
    iris = load_iris()
    X = iris.data
    y = iris.target  # ground-truth labels; not used by the clustering below
except Exception as e:
    print(f"加载数据集时出错: {e}")
else:
    # Apply the KMeans algorithm.
    try:
        kmeans = KMeans(n_clusters=3)
        kmeans.fit(X)
        y_pred = kmeans.predict(X)
    except Exception as e:
        print(f"K-Means算法执行出错: {e}")
    else:
        # Print the silhouette coefficient (an evaluation metric better
        # suited to clustering; it is computed from X and the predicted
        # labels only).
        try:
            silhouette_avg = silhouette_score(X, y_pred)
            print("轮廓系数:", silhouette_avg)
        except Exception as e:
            print(f"计算轮廓系数时出错: {e}")
    

# Recorded notebook output:
# 轮廓系数: 0.5525919445213676
