In [None]:
from PIL import Image
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import os
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score
import warnings

# Silence every warning so cell output stays readable.
# NOTE(review): this also hides scikit-learn deprecation/convergence warnings;
# consider narrowing the filter once the notebook is stable.
warnings.filterwarnings("ignore")

# Load subjects s1..s5. Each image becomes one flattened row of X and the
# subject number is stored as its ground-truth label in Y.
X, Y = [], []
for i in range(1, 6):
    subject_dir = f"./orl_faces/s{i}"
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and everything indexed by it later) identical on every run.
    for f in sorted(os.listdir(subject_dir)):
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array.
        with Image.open(f"{subject_dir}/{f}") as image:
            # `image_array` intentionally stays in scope after the loop: later
            # cells use `image_array.shape` to un-flatten rows of X.
            image_array = np.array(image)
        X.append(image_array.flatten())
        Y.append(i)

X = np.array(X)
Y = np.array(Y)

# Two PCA projections of the flattened images: 50 components (used for
# reconstruction and clustering) and 3 components (used for 3-D scatter plots).
# random_state pins the result when a randomized SVD solver is selected.
pca50 = PCA(n_components=50, random_state=0)
pca3 = PCA(n_components=3, random_state=0)

# Fit each projection on the full data set and keep the transformed samples.
X_50 = pca50.fit_transform(X)
X_3 = pca3.fit_transform(X)

In [None]:
index = 0  # change this index to inspect a different image

# Map the sample's 50 PCA coefficients back to pixel space and restore the
# 2-D layout of the source images.
reconstructed_image = pca50.inverse_transform(X_50[index]).reshape(
    image_array.shape
)

# Original on the left, PCA reconstruction on the right.
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
panels = (
    ("Original Image", X[index].reshape(image_array.shape)),
    ("Reconstructed Image", reconstructed_image),
)
for ax, (panel_title, pixels) in zip(axes, panels):
    ax.imshow(pixels, cmap="gray")
    ax.set_title(panel_title)
plt.show()

In [None]:
def show_images(X, labels, title, image_shape):
    """Draw one figure per cluster, with all member images in a single row.

    Args:
        X: Indexable collection of flattened images; ``X[i]`` must support
            ``.reshape(image_shape)``.
        labels: One cluster label per entry of ``X``, in the same order.
        title: Prefix of each figure title (``"<title> - Cluster <label>"``).
        image_shape: Shape every flattened image is reshaped to for display.
    """
    # Group sample indices by label in a single pass over `labels`.
    members = {}
    for idx, label in enumerate(labels):
        members.setdefault(label, []).append(idx)

    # Sorted so figures appear in a stable, predictable order.
    for label in sorted(members):
        indices = members[label]

        # squeeze=False always yields a 2-D grid of axes, so a cluster with a
        # single image needs no special handling.
        fig, axes = plt.subplots(1, len(indices), figsize=(15, 3), squeeze=False)
        fig.suptitle(f"{title} - Cluster {label}")

        # Exactly one axis exists per member image, so none are left blank.
        for ax, idx in zip(axes[0], indices):
            ax.imshow(X[idx].reshape(image_shape), cmap="gray")
            ax.axis("off")


def clusters_3d(X, labels, title):
    """Scatter the first three columns of ``X`` in 3-D, coloured by ``labels``.

    Args:
        X: 2-D array; columns 0, 1 and 2 become the x, y and z coordinates.
        labels: Per-point values handed to ``scatter`` as the colour argument.
        title: Title set on the 3-D axes.
    """
    # A new figure holding a single 3-D axes.
    ax = plt.figure().add_subplot(111, projection="3d")
    xs, ys, zs = (X[:, col] for col in range(3))
    ax.scatter(xs, ys, zs, c=labels, cmap="viridis")
    ax.set_title(title)


# Report how well a clustering agrees with the ground-truth labels.
def ari_nmi_acc(labels, name):
    """Print the ARI and NMI of ``labels`` against the module-level labels ``Y``.

    Despite the ``acc`` in the name, no accuracy is computed: only the adjusted
    Rand index and the normalized mutual information are reported.

    Args:
        labels: Predicted cluster label per sample, in the same order as ``Y``.
        name: Run description printed verbatim as the prefix of the line.
    """
    ari = adjusted_rand_score(Y, labels)
    nmi = normalized_mutual_info_score(Y, labels)
    # Callers pass names that already contain "Clustering" (for example
    # "KMeans Clustering PCA50"), so the word is not appended a second time.
    print(f"{name} - ARI: {ari}, NMI: {nmi}")

In [None]:
def clustering(clustering_model, name):
    """Fit one model on the raw pixels and on both PCA projections, reporting each run.

    The same estimator instance is refitted three times, on the module-level
    arrays ``X``, ``X_50`` and ``X_3``. After every fit its ``labels_`` are
    scored with ``ari_nmi_acc`` and visualised with ``show_images``; the last
    (3-component) run is additionally drawn as a 3-D scatter plot.

    Args:
        clustering_model: Estimator exposing ``fit(data)`` and a ``labels_``
            attribute after fitting.
        name: Base run name; " PCA50" / " PCA3" are appended for the
            projected inputs.
    """
    stages = (("", X), (" PCA50", X_50), (" PCA3", X_3))
    for suffix, features in stages:
        run_name = name + suffix
        clustering_model.fit(features)
        labels = clustering_model.labels_
        ari_nmi_acc(labels, run_name)
        # The gallery always shows the original images from X, whichever
        # feature space the labels were computed in.
        show_images(X, labels, run_name, image_array.shape)

    # `labels` now holds the assignment from the final fit on X_3.
    clusters_3d(X_3, labels, name + " PCA3")

In [None]:
from sklearn.cluster import KMeans

# scikit-learn k-means with five clusters, one per subject loaded above.
# random_state pins the centroid initialisation so the reported scores are
# reproducible; n_init is explicit because its default differs between
# scikit-learn releases.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=0)
clustering(kmeans, "KMeans Clustering")

In [None]:
from handwriting import MyKMeans

# k-means from the local `handwriting` module, run through the same
# fit/score/plot pipeline with five clusters.
mykmeans = MyKMeans(n_clusters=5)
clustering(clustering_model=mykmeans, name="MyKMeans Clustering")

In [None]:
# Hierarchical (agglomerative) clustering
from sklearn.cluster import AgglomerativeClustering

# scikit-learn's agglomerative clustering, default settings apart from the
# requested five clusters.
hier = AgglomerativeClustering(n_clusters=5)
clustering(clustering_model=hier, name="Agglomerative Clustering")

In [None]:
from handwriting import MyAgglomerativeClustering

# Agglomerative clustering from the local `handwriting` module, five clusters.
myhier = MyAgglomerativeClustering(n_clusters=5)
clustering(clustering_model=myhier, name="MyAgglomerative Clustering")

In [None]:
# DBSCAN
from sklearn.cluster import DBSCAN

# eps and min_samples have to be tuned to the dataset.
# NOTE(review): `clustering` reuses this one eps for the raw, 50-component and
# 3-component inputs, whose distance scales presumably differ — confirm that
# 0.01 is meaningful for each of them.
dbscan = DBSCAN(eps=0.01, min_samples=5)
clustering(clustering_model=dbscan, name="DBSCAN Clustering")

In [None]:
from handwriting import MyDBSCAN

# DBSCAN from the local `handwriting` module.
# eps and min_samples have to be tuned to the dataset.
mydbscan = MyDBSCAN(eps=0.0001, min_samples=5)
clustering(clustering_model=mydbscan, name="MyDBSCAN Clustering")

In [None]:
# Mean shift
from sklearn.cluster import MeanShift

# All constructor defaults are used; no cluster count is passed to this
# estimator.
mean_shift = MeanShift()
clustering(clustering_model=mean_shift, name="MeanShift Clustering")

In [None]:
# from handwriting import MyMeanShift

# mymean_shift = MyMeanShift()
# clustering(mymean_shift, "MyMeanShift Clustering")

In [None]:
# Spectral clustering
from sklearn.cluster import SpectralClustering

# Five clusters, one per subject loaded above. random_state pins the random
# initialisation used when assigning labels so the reported scores are
# reproducible across runs.
spectral = SpectralClustering(n_clusters=5, random_state=0)
clustering(spectral, "Spectral Clustering")

In [None]:
# Gaussian mixture model
# from sklearn.mixture import GaussianMixture

# gmm = GaussianMixture(n_components=3)
# gmm.fit(X)
# labels = gmm.predict(X)

In [None]:
# OPTICS
from sklearn.cluster import OPTICS

# Density-based clustering; the three settings below are the only non-default
# arguments.
optics = OPTICS(
    min_samples=5,
    xi=0.05,
    min_cluster_size=0.15,
)
clustering(clustering_model=optics, name="OPTICS Clustering")