In [None]:
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.preprocessing import Normalizer
from sklearn.metrics import rand_score
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.manifold import TSNE

#### 1) Завантаження датасету картинок з цифрами

In [None]:
# Load the 8x8 digit images and keep the first N samples of every class, so the
# working set is balanced and grouped by class: rows i*N .. (i+1)*N - 1 hold digit i.
digits = load_digits()
N = 110

per_class_data = [digits.data[digits.target == digit][:N] for digit in range(10)]
# The row arithmetic used below (digit * N + sample) is only valid when every
# class really contributes N rows, so fail loudly instead of showing wrong images.
assert all(len(block) == N for block in per_class_data), \
    "at least one digit class has fewer than N samples"
# Concatenate once instead of re-copying the growing array on every iteration.
selected_data = np.concatenate(per_class_data, axis=0)

# Preview a few samples of every class in a single grid (one row per digit)
# rather than emitting one separate figure per image.
num_samples_per_class = 3
fig, axes = plt.subplots(
    10, num_samples_per_class,
    figsize=(2 * num_samples_per_class, 20),
    squeeze=False,  # keep axes 2-D even if num_samples_per_class == 1
)
for digit in range(10):
    for sample in range(num_samples_per_class):
        ax = axes[digit, sample]
        ax.imshow(selected_data[digit * N + sample].reshape(8, 8))
        ax.set_title(f"Class {digit}")
        ax.axis("off")
fig.tight_layout()
plt.show()

# Scale every image (row) to unit norm before computing the embeddings.
normalized_data = Normalizer().fit_transform(selected_data)

In [None]:
# Visualisation in 2D space (PCA). The 3D projection is fitted here as well
# because the next cell plots data_3d.
pca_2d = PCA(n_components=2)
pca_3d = PCA(n_components=3)
data_2d = pca_2d.fit_transform(normalized_data)
data_3d = pca_3d.fit_transform(normalized_data)

# normalized_data is grouped by class: N rows of digit 0, then N rows of digit 1, ...
# (assumes every class has at least N samples). Its true labels are therefore
# N repeats of each digit, NOT digits.target[:N * 10], which is the first
# N * 10 targets in the dataset's original, unsorted order.
true_labels = np.repeat(np.arange(10), N)

plt.figure(figsize=(8, 8))
plt.scatter(data_2d[:, 0], data_2d[:, 1], c=true_labels)
plt.colorbar(label='Digit Class')
plt.title('PCA 2D Embedding')
plt.show()


In [None]:
# Visualisation in 3D space (PCA); data_3d comes from the PCA cell above.
# The rows are grouped by class (N rows per digit, in digit order), so the colour
# of each point must come from N repeats of each digit rather than from
# digits.target[:N * 10], which follows the dataset's original order.
true_labels = np.repeat(np.arange(10), N)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(projection='3d')
scatter = ax.scatter(data_3d[:, 0], data_3d[:, 1], data_3d[:, 2], c=true_labels)
ax.set_title('PCA 3D Embedding')
plt.colorbar(scatter, label='Digit Class')
plt.show()


In [None]:
# Visualisation in 2D space (ISOMAP). The 3D embedding is fitted here as well
# because the next cell plots data_3d. Note that data_2d / data_3d are
# overwritten: from here on they hold the ISOMAP embeddings, not the PCA ones.
isomap_2d = Isomap(n_components=2, n_neighbors=30)
isomap_3d = Isomap(n_components=3, n_neighbors=30)
data_2d = isomap_2d.fit_transform(normalized_data)
data_3d = isomap_3d.fit_transform(normalized_data)

# normalized_data is grouped by class: N rows of digit 0, then N rows of digit 1, ...
# (assumes every class has at least N samples). Its true labels are therefore
# N repeats of each digit, NOT digits.target[:N * 10], which is the first
# N * 10 targets in the dataset's original, unsorted order.
true_labels = np.repeat(np.arange(10), N)

plt.figure(figsize=(8, 8))
plt.scatter(data_2d[:, 0], data_2d[:, 1], c=true_labels)
plt.colorbar(label='Digit Class')
plt.title('ISOMAP 2D Embedding')
plt.show()


In [None]:
# Visualisation in 3D space (ISOMAP); data_3d comes from the ISOMAP cell above.
# The rows are grouped by class (N rows per digit, in digit order), so the colour
# of each point must come from N repeats of each digit rather than from
# digits.target[:N * 10], which follows the dataset's original order.
true_labels = np.repeat(np.arange(10), N)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(data_3d[:, 0], data_3d[:, 1], data_3d[:, 2], c=true_labels)
ax.set_title('ISOMAP 3D Embedding')
plt.colorbar(scatter, label='Digit Class')
plt.show()

In [None]:
# Embedding dimensionalities to compare; the PCA and t-SNE scoring cells iterate
# over the same range.
d_range = range(1, 10)

# Ground truth for normalized_data: its rows are grouped by class (N rows per
# digit, in digit order), so the labels are N repeats of each digit.
# digits.target[:N * 10] follows the dataset's original order and does not
# correspond to these rows.
true_labels = np.repeat(np.arange(10), N)

# Rand score of a KMeans clustering of the ISOMAP embedding, per dimensionality.
isomap_score = []
for d in d_range:
    isomap_data = Isomap(n_components=d).fit_transform(normalized_data)
    # NOTE(review): 5 clusters are requested although the data has 10 digit
    # classes - confirm this is intentional.
    # random_state is fixed so that re-running the notebook gives the same scores.
    kmeans = KMeans(n_clusters=5, random_state=0)
    labels = kmeans.fit_predict(isomap_data)
    score = rand_score(true_labels, labels)
    isomap_score.append(score)

In [None]:
# Ground truth for normalized_data: its rows are grouped by class (N rows per
# digit, in digit order), so the labels are N repeats of each digit.
# digits.target[:N * 10] follows the dataset's original order and does not
# correspond to these rows.
true_labels = np.repeat(np.arange(10), N)

# Rand score of a KMeans clustering of the PCA projection, per dimensionality.
pca_score = []
for d in d_range:
    pca_data = PCA(n_components=d).fit_transform(normalized_data)
    # NOTE(review): 5 clusters are requested although the data has 10 digit
    # classes - confirm this is intentional.
    # random_state is fixed so that re-running the notebook gives the same scores.
    kmeans = KMeans(n_clusters=5, random_state=0)
    # Cluster the PCA projection itself; clustering the leftover isomap_data
    # from the previous cell would make every PCA score identical and meaningless.
    labels = kmeans.fit_predict(pca_data)
    score = rand_score(true_labels, labels)
    pca_score.append(score)

In [None]:
# Ground truth for normalized_data: its rows are grouped by class (N rows per
# digit, in digit order), so the labels are N repeats of each digit.
# digits.target[:N * 10] follows the dataset's original order and does not
# correspond to these rows.
true_labels = np.repeat(np.arange(10), N)

# Rand score of a KMeans clustering of the t-SNE embedding, per dimensionality.
tsne_score = []
for d in d_range:
    # method="exact" is kept from the original; presumably chosen because the
    # default Barnes-Hut method does not support n_components >= 4.
    # random_state makes the stochastic embedding reproducible across runs.
    tsne_data = TSNE(n_components=d, method="exact", random_state=0).fit_transform(normalized_data)
    # NOTE(review): 5 clusters are requested although the data has 10 digit
    # classes - confirm this is intentional.
    kmeans = KMeans(n_clusters=5, random_state=0)
    # Cluster the t-SNE embedding itself; clustering the leftover isomap_data
    # from an earlier cell would make every t-SNE score identical and meaningless.
    labels = kmeans.fit_predict(tsne_data)
    score = rand_score(true_labels, labels)
    tsne_score.append(score)

In [None]:
# Compare the clustering quality of the three embeddings across dimensionalities.
fig, ax = plt.subplots(figsize=(10, 5))
for method_name, method_scores in (
    ('ISOMAP', isomap_score),
    ('PCA', pca_score),
    ('TSNE', tsne_score),
):
    ax.scatter(d_range, method_scores, label=method_name)
ax.set_title("Grade of the quality of clustering with nested data")
ax.set_xlabel("Number of components")
ax.set_ylabel("RandScore")
ax.legend()
plt.show()
# Column header for the per-dimensionality score rows printed by the following cell.
print('Кількість компонентів, точність з ISOMAP, точність з PCA, точність з T-SNE')

In [None]:
# One row per embedding dimensionality: the number of components followed by the
# ISOMAP, PCA and t-SNE rand scores, each formatted to four decimals.
for position, n_components in enumerate(d_range):
    row_scores = (isomap_score[position], pca_score[position], tsne_score[position])
    formatted = ', '.join(f'{value:.4f}' for value in row_scores)
    print(f'{n_components}, ' + formatted)