In [204]:
from sklearn.decomposition import PCA
from sklearn import datasets
from sklearn.preprocessing import scale
import numpy as np
from sklearn import metrics

In [205]:
# Load the scikit-learn digits dataset and standardise its feature columns.
digits = datasets.load_digits()
labels = digits.target  # ground-truth class of every sample
data = scale(digits.data)
n_samples, n_features = data.shape
# One cluster per distinct class; used as n_clusters by the models below.
n_digits = len(np.unique(digits.target))
# Size of the subsample passed to the silhouette computations.
sample_size = 300

In [None]:
# Project the standardised data onto its first 5 principal components.
data_pca_5 = PCA(n_components=5).fit_transform(X=data)
data_pca_5

In [206]:
# Project the standardised data onto its first 8 principal components.
data_pca_8 = PCA(n_components=8).fit_transform(X=data)
data_pca_8

array([[ 1.91405139, -0.95421576, -3.94593459, ...,  0.53111805,
        -1.41593471,  1.48793325],
       [ 0.58867073,  0.92553703,  3.92318764, ..., -0.66207758,
         1.89760634,  0.50960404],
       [ 1.30219356, -0.31769635,  3.02324257, ...,  0.93879634,
        -1.29954195,  1.14878233],
       ...,
       [ 1.02284309, -0.14857833,  2.47253504, ..., -0.01478505,
         0.35267242,  0.52931033],
       [ 1.0761579 , -0.38148618, -2.45361869, ..., -0.64874384,
         1.01936394,  0.76431466],
       [-1.25669035, -2.22991344,  0.28852886, ...,  2.32318125,
        -0.43222551, -1.14441305]])

## K-means

In [207]:
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist

In [208]:
# K-means on the full standardised feature set: one cluster per digit class,
# k-means++ seeding, 10 initialisations.
estimator = KMeans(n_clusters=n_digits, init='k-means++', n_init=10)
estimator.fit(X=data)

KMeans(n_clusters=10)

In [209]:
# Compare the K-means partition of the standardised data with the true labels.
rand = metrics.rand_score(labels, estimator.labels_)
adj_rand = metrics.adjusted_rand_score(labels, estimator.labels_)
mutual = metrics.adjusted_mutual_info_score(labels, estimator.labels_)
# Silhouette is computed on a subsample of `sample_size` points.
siholitte = metrics.silhouette_score(
    data,
    estimator.labels_,
    metric='euclidean',
    sample_size=sample_size,
)

In [210]:
# Report the scores of the K-means model fitted on the raw data, one per line.
print(
    f"Rand {rand}",
    f"Adjusted Rand Index {adj_rand}",
    f"Adjusted Mutual Information: {mutual}",
    f"Silhouette {siholitte}",
    sep='\n',
)

Adjusted Rand Index 0.4662530761948069
Adjusted Mutual Information: 0.612678156041812
Silhouette 0.12559126075575974


In [211]:
# Same K-means configuration as before, fitted on the 8-component PCA projection.
pca_estimator = KMeans(n_clusters=n_digits, init='k-means++', n_init=10)
pca_estimator.fit(X=data_pca_8)

KMeans(n_clusters=10)

In [212]:
# Score the K-means model fitted on the 8-component PCA projection
# (`pca_estimator`) against the ground-truth digit labels.
mutual = metrics.adjusted_mutual_info_score(labels, pca_estimator.labels_)
# NOTE(review): silhouette is measured on the full standardised `data`, not on
# `data_pca_8` where the model was fitted -- confirm this is intended.
siholitte = metrics.silhouette_score(data, pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
# `adjusted_rand_score` is the sklearn function for the Adjusted Rand Index.
adj_rand = metrics.adjusted_rand_score(labels, pca_estimator.labels_)
# Use this model's own labels so the Rand index describes the PCA clustering,
# not the raw-data `estimator`.
rand = metrics.rand_score(labels, pca_estimator.labels_)

In [213]:
# Report the scores of the K-means model fitted on the 8-component projection.
print(
    f"Rand {rand}",
    f"Adjusted Rand Index {adj_rand}",
    f"Adjusted Mutual Information: {mutual}",
    f"Silhouette {siholitte}",
    sep='\n',
)

Adjusted Rand Index 0.5083604674798776
Adjusted Mutual Information: 0.6311530915500716
Silhouette 0.13925349953910762


In [None]:
# K-means on the 5-component PCA projection, scored against the true labels.
# Note: this rebinds `pca_estimator`, replacing the 8-component model.
pca_estimator = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
pca_estimator.fit(data_pca_5)
mutual = metrics.adjusted_mutual_info_score(labels, pca_estimator.labels_)
# NOTE(review): silhouette is measured on the full standardised `data`, not on
# `data_pca_5` where the model was fitted -- confirm this is intended.
siholitte = metrics.silhouette_score(data, pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
adj_rand = metrics.adjusted_rand_score(labels, pca_estimator.labels_)
# Use the labels of the model under evaluation, not the raw-data `estimator`.
rand = metrics.rand_score(labels, pca_estimator.labels_)
print(f"Rand {rand}", f"Adjusted Rand Index {adj_rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')

## Affinity Propagation

In [214]:
from sklearn.cluster import AffinityPropagation

In [215]:
# Affinity propagation with default settings on the full standardised data;
# the number of clusters is not specified up front.
model_aff = AffinityPropagation()
model_aff.fit(X=data)

AffinityPropagation()

In [216]:
# Number of cluster-centre indices found by the model fitted on the full data.
len(model_aff.cluster_centers_indices_)

132

In [217]:
# Largest cluster label assigned by the affinity-propagation model.
model_aff.labels_.max()

131

In [218]:
# Score the affinity-propagation clustering of the full standardised data
# (`model_aff`) against the ground-truth digit labels.
mutual = metrics.adjusted_mutual_info_score(labels, model_aff.labels_)
siholitte = metrics.silhouette_score(data, model_aff.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
adj_rand = metrics.adjusted_rand_score(labels, model_aff.labels_)
# Use this model's own labels so the Rand index describes affinity propagation,
# not the K-means `estimator`.
rand = metrics.rand_score(labels, model_aff.labels_)

In [219]:
# Report the scores of the affinity-propagation model fitted on the raw data.
print(
    f"Rand {rand}",
    f"Adjusted Rand Index {adj_rand}",
    f"Adjusted Mutual Information: {mutual}",
    f"Silhouette {siholitte}",
    sep='\n',
)

Adjusted Rand Index 0.1543890171236833
Adjusted Mutual Information: 0.5728829386106491
Silhouette 0.059802242874669294


In [220]:
# Affinity propagation with default settings on the 8-component PCA projection.
aff_pca_estimator = AffinityPropagation()
aff_pca_estimator.fit(X=data_pca_8)

AffinityPropagation()

In [221]:
# Number of cluster-centre indices found by the model fitted on the PCA projection.
len(aff_pca_estimator.cluster_centers_indices_)

75

In [222]:
# Score the affinity-propagation model fitted on the 8-component PCA
# projection (`aff_pca_estimator`) against the ground-truth digit labels.
mutual = metrics.adjusted_mutual_info_score(labels, aff_pca_estimator.labels_)
# NOTE(review): silhouette is measured on the full standardised `data`, not on
# `data_pca_8` where the model was fitted -- confirm this is intended.
siholitte = metrics.silhouette_score(data, aff_pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
adj_rand = metrics.adjusted_rand_score(labels, aff_pca_estimator.labels_)
# Use the labels of the model under evaluation, not the K-means `estimator`.
rand = metrics.rand_score(labels, aff_pca_estimator.labels_)

In [223]:
# Report the scores of the affinity-propagation model fitted on the
# 8-component projection, one per line.
print(
    f"Rand {rand}",
    f"Adjusted Rand Index {adj_rand}",
    f"Adjusted Mutual Information: {mutual}",
    f"Silhouette {siholitte}",
    sep='\n',
)

Adjusted Rand Index 0.2145950107593524
Adjusted Mutual Information: 0.5835018417710425
Silhouette 0.03413962164031616


In [None]:
# Affinity propagation on the 5-component PCA projection, scored against the
# true labels. Note: this rebinds `aff_pca_estimator`, replacing the
# 8-component model.
aff_pca_estimator = AffinityPropagation()
aff_pca_estimator.fit(data_pca_5)
mutual = metrics.adjusted_mutual_info_score(labels, aff_pca_estimator.labels_)
# NOTE(review): silhouette is measured on the full standardised `data`, not on
# `data_pca_5` where the model was fitted -- confirm this is intended.
siholitte = metrics.silhouette_score(data, aff_pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
adj_rand = metrics.adjusted_rand_score(labels, aff_pca_estimator.labels_)
# Use the labels of the model under evaluation, not the K-means `estimator`.
rand = metrics.rand_score(labels, aff_pca_estimator.labels_)
print(f"Rand {rand}", f"Adjusted Rand Index {adj_rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')

## Agglomerative Hierarchical Clustering

In [224]:
from sklearn.cluster import AgglomerativeClustering

In [225]:
# Agglomerative clustering of the full standardised data into one cluster
# per digit class (all other settings left at their defaults).
model_agglom = AgglomerativeClustering(n_clusters=n_digits)
model_agglom.fit(X=data)

AgglomerativeClustering(n_clusters=10)

In [226]:
# Display the cluster label assigned to each sample by the agglomerative model.
model_agglom.labels_

array([5, 1, 1, ..., 1, 1, 1])

In [227]:
# Score the agglomerative clustering of the full standardised data
# (`model_agglom`) against the ground-truth digit labels.
mutual = metrics.adjusted_mutual_info_score(labels, model_agglom.labels_)
adj_rand = metrics.adjusted_rand_score(labels, model_agglom.labels_)
# Use this model's own labels so the Rand index describes the agglomerative
# clustering, not the K-means `estimator`.
rand = metrics.rand_score(labels, model_agglom.labels_)
# Use the shared `sample_size` setting (300) rather than a hard-coded copy,
# so every model is scored with the same subsample size.
siholitte = metrics.silhouette_score(data, model_agglom.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)

In [228]:
# Report the scores of the agglomerative model fitted on the raw data.
print(
    f"Rand {rand}",
    f"Adjusted Rand Index {adj_rand}",
    f"Adjusted Mutual Information: {mutual}",
    f"Silhouette {siholitte}",
    sep='\n',
)

Adjusted Rand Index 0.6643458356002894
Adjusted Mutual Information: 0.7934927361004462
Silhouette 0.13271791991498086


In [229]:
# Agglomerative clustering of the 8-component PCA projection into one cluster
# per digit class.
agg_pca_estimator = AgglomerativeClustering(n_clusters=n_digits)
agg_pca_estimator.fit(X=data_pca_8)

AgglomerativeClustering(n_clusters=10)

In [230]:
# Score the agglomerative model fitted on the 8-component PCA projection
# (`agg_pca_estimator`) against the ground-truth digit labels.
mutual = metrics.adjusted_mutual_info_score(labels, agg_pca_estimator.labels_)
# NOTE(review): silhouette is measured on the full standardised `data`, not on
# `data_pca_8` where the model was fitted -- confirm this is intended.
siholitte = metrics.silhouette_score(data, agg_pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
# Use the labels of the model under evaluation, not the K-means `estimator`.
rand = metrics.rand_score(labels, agg_pca_estimator.labels_)
adj_rand = metrics.adjusted_rand_score(labels, agg_pca_estimator.labels_)

In [231]:
# Report the scores of the agglomerative model fitted on the 8-component
# projection, one per line.
print(
    f"Rand {rand}",
    f"Adjusted Rand Index {adj_rand}",
    f"Adjusted Mutual Information: {mutual}",
    f"Silhouette {siholitte}",
    sep='\n',
)

Adjusted Rand Index 0.5404614204652988
Adjusted Mutual Information: 0.6833793151852052
Silhouette 0.10831738051460027


In [None]:
# Agglomerative clustering on the 5-component PCA projection, scored against
# the true labels. Note: this rebinds `agg_pca_estimator`, replacing the
# 8-component model.
agg_pca_estimator = AgglomerativeClustering(n_clusters=n_digits)
agg_pca_estimator.fit(data_pca_5)
mutual = metrics.adjusted_mutual_info_score(labels, agg_pca_estimator.labels_)
# NOTE(review): silhouette is measured on the full standardised `data`, not on
# `data_pca_5` where the model was fitted -- confirm this is intended.
siholitte = metrics.silhouette_score(data, agg_pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
adj_rand = metrics.adjusted_rand_score(labels, agg_pca_estimator.labels_)
# Use the labels of the model under evaluation, not the K-means `estimator`.
rand = metrics.rand_score(labels, agg_pca_estimator.labels_)
print(f"Rand {rand}", f"Adjusted Rand Index {adj_rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')