In [136]:
from sklearn.decomposition import PCA
from sklearn import datasets
from sklearn.preprocessing import scale
import numpy as np
from sklearn import metrics

In [137]:
digits = datasets.load_digits()
data = scale(digits.data)
n_samples, n_features = data.shape
n_digits = len(np.unique(digits.target))
labels = digits.target
sample_size = 300

In [138]:
data_pca = PCA(n_components=8).fit_transform(data)
data_pca

array([[ 1.91426312, -0.95480585, -3.94562013, ...,  0.5292565 ,
        -1.40517563,  1.4902348 ],
       [ 0.58893863,  0.92478129,  3.92491933, ..., -0.66917099,
         1.86945132,  0.55442673],
       [ 1.3018379 , -0.31664146,  3.02348782, ...,  0.93883758,
        -1.32804559,  1.17357428],
       ...,
       [ 1.02261922, -0.14787733,  2.46986909, ..., -0.00438218,
         0.3796035 ,  0.53501866],
       [ 1.0760635 , -0.38102   , -2.4552698 , ..., -0.63636231,
         1.0360042 ,  0.76823529],
       [-1.25774481, -2.22798985,  0.28261806, ...,  2.33694643,
        -0.37724771, -1.1729592 ]])

## K-means

In [139]:
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist

In [140]:
estimator = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
estimator.fit(data)

KMeans(n_clusters=10)

In [141]:
mutual = metrics.adjusted_mutual_info_score(labels,  estimator.labels_)
siholitte = metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
rand = metrics.adjusted_rand_score(labels, estimator.labels_)

In [142]:
print(f"Adjusted Rand Index {rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')

Adjusted Rand Index 0.5361457295834392
Adjusted Mutual Information: 0.6685457904988449
Silhouette 0.1350220118986986


In [143]:
pca_estimator = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
pca_estimator.fit(data_pca)

KMeans(n_clusters=10)

In [144]:
mutual = metrics.adjusted_mutual_info_score(labels,  pca_estimator.labels_)
siholitte = metrics.silhouette_score(data, pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
rand = metrics.adjusted_rand_score(labels, pca_estimator.labels_)

In [145]:
print(f"Adjusted Rand Index {rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')

Adjusted Rand Index 0.5098878479260415
Adjusted Mutual Information: 0.6329601091061987
Silhouette 0.12764753427118586


## Affinity Propagation

In [146]:
from sklearn.cluster import AffinityPropagation

In [147]:
model_aff = AffinityPropagation()
model_aff.fit(data)

AffinityPropagation()

In [148]:
max(model_aff.labels_)

131

In [149]:
mutual = metrics.adjusted_mutual_info_score(labels,  model_aff.labels_)
siholitte = metrics.silhouette_score(data, model_aff.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
rand = metrics.adjusted_rand_score(labels, model_aff.labels_)

In [150]:
print(f"Adjusted Rand Index {rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')

Adjusted Rand Index 0.1543890171236833
Adjusted Mutual Information: 0.5728829386106491
Silhouette 0.052808618955257555


In [151]:
pca_estimator = AffinityPropagation()
pca_estimator.fit(data_pca)

AffinityPropagation()

In [152]:
pca_estimator

AffinityPropagation()

In [153]:
pca_estimator.labels_

array([54, 48, 20, ...,  1, 66, 72])

In [154]:
mutual = metrics.adjusted_mutual_info_score(labels,  pca_estimator.labels_)
siholitte = metrics.silhouette_score(data, pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
rand = metrics.adjusted_rand_score(labels, pca_estimator.labels_)

In [155]:
print(f"Adjusted Rand Index {rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')

Adjusted Rand Index 0.21868737338136437
Adjusted Mutual Information: 0.5868199032905739
Silhouette 0.02568752364013651


## Agglomerative Hierarchical clustering

In [156]:
from sklearn.cluster import AgglomerativeClustering

In [157]:
model_agglom = AgglomerativeClustering(n_clusters=10)
model_agglom.fit(data)

AgglomerativeClustering(n_clusters=10)

In [158]:
model_agglom.labels_

array([5, 1, 1, ..., 1, 1, 1])

In [159]:
mutual = metrics.adjusted_mutual_info_score(labels,  model_agglom.labels_)
rand = metrics.adjusted_rand_score(labels, model_agglom.labels_)
siholitte = metrics.silhouette_score(data, model_agglom.labels_,
                                      metric='euclidean',
                                      sample_size=300)

In [160]:
print(f"Adjusted Rand Index {rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')

Adjusted Rand Index 0.6643458356002894
Adjusted Mutual Information: 0.7934927361004462
Silhouette 0.13822212756512742


In [161]:
pca_estimator = AgglomerativeClustering(n_clusters=10)
pca_estimator.fit(data_pca)

AgglomerativeClustering(n_clusters=10)

In [162]:
mutual = metrics.adjusted_mutual_info_score(labels,  pca_estimator.labels_)
siholitte = metrics.silhouette_score(data, pca_estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)
rand = metrics.adjusted_rand_score(labels, pca_estimator.labels_)

In [163]:
print(f"Adjusted Rand Index {rand}", f"Adjusted Mutual Information: {mutual}", f"Silhouette {siholitte}", sep='\n')

Adjusted Rand Index 0.5175170872709371
Adjusted Mutual Information: 0.6812080413094683
Silhouette 0.10047023052277776
