In [1]:
%matplotlib inline

In [2]:
from time import time
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import MeanShift
from sklearn.cluster import SpectralClustering
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering

# Seed NumPy's global random generator before any estimator is built or fitted.
np.random.seed(30)

# Load the digits dataset; keep the targets as reference labels and
# standardise the feature matrix with sklearn.preprocessing.scale.
digits = load_digits()
labels = digits.target
data = scale(digits.data)

# Basic dimensions, reported in the summary line printed below.
n_samples, n_features = data.shape
n_digits = np.unique(labels).size

# Not referenced anywhere in the visible code; kept in case later cells use it.
sample_size = 300

print("n_digits: %d, \t n_samples %d, \t n_features %d"
      % (n_digits, n_samples, n_features))

# Header of the results table. The first column is titled "init", but the
# rows printed by evalution() put the estimator name in that column.
print('%-20s\t%-5s\t%-5s\t%-5s\t%-5s' % ("init", "time", "NMI", "Homo", "Cpl"))
def evalution(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print one row of the results table.

    The printed row holds ``name``, the wall-clock seconds spent inside
    ``estimator.fit(data)``, and three scores that compare
    ``estimator.labels_`` against the module-level ``labels``: normalized
    mutual information, homogeneity and completeness (in that order).

    Parameters
    ----------
    estimator : object
        Must provide ``fit(data)`` and expose ``labels_`` once fitted.
    name : str
        Text shown in the first column of the row.
    data : array-like
        Passed unchanged to ``estimator.fit``.

    Returns
    -------
    None
        The row is written to stdout; nothing is returned.
    """
    started = time()
    estimator.fit(data)
    # Stop the clock before scoring so only the fit itself is timed.
    elapsed = time() - started

    predicted = estimator.labels_
    scores = (
        metrics.normalized_mutual_info_score(labels, predicted),
        metrics.homogeneity_score(labels, predicted),
        metrics.completeness_score(labels, predicted),
    )
    print('%-20s\t%.2fs\t%.3f\t%.3f\t%.3f' % ((name, elapsed) + scores))

# Clustering estimators to benchmark, as (row label, unfitted estimator) pairs.
# They are fitted in exactly this order, one table row per entry.
# Every estimator that takes a cluster count uses n_digits, so the count
# follows the dataset instead of a hard-coded 10.
clusterers = [
    ("KMeans",
     KMeans(init='random', n_clusters=n_digits, n_init=10)),

    ("AffinityPropagation",
     AffinityPropagation()),

    ("MeanShift",
     MeanShift(bandwidth=5)),

    # NOTE(review): the recorded run shows this row taking 310.92s with
    # NMI 0.030. No `affinity` is passed, so the library default is used;
    # presumably that default suits the standardised 64-feature data poorly.
    # Consider trying affinity="nearest_neighbors" -- confirm by re-running.
    ("SpectralClustering",
     SpectralClustering(n_clusters=n_digits,
                        assign_labels="discretize",
                        random_state=0)),

    # NOTE(review): the next two entries share a label and, in the recorded
    # output, report identical scores. scikit-learn documents "ward" as the
    # default linkage, so they are presumably the same configuration --
    # confirm whether a different linkage was intended for one of them.
    ("AgglomerativeClustering",
     AgglomerativeClustering(linkage="ward", n_clusters=n_digits)),

    ("AgglomerativeClustering",
     AgglomerativeClustering(n_clusters=n_digits)),

    ("DBSCAN",
     DBSCAN(eps=4, min_samples=15)),
]

for clusterer_name, clusterer in clusterers:
    evalution(clusterer, name=clusterer_name, data=data)

n_digits: 10, 	 n_samples 1797, 	 n_features 64
init                	time 	NMI  	Homo 	Cpl  
KMeans              	0.13s	0.627	0.603	0.652
AffinityPropagation 	3.86s	0.655	0.932	0.460
MeanShift           	8.35s	0.675	0.833	0.546
SpectralClustering  	310.92s	0.030	0.006	0.148
AgglomerativeClustering	0.19s	0.797	0.758	0.836
AgglomerativeClustering	0.14s	0.797	0.758	0.836
DBSCAN              	0.37s	0.444	0.297	0.663
