In [43]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.cluster import v_measure_score
from sklearn.cluster import MiniBatchKMeans
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Synthetic benchmark: three groups of n_samples points each, labelled 0, 1, 2.
n_samples = 1500

# Groups 0 and 1: two blobs whose centres are drawn at random inside
# center_box, each with its own standard deviation.
dataset = datasets.make_blobs(
    n_samples=n_samples,
    centers=2,
    center_box=(-7.0, 7.5),
    cluster_std=[1.4, 1.7],
    random_state=42,
)
blob_points, blob_labels = dataset

# Group 2: one blob around a fixed centre, then pushed through a linear map so
# that it is no longer axis-aligned.
X_2, _ = datasets.make_blobs(
    n_samples=n_samples,
    random_state=170,
    centers=[[-4, -3]],
    cluster_std=[1.9],
)
transformation = [[1.2, -0.8], [-0.4, 1.7]]
X_2 = np.dot(X_2, transformation)

# Stack the features and give every point of the third group the label 2.
X = np.concatenate((blob_points, X_2))
y = np.concatenate((blob_labels, np.full(len(X_2), 2)))

# Optional visual check of the ground-truth clusters (uncomment to plot):
#plt.rcParams['figure.figsize'] = 10, 10
#plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.5)
#plt.show()

# Baseline comparison: fit each clustering algorithm on X and score the
# predicted labels against the ground truth y with the V-measure.

# n_init is passed explicitly. Leaving it unset makes scikit-learn emit a
# FutureWarning (the default is moving from 10 to 'auto'), and the score would
# then depend on the installed version. 10 is the value the old default used.
k_means = KMeans(init="k-means++", n_init=10, n_clusters=3, random_state=42)
y_pred_kmeans = k_means.fit_predict(X)

gm = GaussianMixture(n_components=3, random_state=42)
y_pred_gm = gm.fit_predict(X)

# No linkage given, so the estimator's default linkage is used.
ac = AgglomerativeClustering(n_clusters=3)
y_pred_ac = ac.fit_predict(X)

# DBSCAN takes a neighbourhood radius instead of a cluster count; two radii are
# tried with the same density threshold. NOTE(review): DBSCAN labels noise
# points -1, and the score below counts that as one more cluster label.
dbscan8 = DBSCAN(eps=0.8, min_samples=35)
y_pred_dbscan8 = dbscan8.fit_predict(X)

dbscan9 = DBSCAN(eps=0.9, min_samples=35)
y_pred_dbscan9 = dbscan9.fit_predict(X)

print('KMeans:                  ', v_measure_score(labels_true=y, labels_pred=y_pred_kmeans))
print('GaussianMixture:         ', v_measure_score(labels_true=y, labels_pred=y_pred_gm))
print('AgglomerativeClustering: ', v_measure_score(labels_true=y, labels_pred=y_pred_ac))
print('DBSCAN_8:                ', v_measure_score(labels_true=y, labels_pred=y_pred_dbscan8))
print('DBSCAN_9:                ', v_measure_score(labels_true=y, labels_pred=y_pred_dbscan9))

# K-means variants, each restricted to a single initialisation (n_init=1):
# k-means++ seeding, purely random seeding, and the mini-batch estimator.
k_means_1 = KMeans(n_clusters=3, init="k-means++", n_init=1, random_state=42)
k_means_2 = KMeans(n_clusters=3, init="random", n_init=1, random_state=42)
kmeans_mini_batch = MiniBatchKMeans(n_clusters=3, n_init=1, random_state=42)

# Each estimator carries its own random_state, so the fits are independent.
y_pred_kmeans_1 = k_means_1.fit_predict(X)
y_pred_kmeans_2 = k_means_2.fit_predict(X)
y_pred_kmeans_mb = kmeans_mini_batch.fit_predict(X)

# Report the V-measure of every variant against the ground-truth labels.
for _label, _predicted in (
    ('KMeans 1:         ', y_pred_kmeans_1),
    ('KMeans 2:         ', y_pred_kmeans_2),
    ('KMeans MB:        ', y_pred_kmeans_mb),
):
    print(_label, v_measure_score(labels_true=y, labels_pred=_predicted))

# --- Agglomerative clustering: effect of the linkage criterion (raw X) ---
ac_w = AgglomerativeClustering(linkage='ward', n_clusters=3)
ac_a = AgglomerativeClustering(linkage='average', n_clusters=3)
ac_c = AgglomerativeClustering(linkage='complete', n_clusters=3)
ac_s = AgglomerativeClustering(linkage='single', n_clusters=3)

y_pred_ac_w = ac_w.fit_predict(X)
y_pred_ac_a = ac_a.fit_predict(X)
y_pred_ac_c = ac_c.fit_predict(X)
y_pred_ac_s = ac_s.fit_predict(X)

# --- Effect of a connectivity constraint ---
# Build a 6-nearest-neighbour graph (a point is not its own neighbour) and
# average it with its transpose so the adjacency matrix is symmetric.
knn_graph = kneighbors_graph(X, n_neighbors=6, include_self=False)
connectivity = 0.5 * (knn_graph + knn_graph.T)
ac_k = AgglomerativeClustering(n_clusters=3, connectivity=connectivity)
y_pred_ac_k = ac_k.fit_predict(X)

# --- Effect of feature scaling (ward linkage on rescaled copies of X) ---
X_standardized = StandardScaler().fit_transform(X)
ac_ts = AgglomerativeClustering(linkage='ward', n_clusters=3)
y_pred_ac_ts = ac_ts.fit_predict(X_standardized)

X_min_max = MinMaxScaler().fit_transform(X)
ac_tm = AgglomerativeClustering(linkage='ward', n_clusters=3)
y_pred_ac_tm = ac_tm.fit_predict(X_min_max)

# Report the V-measure of every variant against the ground-truth labels.
for _label, _predicted in (
    ('ac_w :        ', y_pred_ac_w),
    ('ac_a :        ', y_pred_ac_a),
    ('ac_c :        ', y_pred_ac_c),
    ('ac_s :        ', y_pred_ac_s),
    ('ac_k :        ', y_pred_ac_k),
    ('ac_ts:        ', y_pred_ac_ts),
    ('ac_tm:        ', y_pred_ac_tm),
):
    print(_label, v_measure_score(labels_true=y, labels_pred=_predicted))

  super()._check_params_vs_input(X, default_n_init=10)


KMeans:                   0.7978517230695474
GaussianMixture:          0.9332760595996924
AgglomerativeClustering:  0.703747024360433
DBSCAN_8:                 0.7063100442576923
DBSCAN_9:                 0.7732549110297919
KMeans 1:          0.7900627599380985
KMeans 2:          0.5990620007488798
KMeans MB:         0.7750223553202533
ac_w :         0.703747024360433
ac_a :         0.539161632594271
ac_c :         0.4105143541946565
ac_s :         0.0008842106330108959
ac_k :         0.902634398342219
ac_ts:         0.9079037199053294
ac_tm:         0.8929241488344335
