In [7]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.mixture import GaussianMixture

# Build a synthetic 2-D dataset made of three groups of points:
# two blobs drawn by make_blobs (labels 0 and 1) plus a third,
# linearly transformed blob that is given the label 2.
n_samples = 1500

# Two blobs with different spreads; their centers are drawn at random
# inside center_box (reproducible through random_state).
dataset = datasets.make_blobs(
    n_samples=n_samples,
    centers=2,
    center_box=(-7.0, 7.5),
    cluster_std=[1.4, 1.7],
    random_state=42,
)

# Third blob: generated around (-4, -3), then multiplied by a 2x2 matrix
# so that it is stretched and skewed rather than round.
X_2, _ = datasets.make_blobs(
    n_samples=n_samples,
    random_state=170,
    centers=[[-4, -3]],
    cluster_std=[1.9],
)
transformation = [[1.2, -0.8], [-0.4, 1.7]]
X_2 = np.dot(X_2, transformation)

# Stack the points of all three groups; the third group is labelled 2.
X = np.concatenate((dataset[0], X_2))
y = np.concatenate((dataset[1], np.full(len(X_2), 2)))

#plt.rcParams['figure.figsize'] = 10, 10
#plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.5)
#plt.show()

# Fit each clustering algorithm once on X and keep the predicted labels.
#
# n_init is passed explicitly to KMeans: it pins the number of k-means
# restarts to 10 (the value the library warning reports as the current
# default), so the result does not change when the library default does
# and the FutureWarning about that default is no longer emitted.
k_means = KMeans(init="k-means++", n_clusters=3, n_init=10, random_state=42)
y_pred_kmeans = k_means.fit_predict(X)

gm = GaussianMixture(n_components=3, random_state=42)
y_pred_gm = gm.fit_predict(X)

ac = AgglomerativeClustering(n_clusters=3)
y_pred_ac = ac.fit_predict(X)

# DBSCAN takes no cluster count; the number of clusters follows from
# eps / min_samples.
dbscan = DBSCAN(eps=0.9, min_samples=35)
y_pred_dbscan = dbscan.fit_predict(X)

# Now compute the silhouette coefficient for every labelling.
print('KMeans:                  ', silhouette_score(X=X, labels=y_pred_kmeans, metric='euclidean'))
print('GaussianMixture:         ', silhouette_score(X=X, labels=y_pred_gm, metric='euclidean'))
print('AgglomerativeClustering: ', silhouette_score(X=X, labels=y_pred_ac, metric='euclidean'))
# NOTE(review): DBSCAN marks noise points with the label -1, and the labels
# are passed to silhouette_score unchanged, so noise is presumably scored as
# if it were one more cluster -- confirm this is the intended comparison.
print('DBSCAN:                  ', silhouette_score(X=X, labels=y_pred_dbscan, metric='euclidean'))


# For every algorithm that takes a cluster count, try 2..10 clusters and
# remember the count that gives the highest silhouette coefficient.
kmeans_clusters = 2
gm_clusters = 2
ac_clusters = 2
# Best scores seen so far; -1 is the lower bound of the silhouette
# coefficient, so the first score above it replaces these placeholders.
kmeans_sc = -1
gm_sc = -1
ac_sc = -1
for i in range(2, 11):
    # n_init is explicit so the number of k-means restarts stays at 10
    # regardless of the library default, and no FutureWarning is raised
    # on each iteration.
    k_means = KMeans(init="k-means++", n_clusters=i, n_init=10, random_state=42)
    y_pred_kmeans = k_means.fit_predict(X)

    gm = GaussianMixture(n_components=i, random_state=42)
    y_pred_gm = gm.fit_predict(X)

    ac = AgglomerativeClustering(n_clusters=i)
    y_pred_ac = ac.fit_predict(X)

    kmeans_silhouette_score = silhouette_score(X=X, labels=y_pred_kmeans, metric='euclidean')
    gm_silhouette_score = silhouette_score(X=X, labels=y_pred_gm, metric='euclidean')
    ac_silhouette_score = silhouette_score(X=X, labels=y_pred_ac, metric='euclidean')

    # Strict '>' means that on a tie the smaller cluster count is kept.
    if kmeans_silhouette_score > kmeans_sc:
        kmeans_sc = kmeans_silhouette_score
        kmeans_clusters = i

    if gm_silhouette_score > gm_sc:
        gm_sc = gm_silhouette_score
        gm_clusters = i

    if ac_silhouette_score > ac_sc:
        ac_sc = ac_silhouette_score
        ac_clusters = i


# Report the best score and the cluster count that produced it.
print('KMeans:                  ', kmeans_sc, kmeans_clusters)
print('GaussianMixture:         ', gm_sc, gm_clusters)
print('AgglomerativeClustering: ', ac_sc, ac_clusters)

  super()._check_params_vs_input(X, default_n_init=10)


KMeans:                   0.5134091924643247
GaussianMixture:          0.49893287606943293
AgglomerativeClustering:  0.4811992210663849
DBSCAN:                   0.4454335539277996


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


KMeans:                   0.5134091924643247 3
GaussianMixture:          0.510682306926087 4
AgglomerativeClustering:  0.48470679039805054 4
