In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

In [None]:
# Load the Iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

In [None]:
# Pull the feature matrix and the class labels out of the loaded dataset.
x, y = iris.data, iris.target

# PCA

In [None]:
from sklearn.decomposition import PCA

In [None]:
# NOTE(review): this repeats the dataset load from the first data cell; `x` and
# `y` were already taken from that earlier object, so this rebinding looks redundant.
iris = datasets.load_iris()

In [None]:
# Show the dimensions of the feature matrix before reducing it.
x.shape

In [None]:
# Fit PCA on the features and project them onto two components.
pca = PCA(n_components=2)
x_reduced = pca.fit_transform(x)

# Display the shape of the projected data.
x_reduced.shape

# LLE

In [None]:
from sklearn.manifold import locally_linear_embedding

In [None]:
# Sample a swiss-roll point cloud; `color` holds each point's position along
# the roll. A fixed seed keeps the sample identical across kernel restarts.
points, color = datasets.make_swiss_roll(n_samples=1500, random_state=0)

In [None]:
# Embed the swiss roll in 2-D with locally linear embedding.
# `lle` is the embedded point set, `err` the reconstruction error returned
# alongside it. `random_state` fixes the start vector of the ARPACK
# eigen-solver so the embedding is reproducible.
lle, err = locally_linear_embedding(
    points, n_neighbors=12, n_components=2, random_state=0)

# K-means

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Cluster the samples into three groups with K-means.
# With a single initialisation (n_init=1) the result depends heavily on the
# random starting centroids, so the seed is fixed to make re-runs reproducible.
model = KMeans(n_clusters=3, n_init=1, max_iter=100, random_state=0)
model.fit(x)

# Cluster index assigned to every sample, and the fitted cluster centres.
all_predictions = model.predict(x)
centroids = model.cluster_centers_

In [None]:
# Display the fitted cluster centres (one row per cluster).
centroids

In [None]:
# K-means assignments in the plane of the first two features, with the
# fitted centroids overlaid as black crosses.
plt.scatter(x[:,0], x[:,1], c=all_predictions)
plt.scatter(centroids[:,0], centroids[:,1], marker='x', color="black")
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
# A bare `plt.show` only evaluates to the function object; it has to be called.
plt.show()

## Finding the optimal number of clusters

In [None]:
# Elbow analysis: fit K-means for k = 2..14 and record the inertia of each fit.
# NOTE: the spelling `intertia_scores` is kept because the plotting cell reads
# that exact name.
k_values = []
intertia_scores = []

for k in range(2,15):
    # Fixed seed so the inertia curve does not change between runs.
    model = KMeans(n_clusters=k, random_state=0)
    model.fit(x)
    intertia_scores.append(model.inertia_)
    k_values.append(k)

# Magnitude of the discrete second difference of the inertia curve. It has two
# fewer entries than `k_values`; entry i corresponds to k_values[i + 1].
module_of_second_derivative = np.abs(np.diff(np.diff(intertia_scores)))

In [None]:
# Inertia against k: a line with the individual points drawn on top.
plt.plot(k_values, intertia_scores)
plt.scatter(k_values, intertia_scores)

# The second difference exists only for interior k values, so the first and
# last k are dropped on the x-axis.
interior_k = k_values[1:-1]
plt.plot(interior_k, module_of_second_derivative, color='red')
plt.scatter(interior_k, module_of_second_derivative, color='red')

plt.show()

# Hierarchical clustering

In [None]:
from sklearn.cluster import AgglomerativeClustering

In [None]:
# Linkage criterion used for the agglomerative clustering.
linkage_method = "ward"

# fit() returns the estimator itself, so construction and fitting are chained.
model = AgglomerativeClustering(n_clusters=3, linkage=linkage_method).fit(x)
# Cluster index assigned to every sample.
agclust_predictions = model.labels_

In [None]:
# Agglomerative cluster assignments in the plane of the first two features.
plt.scatter(x[:,0], x[:,1], c=agclust_predictions)
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
# A bare `plt.show` only evaluates to the function object; it has to be called.
plt.show()

In [None]:
from scipy.cluster.hierarchy import dendrogram
from scipy.cluster.hierarchy import linkage

In [None]:
# Linkage criterion for the hierarchy drawn below.
linkage_method = "ward"

# Build the linkage matrix with `linkage_method` (previously 'average' was
# hard-coded, leaving the variable unused), then draw the dendrogram truncated
# to the last 60 merged clusters.
dend = dendrogram(linkage(x, method=linkage_method), truncate_mode='lastp', p=60)

# DBSCAN

In [None]:
from sklearn.cluster import DBSCAN

In [None]:
# Density-based clustering with the library's default parameters.
# fit() returns the estimator itself, so construction and fitting are chained.
model = DBSCAN().fit(x)

# Per-sample cluster labels (scikit-learn labels noise points -1).
dens_predictions = model.labels_

In [None]:
# DBSCAN cluster assignments in the plane of the first two features.
plt.scatter(x[:,0], x[:,1], c=dens_predictions)
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
# A bare `plt.show` only evaluates to the function object; it has to be called.
plt.show()