-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
251 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# [Exploring the Different Types of Clustering Algorithms in Machine Learning with Python](https://www.thepythoncode.com/article/clustering-algorithms-in-machine-learning-with-python) |
23 changes: 23 additions & 0 deletions
23
machine-learning/clustering-algorithms/affinity_propagation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import AffinityPropagation | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed so the points are reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Fit Affinity Propagation, then read back a cluster label for every sample.
model = AffinityPropagation(damping=0.9)
model.fit(X)
labels = model.predict(X)

# One scatter call per cluster, so each cluster is drawn as its own series.
for label in np.unique(labels):
    members = np.where(labels == label)
    pyplot.title('Affinity Propagation Clustering')
    pyplot.scatter(X[members, 0], X[members, 1])

# Display the figure.
pyplot.show()
20 changes: 20 additions & 0 deletions
20
machine-learning/clustering-algorithms/agglomerative_clustering.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import AgglomerativeClustering | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
dataset_options = dict(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)
X, y = make_classification(**dataset_options)

# Agglomerative clustering into 3 clusters; fit and label in a single call.
clusterer = AgglomerativeClustering(n_clusters=3)
assignments = clusterer.fit_predict(X)

# The title does not depend on the cluster being drawn, so set it once.
pyplot.title('Agglomerative Clustering')
for cluster_id in np.unique(assignments):
    rows = np.where(assignments == cluster_id)
    pyplot.scatter(X[rows, 0], X[rows, 1])

# Display the figure.
pyplot.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import Birch | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
X, y = make_classification(
    n_samples=1000, n_features=2, n_informative=2,
    n_redundant=0, n_clusters_per_class=1, random_state=10,
)

# Birch with a 0.05 threshold, reduced to 2 final clusters.
birch = Birch(threshold=0.05, n_clusters=2)
cluster_of = birch.fit_predict(X)

# Plot every cluster as a separate scatter series.
distinct = np.unique(cluster_of)
for k in distinct:
    idx = np.where(cluster_of == k)
    xs = X[idx, 0]
    ys = X[idx, 1]
    pyplot.title('Birch Clustering')
    pyplot.scatter(xs, ys)

# Display the figure.
pyplot.show()
20 changes: 20 additions & 0 deletions
20
machine-learning/clustering-algorithms/dbscan_clustering.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import DBSCAN | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Density-based clustering: neighbourhood radius 0.05, 10 samples per core.
dbscan = DBSCAN(eps=0.05, min_samples=10)
labels = dbscan.fit_predict(X)

# NOTE(review): per the scikit-learn docs DBSCAN labels noise samples -1, so
# a "-1" series here would be noise rather than a real cluster - confirm.
pyplot.title('DBSCAN Clustering')
for label in np.unique(labels):
    selected = np.where(label == labels)
    pyplot.scatter(X[selected, 0], X[selected, 1])

# Display the figure.
pyplot.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.mixture import GaussianMixture | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
X, y = make_classification(
    n_samples=1000, n_features=2, n_informative=2,
    n_redundant=0, n_clusters_per_class=1, random_state=10,
)

# Two-component Gaussian mixture; fit and assign each sample in one step.
mixture = GaussianMixture(n_components=2)
component_of = mixture.fit_predict(X)

# Draw the samples of each component as their own scatter series.
for component in np.unique(component_of):
    where_component = np.where(component_of == component)
    pyplot.title('Gaussian Mixture Clustering')
    pyplot.scatter(X[where_component, 0], X[where_component, 1])

# Display the figure.
pyplot.show()
24 changes: 24 additions & 0 deletions
24
machine-learning/clustering-algorithms/kmeans_clustering.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import KMeans | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 2 features, 2 informative, 0 redundant, 1 cluster per
# class, generated with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Number of clusters K-means should find. The plot title is built from this
# value so the title and the model can never disagree (the title used to be
# hard-coded to "3" while the model was fitted with 2 clusters).
n_clusters = 2

# Fit the model, then predict the cluster of each data point.
m = KMeans(n_clusters=n_clusters)
m.fit(X)
p = m.predict(X)

# Unique cluster labels that were actually assigned.
cl = np.unique(p)

# The title is the same for every cluster, so set it once before the loop.
pyplot.title(f'K-means (No. of Clusters = {n_clusters})')

# Plot the data points of each cluster as a separate scatter series
# (only the samples are drawn; cluster centers are not plotted).
for c in cl:
    r = np.where(c == p)
    pyplot.scatter(X[r, 0], X[r, 1])

# Display the figure.
pyplot.show()
20 changes: 20 additions & 0 deletions
20
machine-learning/clustering-algorithms/meanshift_clustering.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import MeanShift | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Mean Shift with library defaults; fit and label in a single call.
mean_shift = MeanShift()
labels = mean_shift.fit_predict(X)

# The title is independent of the cluster, so it is set once up front.
pyplot.title('Mean Shift Clustering')

# One scatter series per discovered cluster.
for label in np.unique(labels):
    hits = np.where(labels == label)
    pyplot.scatter(X[hits, 0], X[hits, 1])

# Display the figure.
pyplot.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from sklearn import metrics | ||
|
||
# Reference class labels and the cluster labels being scored against them.
y_true = [5, 3, 5, 4, 4, 5]
y_pred = [3, 5, 5, 4, 3, 4]

# Printed in this order, one value per line:
#   homogeneity  - each cluster contains only members of a single class
#   completeness - all members of a given class share the same cluster
#   v-measure    - harmonic mean of homogeneity and completeness
for score in (
    metrics.homogeneity_score,
    metrics.completeness_score,
    metrics.v_measure_score,
):
    print(score(y_true, y_pred))
22 changes: 22 additions & 0 deletions
22
machine-learning/clustering-algorithms/minibatch_kmeans_clustering.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import MiniBatchKMeans | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
X, y = make_classification(
    n_samples=1000, n_features=2, n_informative=2,
    n_redundant=0, n_clusters_per_class=1, random_state=10,
)

# Mini Batch K-means with 3 clusters: fit first, then label every sample.
mini_batch = MiniBatchKMeans(n_clusters=3)
mini_batch.fit(X)
assigned = mini_batch.predict(X)

# Plot each cluster's samples as its own scatter series.
for cluster_id in np.unique(assigned):
    rows = np.where(assigned == cluster_id)
    first_feature = X[rows, 0]
    second_feature = X[rows, 1]
    pyplot.title('Mini Batch K-means')
    pyplot.scatter(first_feature, second_feature)

# Display the figure.
pyplot.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import OPTICS | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# NOTE(review): the scikit-learn docs say `eps` is used only when
# cluster_method='dbscan'; with the default extraction method it appears to
# have no effect here - confirm which settings were intended.
optics = OPTICS(eps=0.5, min_samples=10)
labels = optics.fit_predict(X)

# The title is independent of the cluster, so it is set once up front.
pyplot.title('OPTICS Clustering')

# One scatter series per distinct label returned by the model.
for label in np.unique(labels):
    picked = np.where(label == labels)
    pyplot.scatter(X[picked, 0], X[picked, 1])

# Display the figure.
pyplot.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
scikit-learn | ||
numpy | ||
matplotlib |
20 changes: 20 additions & 0 deletions
20
machine-learning/clustering-algorithms/spectral_clustering.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import SpectralClustering | ||
from matplotlib import pyplot | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
X, y = make_classification(
    n_samples=1000, n_features=2, n_informative=2,
    n_redundant=0, n_clusters_per_class=1, random_state=10,
)

# Spectral clustering into 3 clusters; fit and label in a single call.
spectral = SpectralClustering(n_clusters=3)
membership = spectral.fit_predict(X)

# Draw every cluster as a separate scatter series.
cluster_ids = np.unique(membership)
for cluster_id in cluster_ids:
    in_cluster = np.where(membership == cluster_id)
    pyplot.title('Spectral Clustering')
    pyplot.scatter(X[in_cluster, 0], X[in_cluster, 1])

# Display the figure.
pyplot.show()
26 changes: 26 additions & 0 deletions
26
machine-learning/clustering-algorithms/time_diff_minibatch_and_kmeans.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import numpy as np | ||
from sklearn.datasets import make_classification | ||
from sklearn.cluster import MiniBatchKMeans | ||
from sklearn.cluster import KMeans | ||
from matplotlib import pyplot | ||
import timeit | ||
|
||
# Synthetic dataset: 1000 two-feature samples, one cluster per class,
# generated with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Wall-clock time for construct + fit + predict with Mini Batch K-Means.
mkm_started = timeit.default_timer()
mini_batch = MiniBatchKMeans(n_clusters=2)
mini_batch.fit(X)
mini_batch.predict(X)
mkm_elapsed = timeit.default_timer() - mkm_started

# The same measurement for ordinary K-Means.
km_started = timeit.default_timer()
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
kmeans.predict(X)
km_elapsed = timeit.default_timer() - km_started

# Reported value is (K-Means time) - (Mini Batch K-Means time), so it is
# positive when K-Means took longer in this single run.
print("Time difference between Mini Batch K-Means and K-Means = ",
      km_elapsed - mkm_elapsed)