Commit

add clustering algos tutorial

x4nth055 committed Jan 20, 2023
1 parent c03bf85 commit 99b7a18
Showing 15 changed files with 251 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -100,6 +100,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepythoncode.com)
- [Dimensionality Reduction: Using Feature Selection in Python](https://www.thepythoncode.com/article/dimensionality-reduction-feature-selection). ([code](machine-learning/dimensionality-reduction-feature-selection))
- [A Guide to Explainable AI Using Python](https://www.thepythoncode.com/article/explainable-ai-model-python). ([code](machine-learning/explainable-ai))
- [Autoencoders for Dimensionality Reduction using TensorFlow in Python](https://www.thepythoncode.com/article/feature-extraction-dimensionality-reduction-autoencoders-python-keras). ([code](machine-learning/feature-extraction-autoencoders))
- [Exploring the Different Types of Clustering Algorithms in Machine Learning with Python](https://www.thepythoncode.com/article/clustering-algorithms-in-machine-learning-with-python). ([code](machine-learning/clustering-algorithms))

- ### [General Python Topics](https://www.thepythoncode.com/topic/general-python-topics)
- [How to Make Facebook Messenger bot in Python](https://www.thepythoncode.com/article/make-bot-fbchat-python). ([code](general/messenger-bot))
1 change: 1 addition & 0 deletions machine-learning/clustering-algorithms/README.md
@@ -0,0 +1 @@
# [Exploring the Different Types of Clustering Algorithms in Machine Learning with Python](https://www.thepythoncode.com/article/clustering-algorithms-in-machine-learning-with-python)
23 changes: 23 additions & 0 deletions machine-learning/clustering-algorithms/affinity_propagation.py
@@ -0,0 +1,23 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AffinityPropagation
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# initialize the model
m = AffinityPropagation(damping=0.9)
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Affinity Propagation Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
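AffinityPropagation is one of the few models here that actually exposes cluster centers (its exemplars), so the centers could be marked on the same plot. A two-line continuation (not part of this commit) that would go just before the final pyplot.show() above, using the fitted m:

# AffinityPropagation stores its exemplars in cluster_centers_
centers = m.cluster_centers_
# mark each exemplar with a black cross
pyplot.scatter(centers[:, 0], centers[:, 1], marker='x', color='black')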
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/agglomerative_clustering.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AgglomerativeClustering
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 3 clusters
m = AgglomerativeClustering(n_clusters=3)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Agglomerative Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
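Agglomerative clustering builds a merge tree, which a dendrogram makes visible; scipy (already installed as a scikit-learn dependency) can draw one. A standalone sketch, not part of this commit — the ward linkage matches AgglomerativeClustering's default:

from sklearn.datasets import make_classification
from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot

# a smaller sample keeps the dendrogram readable
X, _ = make_classification(n_samples=50, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# ward linkage: the same merge criterion AgglomerativeClustering uses by default
Z = linkage(X, method='ward')
pyplot.title('Dendrogram (ward linkage)')
dendrogram(Z)
pyplot.show()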
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/birch.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import Birch
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 2 clusters
m = Birch(threshold=0.05, n_clusters=2)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Birch Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
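Birch is built for large or streaming datasets: its CF tree can be grown chunk by chunk. A continuation sketch (not part of this commit), assuming X and np from the script above; the 10-chunk split is an arbitrary illustration:

m = Birch(threshold=0.05, n_clusters=2)
# feed the data in 10 chunks instead of all at once
for chunk in np.array_split(X, 10):
    m.partial_fit(chunk)
p = m.predict(X)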
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/dbscan_clustering.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import DBSCAN
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model
m = DBSCAN(eps=0.05, min_samples=10)
# predict the cluster for each data point after fitting the model
# (DBSCAN labels points it considers noise as -1)
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('DBSCAN Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
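eps is the parameter worth tuning here — at 0.05 on this data most points will likely come out as noise. A common heuristic (not part of this commit) is the k-distance plot: sort every point's distance to its k-th neighbor and look for the elbow; a standalone sketch:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.neighbors import NearestNeighbors
from matplotlib import pyplot

X, _ = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# distance of every point to its k-th neighbor (k=10, matching min_samples;
# note the query set is the training set, so each point counts itself once)
nn = NearestNeighbors(n_neighbors=10).fit(X)
dist, _ = nn.kneighbors(X)
pyplot.title('k-distance plot: the elbow suggests a value for eps')
pyplot.plot(np.sort(dist[:, -1]))
pyplot.show()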
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/gmm.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 2 components
m = GaussianMixture(n_components=2)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Gaussian Mixture Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
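Unlike the hard assignments above, a Gaussian mixture is a soft clusterer. A short continuation (not part of this commit), using the fitted m and X:

# per-cluster membership probabilities, one row per data point
probs = m.predict_proba(X)
print(probs[:5])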
24 changes: 24 additions & 0 deletions machine-learning/clustering-algorithms/kmeans_clustering.py
@@ -0,0 +1,24 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from matplotlib import pyplot

# 2 features, 2 informative, 0 redundant, 1 cluster per class
# 2 features, 2 informative, 0 redundant, 1 cluster per class
X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# 2 clusters
m = KMeans(n_clusters=2)
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('K-means (No. of Clusters = 2)')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
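The number of clusters is an input to K-means, not an output; the classic elbow method picks it by plotting inertia against k. A standalone sketch, not part of this commit:

from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from matplotlib import pyplot

X, _ = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# inertia = sum of squared distances of points to their closest centroid
inertias = [KMeans(n_clusters=k, n_init=10).fit(X).inertia_ for k in range(1, 10)]
pyplot.title('Elbow method: inertia vs. number of clusters')
pyplot.plot(range(1, 10), inertias, marker='o')
pyplot.show()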
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/meanshift_clustering.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MeanShift
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model
m = MeanShift()
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Mean Shift Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
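Mean shift's main knob is the kernel bandwidth, which the model estimates automatically by default; it can also be set explicitly. A continuation sketch (not part of this commit), assuming X from above — quantile=0.2 is a hypothetical choice:

from sklearn.cluster import estimate_bandwidth

# smaller quantile -> more local bandwidth estimate -> more clusters
bw = estimate_bandwidth(X, quantile=0.2)
m = MeanShift(bandwidth=bw)
p = m.fit_predict(X)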
10 changes: 10 additions & 0 deletions machine-learning/clustering-algorithms/metrics.py
@@ -0,0 +1,10 @@
from sklearn import metrics

y_true = [5, 3, 5, 4, 4, 5]
y_pred = [3, 5, 5, 4, 3, 4]
# homogeneity: each cluster contains only members of a single class.
print(metrics.homogeneity_score(y_true, y_pred))
# completeness: all members of a given class are assigned to the same cluster.
print(metrics.completeness_score(y_true, y_pred))
# v-measure: harmonic mean of homogeneity and completeness
print(metrics.v_measure_score(y_true, y_pred))
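All three of these metrics need ground-truth labels, which real clustering problems usually lack. When no labels exist, an internal metric such as the silhouette score can be used instead; a standalone sketch, not part of this commit:

from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

X, _ = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
labels = KMeans(n_clusters=2).fit_predict(X)
# ranges from -1 (points likely in the wrong cluster) to +1 (dense, well-separated clusters)
print(silhouette_score(X, labels))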
@@ -0,0 +1,22 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# 3 clusters
m = MiniBatchKMeans(n_clusters=3)
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Mini Batch K-means')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
21 changes: 21 additions & 0 deletions machine-learning/clustering-algorithms/optics.py
@@ -0,0 +1,21 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import OPTICS
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# init the model
# (note: with the default cluster_method='xi', eps is ignored;
# it only takes effect when cluster_method='dbscan')
m = OPTICS(eps=0.5, min_samples=10)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('OPTICS Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
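OPTICS's real output is an ordering of the points with reachability distances, from which clusters are then extracted. A continuation (not part of this commit) that plots it, using the fitted m:

pyplot.title('OPTICS reachability plot')
# valleys in this curve correspond to clusters
pyplot.plot(m.reachability_[m.ordering_])
pyplot.show()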
3 changes: 3 additions & 0 deletions machine-learning/clustering-algorithms/requirements.txt
@@ -0,0 +1,3 @@
scikit-learn
numpy
matplotlib
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/spectral_clustering.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import SpectralClustering
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 3 clusters
m = SpectralClustering(n_clusters=3)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Spectral Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
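By default SpectralClustering builds its affinity matrix with an RBF kernel; a k-nearest-neighbors graph is a common alternative for non-convex cluster shapes. A variant sketch (not part of this commit; n_neighbors=10 is a hypothetical choice), reusing X from above:

m = SpectralClustering(n_clusters=3, affinity='nearest_neighbors', n_neighbors=10)
p = m.fit_predict(X)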
@@ -0,0 +1,26 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from matplotlib import pyplot
import timeit

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# start timer for Mini Batch K-Means
t1_mkm = timeit.default_timer()
m = MiniBatchKMeans(n_clusters=2)
m.fit(X)
p = m.predict(X)
# stop timer for Mini Batch K-Means
t2_mkm = timeit.default_timer()
# start timer for K-Means
t1_km = timeit.default_timer()
m = KMeans(n_clusters=2)
m.fit(X)
p = m.predict(X)
# stop timer for K-Means
t2_km = timeit.default_timer()
# print how much longer K-Means took than Mini Batch K-Means
print("Time difference between Mini Batch K-Means and K-Means = ",
      (t2_km - t1_km) - (t2_mkm - t1_mkm))
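The difference alone hides the scale; printing both absolute times gives context. A two-line continuation, not part of this commit:

print("Mini Batch K-Means took", t2_mkm - t1_mkm, "seconds")
print("K-Means took", t2_km - t1_km, "seconds")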
