Commit

add clustering algos tutorial

x4nth055 committed Jan 20, 2023
1 parent c03bf85 commit 99b7a18
Showing 15 changed files with 251 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -100,6 +100,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepythoncode.com)
- [Dimensionality Reduction: Using Feature Selection in Python](https://www.thepythoncode.com/article/dimensionality-reduction-feature-selection). ([code](machine-learning/dimensionality-reduction-feature-selection))
- [A Guide to Explainable AI Using Python](https://www.thepythoncode.com/article/explainable-ai-model-python). ([code](machine-learning/explainable-ai))
- [Autoencoders for Dimensionality Reduction using TensorFlow in Python](https://www.thepythoncode.com/article/feature-extraction-dimensionality-reduction-autoencoders-python-keras). ([code](machine-learning/feature-extraction-autoencoders))
- [Exploring the Different Types of Clustering Algorithms in Machine Learning with Python](https://www.thepythoncode.com/article/clustering-algorithms-in-machine-learning-with-python). ([code](machine-learning/clustering-algorithms))

- ### [General Python Topics](https://www.thepythoncode.com/topic/general-python-topics)
- [How to Make Facebook Messenger bot in Python](https://www.thepythoncode.com/article/make-bot-fbchat-python). ([code](general/messenger-bot))
1 change: 1 addition & 0 deletions machine-learning/clustering-algorithms/README.md
@@ -0,0 +1 @@
# [Exploring the Different Types of Clustering Algorithms in Machine Learning with Python](https://www.thepythoncode.com/article/clustering-algorithms-in-machine-learning-with-python)
23 changes: 23 additions & 0 deletions machine-learning/clustering-algorithms/affinity_propagation.py
@@ -0,0 +1,23 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AffinityPropagation
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# initialize the model
m = AffinityPropagation(damping=0.9)
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Affinity Propagation Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
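AffinityPropagation is one of the few models here that actually exposes cluster centers (its exemplars), so the centers could be marked on the same plot. A two-line continuation (not part of this commit) that would go just before the final pyplot.show() above, using the fitted m:

# AffinityPropagation stores its exemplars in cluster_centers_
centers = m.cluster_centers_
# mark each exemplar with a black cross
pyplot.scatter(centers[:, 0], centers[:, 1], marker='x', color='black')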
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/agglomerative_clustering.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AgglomerativeClustering
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 3 clusters
m = AgglomerativeClustering(n_clusters=3)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Agglomerative Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
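Agglomerative clustering builds a merge tree, which a dendrogram makes visible; scipy (already installed as a scikit-learn dependency) can draw one. A standalone sketch, not part of this commit — the ward linkage matches AgglomerativeClustering's default:

from sklearn.datasets import make_classification
from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot

# a smaller sample keeps the dendrogram readable
X, _ = make_classification(n_samples=50, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# ward linkage: the same merge criterion AgglomerativeClustering uses by default
Z = linkage(X, method='ward')
pyplot.title('Dendrogram (ward linkage)')
dendrogram(Z)
pyplot.show()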
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/birch.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import Birch
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 2 clusters
m = Birch(threshold=0.05, n_clusters=2)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Birch Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
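Birch is built for large or streaming datasets: its CF tree can be grown chunk by chunk. A continuation sketch (not part of this commit), assuming X and np from the script above; the 10-chunk split is an arbitrary illustration:

m = Birch(threshold=0.05, n_clusters=2)
# feed the data in 10 chunks instead of all at once
for chunk in np.array_split(X, 10):
    m.partial_fit(chunk)
p = m.predict(X)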
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/dbscan_clustering.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import DBSCAN
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model
m = DBSCAN(eps=0.05, min_samples=10)
# predict the cluster for each data point after fitting the model
# (DBSCAN labels points it considers noise as -1)
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('DBSCAN Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
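eps is the parameter worth tuning here — at 0.05 on this data most points will likely come out as noise. A common heuristic (not part of this commit) is the k-distance plot: sort every point's distance to its k-th neighbor and look for the elbow; a standalone sketch:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.neighbors import NearestNeighbors
from matplotlib import pyplot

X, _ = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# distance of every point to its k-th neighbor (k=10, matching min_samples;
# note the query set is the training set, so each point counts itself once)
nn = NearestNeighbors(n_neighbors=10).fit(X)
dist, _ = nn.kneighbors(X)
pyplot.title('k-distance plot: the elbow suggests a value for eps')
pyplot.plot(np.sort(dist[:, -1]))
pyplot.show()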
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/gmm.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 2 components
m = GaussianMixture(n_components=2)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Gaussian Mixture Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
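Unlike the hard assignments above, a Gaussian mixture is a soft clusterer. A short continuation (not part of this commit), using the fitted m and X:

# per-cluster membership probabilities, one row per data point
probs = m.predict_proba(X)
print(probs[:5])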
24 changes: 24 additions & 0 deletions machine-learning/clustering-algorithms/kmeans_clustering.py
@@ -0,0 +1,24 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from matplotlib import pyplot

# 2 features, 2 informative, 0 redundant, 1 cluster per class
# 2 features, 2 informative, 0 redundant, 1 cluster per class
X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# 2 clusters
m = KMeans(n_clusters=2)
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('K-means (No. of Clusters = 2)')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
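The number of clusters is an input to K-means, not an output; the classic elbow method picks it by plotting inertia against k. A standalone sketch, not part of this commit:

from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from matplotlib import pyplot

X, _ = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# inertia = sum of squared distances of points to their closest centroid
inertias = [KMeans(n_clusters=k, n_init=10).fit(X).inertia_ for k in range(1, 10)]
pyplot.title('Elbow method: inertia vs. number of clusters')
pyplot.plot(range(1, 10), inertias, marker='o')
pyplot.show()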
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/meanshift_clustering.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MeanShift
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model
m = MeanShift()
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Mean Shift Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
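Mean shift's main knob is the kernel bandwidth, which the model estimates automatically by default; it can also be set explicitly. A continuation sketch (not part of this commit), assuming X from above — quantile=0.2 is a hypothetical choice:

from sklearn.cluster import estimate_bandwidth

# smaller quantile -> more local bandwidth estimate -> more clusters
bw = estimate_bandwidth(X, quantile=0.2)
m = MeanShift(bandwidth=bw)
p = m.fit_predict(X)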
10 changes: 10 additions & 0 deletions machine-learning/clustering-algorithms/metrics.py
@@ -0,0 +1,10 @@
from sklearn import metrics

y_true = [5, 3, 5, 4, 4, 5]
y_pred = [3, 5, 5, 4, 3, 4]
# homogeneity: each cluster contains only members of a single class.
print(metrics.homogeneity_score(y_true, y_pred))
# completeness: all members of a given class are assigned to the same cluster.
print(metrics.completeness_score(y_true, y_pred))
# v-measure: harmonic mean of homogeneity and completeness
print(metrics.v_measure_score(y_true, y_pred))
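All three of these metrics need ground-truth labels, which real clustering problems usually lack. When no labels exist, an internal metric such as the silhouette score can be used instead; a standalone sketch, not part of this commit:

from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

X, _ = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
labels = KMeans(n_clusters=2).fit_predict(X)
# ranges from -1 (points likely in the wrong cluster) to +1 (dense, well-separated clusters)
print(silhouette_score(X, labels))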
@@ -0,0 +1,22 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# 3 clusters
m = MiniBatchKMeans(n_clusters=3)
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Mini Batch K-means')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
21 changes: 21 additions & 0 deletions machine-learning/clustering-algorithms/optics.py
@@ -0,0 +1,21 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import OPTICS
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# init the model
# (note: with the default cluster_method='xi', eps is ignored;
# it only takes effect when cluster_method='dbscan')
m = OPTICS(eps=0.5, min_samples=10)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('OPTICS Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
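OPTICS's real output is an ordering of the points with reachability distances, from which clusters are then extracted. A continuation (not part of this commit) that plots it, using the fitted m:

pyplot.title('OPTICS reachability plot')
# valleys in this curve correspond to clusters
pyplot.plot(m.reachability_[m.ordering_])
pyplot.show()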
3 changes: 3 additions & 0 deletions machine-learning/clustering-algorithms/requirements.txt
@@ -0,0 +1,3 @@
scikit-learn
numpy
matplotlib
20 changes: 20 additions & 0 deletions machine-learning/clustering-algorithms/spectral_clustering.py
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import SpectralClustering
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 3 clusters
m = SpectralClustering(n_clusters=3)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points of each cluster
pyplot.title('Spectral Clustering')
for c in cl:
    r = np.where(p == c)
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()
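By default SpectralClustering builds its affinity matrix with an RBF kernel; a k-nearest-neighbors graph is a common alternative for non-convex cluster shapes. A variant sketch (not part of this commit; n_neighbors=10 is a hypothetical choice), reusing X from above:

m = SpectralClustering(n_clusters=3, affinity='nearest_neighbors', n_neighbors=10)
p = m.fit_predict(X)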
@@ -0,0 +1,26 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from matplotlib import pyplot
import timeit

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# start timer for Mini Batch K-Means
t1_mkm = timeit.default_timer()
m = MiniBatchKMeans(n_clusters=2)
m.fit(X)
p = m.predict(X)
# stop timer for Mini Batch K-Means
t2_mkm = timeit.default_timer()
# start timer for K-Means
t1_km = timeit.default_timer()
m = KMeans(n_clusters=2)
m.fit(X)
p = m.predict(X)
# stop timer for K-Means
t2_km = timeit.default_timer()
# print how much longer K-Means took than Mini Batch K-Means
print("Time difference between Mini Batch K-Means and K-Means = ",
      (t2_km - t1_km) - (t2_mkm - t1_mkm))
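The difference alone hides the scale; printing both absolute times gives context. A two-line continuation, not part of this commit:

print("Mini Batch K-Means took", t2_mkm - t1_mkm, "seconds")
print("K-Means took", t2_km - t1_km, "seconds")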
