In [16]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.cluster import KMeans as skKMeans

In [17]:
class KMeans:
    """K-means clustering via Lloyd's algorithm.

    Initial centroids are ``n_clusters`` distinct rows of the training data,
    picked with a seeded permutation. Each iteration assigns every sample to
    its nearest centroid and moves each centroid to the mean of its samples.
    Iteration stops when the summed squared centroid displacement falls below
    ``tol`` or after ``max_iter`` iterations.

    Parameters
    ----------
    n_clusters: int, default=8
        Number of clusters (and centroids).

    max_iter: int, default=300
        Maximum number of assignment/update iterations.

    tol: float, default=1e-4
        Threshold on the summed squared centroid displacement between two
        consecutive iterations below which the fit is declared converged.

    random_state: int or None, default=0
        Seed passed to ``np.random.RandomState`` for centroid initialisation.

    Attributes (set by ``fit``)
    ---------------------------
    cluster_centers_: ndarray of shape (n_clusters, n_features)

    labels_: ndarray of shape (n_samples,)
        Index of the closest final centroid for every training sample.

    inertia_: float
        Sum of squared distances of samples to their closest final centroid.

    n_iter_: int
        Number of iterations actually run (``n_iter`` is kept as an alias).
    """
    def __init__(self, n_clusters=8,
                 max_iter=300, tol=1e-4, random_state=0):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state

    @staticmethod
    def _squared_distances(X, centroids):
        """Pairwise squared Euclidean distances between samples and centroids.

        Parameters:
        ----------
        X: array-like of shape (n_samples, n_features)

        centroids: array-like of shape (n_clusters, n_features)

        Returns:
        --------
        dist: ndarray of shape (n_samples, n_clusters)
        """
        X = np.asarray(X, dtype=float)
        centroids = np.asarray(centroids, dtype=float)
        # ||x - c||^2 = ||x||^2 - 2 x.c + ||c||^2, evaluated without building
        # an (n_samples, n_clusters, n_features) intermediate.
        x_norms = np.einsum("ij,ij->i", X, X)[:, np.newaxis]
        c_norms = np.einsum("ij,ij->i", centroids, centroids)[np.newaxis, :]
        dist = x_norms - 2.0 * np.dot(X, centroids.T) + c_norms
        # Floating-point cancellation can leave tiny negative values.
        np.maximum(dist, 0.0, out=dist)
        return dist

    def _assign(self, X, centroids):
        """Assign each sample to its nearest centroid.

        Returns:
        --------
        labels: ndarray of shape (n_samples,)

        inertia: float
            Sum of squared distances of samples to their assigned centroid.
        """
        dist = self._squared_distances(X, centroids)
        labels = np.argmin(dist, axis=1)
        inertia = np.sum(np.min(dist, axis=1))
        return labels, inertia

    def _inertia(self, X, centroids):
        """Run one assignment + update step and compute the inertia
        (within-cluster sum-of-squares criterion) for the given centroids

        Parameters:
        ----------
        X: array-like of shape (n_samples, n_features)

        centroids: array-like of shape (n_clusters, n_features)

        Returns:
        --------
        new_centroids: ndarray of shape (n_clusters, n_features)
            Mean of the samples assigned to each centroid. A centroid with
            no assigned samples is left where it was.

        labels: ndarray of shape (n_samples,)
            Index of the nearest input centroid for every sample.

        inertia: float
            Sum of squared distances to the nearest input centroid.
        """
        X = np.asarray(X, dtype=float)
        centroids = np.asarray(centroids, dtype=float)
        labels, inertia = self._assign(X, centroids)
        # Start from a copy so empty clusters keep their previous position
        # instead of becoming NaN (mean of an empty slice).
        new_centroids = centroids.copy()
        for i in range(centroids.shape[0]):
            members = X[labels == i]
            if members.shape[0] > 0:
                new_centroids[i] = members.mean(axis=0)

        return new_centroids, labels, inertia

    def fit(self, X):
        """Fit to the data
        Parameters:
        -----------
        X: array-like of shape (n_samples, n_features)
            Input data

        Returns:
        --------
        self: the fitted estimator

        Raises:
        -------
        ValueError
            If X is not 2-dimensional or n_clusters is not in
            [1, n_samples].
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features), "
                "got %d dimension(s)" % X.ndim)
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                "n_clusters=%r must be between 1 and n_samples=%d"
                % (self.n_clusters, n_samples))

        rng = np.random.RandomState(self.random_state)
        centroids = X[rng.permutation(n_samples)[:self.n_clusters]]
        n_iter = 0
        for n_iter in range(1, self.max_iter + 1):
            new_centroids = self._inertia(X, centroids)[0]
            centroids_shift = np.sum(np.square(new_centroids - centroids))
            centroids = new_centroids
            if centroids_shift < self.tol:
                break
        # Final assignment against the returned centroids so that labels_,
        # inertia_ and predict(X) all agree with cluster_centers_, even when
        # the loop stopped on max_iter (or never ran).
        labels, inertia = self._assign(X, centroids)
        self.labels_ = labels
        self.cluster_centers_ = centroids
        self.inertia_ = inertia
        self.n_iter_ = n_iter
        self.n_iter = n_iter  # backward-compatible alias of n_iter_
        return self

    def predict(self, X):
        """Return the index of the nearest fitted centroid for each sample.

        Parameters:
        -----------
        X: array-like of shape (n_samples, n_features)

        Returns:
        --------
        labels: ndarray of shape (n_samples,)
        """
        # Only the assignment is needed; no centroid update is performed.
        return self._assign(X, self.cluster_centers_)[0]

In [18]:
# Toy data: two groups of three points, at x = 1 and at x = 10.
X = np.array([
    [1, 2], [1, 4], [1, 0],
    [10, 2], [10, 4], [10, 0],
])
# fit() returns the estimator itself, so construction and fitting chain.
kmeans = KMeans(
    n_clusters=2,
    random_state=0,
).fit(X)

In [19]:
# Cluster index assigned to each row of X by fit().
kmeans.labels_

array([1, 1, 1, 0, 0, 0], dtype=int64)

In [20]:
# Fitted centroid coordinates, one row per cluster.
kmeans.cluster_centers_

array([[10.,  2.],
       [ 1.,  2.]])

In [23]:
# Label two unseen points with the index of their nearest fitted centroid.
kmeans.predict(np.array([[0, 0], [12, 3]]))

array([1, 0], dtype=int64)