In [4]:
import numpy as np
import scipy.spatial.distance as distance

def k_init_fixed_v3(X, n_clusters):
    """Select initial k-means centers with k-means++ seeding.

    The first center is a uniformly random row of ``X``. Every further
    center is a row drawn with probability proportional to its squared
    Euclidean distance from the nearest center chosen so far.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        The data. Dense input only; sparse matrices are not handled by
        this implementation.
    n_clusters : int
        The number of centers to pick.

    Returns
    -------
    centers : ndarray of shape (n_clusters, n_features)
        Initial centers for k-means. Each center is a row of ``X`` and the
        array has the dtype of ``X``.

    Notes
    -----
    The global NumPy random state is reseeded with 12345 on every call, so
    the result is deterministic for a given input and the caller's global
    random stream is reset as a side effect.
    """
    X = np.asarray(X)

    np.random.seed(12345)
    n_samples, n_features = X.shape

    centers = np.empty((n_clusters, n_features), dtype=X.dtype)
    center_id = np.random.randint(n_samples)
    centers[0] = X[center_id]

    # Squared distance from every sample to its nearest chosen center.
    closest_dist_sq = distance.cdist(
        X, centers[0:1], metric="sqeuclidean"
    ).ravel()
    current_pot = closest_dist_sq.sum()

    for c in range(1, n_clusters):
        # Inverse-CDF sampling: scale a uniform draw by the total potential
        # and locate it in the running sum of the distances.
        rand_val = np.random.random_sample() * current_pot
        center_id = int(np.searchsorted(np.cumsum(closest_dist_sq), rand_val))
        # ``sum()`` and ``cumsum()`` accumulate in different orders, so the
        # scaled draw can land just above the last cumulative value; in that
        # case searchsorted returns n_samples, which is out of range.
        center_id = min(center_id, n_samples - 1)
        centers[c] = X[center_id]

        new_dist_sq = distance.cdist(
            X, centers[c:c + 1], metric="sqeuclidean"
        ).ravel()
        closest_dist_sq = np.minimum(closest_dist_sq, new_dist_sq)
        current_pot = closest_dist_sq.sum()

    return centers

# Small 2-D toy data set: a cluster of points around the origin plus a few
# outlying points further away.
X = np.array(
    [
        [0, 0],
        [1, 0],
        [-1, 0],
        [0, 1],
        [0, -1],
        [-1, 1],
        [-1, -1],
        [0, 0.5],
        [0.5, 0.5],
        [0.5, 0],
        [1, 2],
        [-1, 2],
        [-2, -2],
        [-3, 3],
        [-2, 0],
        [2, -2],
    ]
)

# Pick three initial centers with the fixed initializer and display them.
centers_fixed_v3 = k_init_fixed_v3(X, n_clusters=3)
print(centers_fixed_v3)


[[-1.  0.]
 [ 2. -2.]
 [ 0.  1.]]
