In [None]:
from sklearn import metrics
import numpy as np
from my_ml_package.clustering.metrics import silhouette_score, calculate_distortion, calculate_purity

In [11]:
# Toy data set: five people, one row per person as [height in cm, weight in kg].
#   Adults (A): roughly 170-180 cm tall and 70-80 kg.
#   Babies (B): roughly 50-70 cm tall (infants to toddlers) and 3-10 kg.
X = np.array(
    [
        [175, 75],  # adult
        [60, 5],    # baby
        [50, 4],    # baby
        [70, 7],    # baby
        [180, 80],  # adult
    ]
)

# Ground-truth class of each row of X, in row order.
y = ["A", "B", "B", "B", "A"]

# Cluster id assigned to each row of X, in row order. The last adult shares
# cluster 2 with the babies, so the clustering is not perfectly pure.
y_pred = [1, 2, 2, 2, 2]



## Purity 
* A supervised metric
* Similar to accuracy
* How do we match the ground-truth labels to the cluster labels?

In [12]:
# Purity compares the ground-truth labels (y) with the cluster assignments (y_pred).
# NOTE(review): the saved output shows a 2x2 matrix printed before the score, so
# calculate_purity presumably prints its label-vs-cluster count table as a side
# effect -- confirm against the helper's implementation.
purity_score = calculate_purity(y, y_pred)
print(f"Purity Score: {purity_score}")

[[1 1]
 [0 3]]
Purity Score: 0.8


## Silhouette Coefficient
`silhouette_score` returns the mean silhouette coefficient over all samples. It gives a perspective on the density and separation of the formed clusters.

* a: The mean distance between a data point and all other points in the same cluster.
* b: The mean distance between a data point and all points in **the next nearest cluster**.
* The silhouette score $S$ for a single data point is then calculated using the formula:
$$S=\frac{b-a}{\max (a, b)}$$

Questions:
* How to identify **the next nearest cluster**?
* What does it measure?
  * cohesion (how close points are to other points in the same cluster) and 
  * separation (how distinct or well-separated a cluster is from other clusters)
* What do 1, 0, -1 mean?

In [9]:
# Mean silhouette coefficient of the clustering y_pred over the samples in X,
# computed with the project's own silhouette_score implementation.
mean_silhouette = silhouette_score(X, y_pred)
print(f"Mean Silhouette Score: {mean_silhouette}")
# Optional cross-check against scikit-learn, left disabled. The earlier version
# of this comment passed `labels`, which is not defined in the visible cells;
# y_pred is the matching argument here.
# print(f'Mean Silhouette Score:{metrics.silhouette_score(X, y_pred)}')

Mean Silhouette Score: 0.8088985194499744


In [None]:
# Label-based agreement between the ground truth y and the clustering y_pred,
# using scikit-learn's reference implementations.

# Adjusted Rand index
ari_score = metrics.adjusted_rand_score(y, y_pred)
print(f"Adjusted Rand index for K-Means performance: {ari_score}")

# Normalized mutual information
nmi_score = metrics.normalized_mutual_info_score(y, y_pred)
print(f"Normalized mutual information for K-Means performance: {nmi_score}")