# 02 - Clustering & Metrics

In [None]:

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, silhouette_score
# Compare Louvain, Leiden and k-means clusterings of the preprocessed dataset,
# save UMAP figures of the assignments, and write agreement/quality metrics
# to a CSV file.

# --- Output locations -------------------------------------------------------
RESULTS_DIR = Path("results")
FIGURES_DIR = RESULTS_DIR / "figures"
# savefig() does not create missing directories; make sure the target exists
# before any clustering work is done so a missing folder cannot fail the run
# at the very end.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

# --- Load data --------------------------------------------------------------
# NOTE(review): the steps below presumably rely on a neighbors graph, 'X_pca'
# and UMAP coordinates already stored in this file by an upstream step —
# confirm against the preprocessing notebook.
adata = sc.read_h5ad(RESULTS_DIR / "preprocessed.h5ad")

# --- Graph-based clusterings (same resolution for both) ---------------------
sc.tl.louvain(adata, resolution=0.6, key_added='louvain_r06')
sc.tl.leiden(adata, resolution=0.6, key_added='leiden_r06')

# --- k-means on the first 30 columns of the PCA embedding -------------------
X = adata.obsm['X_pca'][:, :30]
# Use as many k-means clusters as Leiden produced, so the partitions are
# compared at equal cluster count.
k = adata.obs['leiden_r06'].nunique()
km = KMeans(n_clusters=k, n_init=25, random_state=0)
# Labels are stored as strings, matching the string labels of the other two
# clustering columns.
adata.obs['kmeans_k_leiden'] = km.fit_predict(X).astype(str)

# --- Figure: the three clusterings side by side -----------------------------
sc.pl.umap(
    adata,
    color=['louvain_r06', 'leiden_r06', 'kmeans_k_leiden'],
    ncols=3,
    show=False,
)
# show=False leaves the figure open as the current figure, which is what
# plt.savefig writes out.
plt.savefig(FIGURES_DIR / "umap_clusters.png", bbox_inches="tight")

# --- Metrics ----------------------------------------------------------------
# Agreement between the two graph-based partitions.
ari_ll = adjusted_rand_score(adata.obs['louvain_r06'], adata.obs['leiden_r06'])
# Silhouette of each partition, evaluated in the same PCA space used for
# k-means.
sil_louv = silhouette_score(X, adata.obs['louvain_r06'].astype(int))
sil_leid = silhouette_score(X, adata.obs['leiden_r06'].astype(int))
sil_km = silhouette_score(X, adata.obs['kmeans_k_leiden'].astype(int))

metrics = pd.DataFrame({
    'metric': [
        'ARI(Louvain,Leiden)',
        'Silhouette(Louvain)',
        'Silhouette(Leiden)',
        'Silhouette(k-means)',
    ],
    'value': [ari_ll, sil_louv, sil_leid, sil_km],
})
metrics.to_csv(RESULTS_DIR / "clustering_metrics.csv", index=False)

# --- Figure: Leiden clusters with labels drawn on the embedding -------------
sc.pl.umap(adata, color=['leiden_r06'], legend_loc='on data', show=False)
plt.savefig(FIGURES_DIR / "umap_leiden_labels.png", bbox_inches="tight")

print("ok")
