In [ ]:
import subprocess
import sys

# Exact versions of every package the notebook installs; kept in one list so
# the pins are easy to read and diff.
_pinned_requirements = [
    "numpy==1.26.4",
    "pandas==2.1.4",
    "scipy==1.11.4",
    "scanpy==1.10.2",
    "anndata==0.10.8",
    "umap-learn==0.5.5",
    "pynndescent==0.5.12",
    "scikit-learn==1.3.2",
    "matplotlib==3.8.4",
    "python-igraph==0.11.5",
    "leidenalg==0.10.1",
    "louvain==0.8.2",
]

# Run pip through the interpreter executing this notebook (sys.executable) so
# the packages land in the same environment; check_call raises if pip fails.
_pip_command = [sys.executable, "-m", "pip", "install", "-q"]
subprocess.check_call(_pip_command + _pinned_requirements)

In [ ]:
import os

from google.colab import files

# Open the Colab upload widget; the returned mapping is keyed by file name.
u = files.upload()

# Keep only AnnData files. The extension check is case-insensitive so that
# e.g. "DATA.H5AD" is accepted as well.
h = [k for k in u if k.lower().endswith('.h5ad')]

# When several .h5ad files are uploaded, the first one in the mapping is used.
data_path = h[0] if h else None
print(data_path)

# Fail here with an actionable message instead of letting a later cell pass
# None to the file reader.
if data_path is None:
    raise FileNotFoundError(
        "No .h5ad file was uploaded; upload an AnnData .h5ad file and re-run this cell."
    )

In [ ]:
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import sklearn.cluster as skc

# Load the uploaded dataset, preprocess it, embed it, and cluster it with three
# methods (Louvain, Leiden, k-means); the three labelings are drawn side by
# side on the UMAP and written to 'umap_clusters.png'.
pathlib.Path('figures').mkdir(exist_ok=True)
adata = sc.read_h5ad(data_path)

# --- Filtering and QC metrics -------------------------------------------------
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
if 'mt' not in adata.var.columns:
    # Flag genes whose (upper-cased) name starts with 'MT-' or 'MT.'.
    adata.var['mt'] = adata.var_names.str.upper().str.startswith(('MT-', 'MT.'))
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], inplace=True)

# --- Normalisation and feature selection --------------------------------------
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=3000, flavor='seurat')
# From here on only the highly variable genes are kept in `adata`.
adata = adata[:, adata.var.highly_variable].copy()
sc.pp.scale(adata, max_value=10)

# --- Embedding and graph clustering -------------------------------------------
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=30)
sc.tl.umap(adata)
sc.tl.louvain(adata, resolution=0.6, key_added='louvain_r06')
sc.tl.leiden(
    adata,
    resolution=0.6,
    key_added='leiden_r06',
    flavor='igraph',
    n_iterations=2,
    directed=False,
)

# --- k-means on the first 30 PCs, with k matched to the Leiden cluster count ---
X = adata.obsm['X_pca'][:, :30]
k = adata.obs['leiden_r06'].nunique()
# max(k, 2) keeps k-means at two clusters or more even if Leiden found one.
km = skc.KMeans(n_clusters=max(k, 2), n_init=25, random_state=0)
# Labels are stored as strings so they are treated as discrete groups.
adata.obs['kmeans_k_leiden'] = km.fit_predict(X).astype(str)

# --- Plot and save ------------------------------------------------------------
# show=False keeps the figure open after plotting. With the default, scanpy
# shows the figure first, and on an inline notebook backend that closes it, so
# the savefig call below would write an empty canvas.
sc.pl.umap(
    adata,
    color=['louvain_r06', 'leiden_r06', 'kmeans_k_leiden'],
    ncols=3,
    show=False,
)
plt.savefig('umap_clusters.png', bbox_inches='tight')
# Display after saving so the plot still appears in the notebook output.
plt.show()
plt.close()

In [ ]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc

# Re-cluster the skin cells, pick a target cluster using a small gene panel,
# rank marker genes with two methods, and save a dot plot of the genes both
# methods agree on to 'roc_consensus.png'.

# Restrict to cells annotated as 'Skin' when that annotation exists; otherwise
# work on a copy of the whole dataset.
if 'tissue' in adata.obs.columns and 'Skin' in set(adata.obs['tissue']):
    skin = adata[adata.obs['tissue'] == 'Skin'].copy()
else:
    skin = adata.copy()

# Fallback for an object that carries no PCA embedding yet.
if 'X_pca' not in skin.obsm:
    sc.pp.scale(skin, max_value=10)
    sc.tl.pca(skin, svd_solver='arpack')

# X_pca is guaranteed to exist at this point; cap n_pcs at the number of
# components actually available.
sc.pp.neighbors(skin, n_neighbors=15, n_pcs=min(30, skin.obsm['X_pca'].shape[1]))
sc.tl.umap(skin)
sc.tl.leiden(
    skin,
    resolution=0.8,
    key_added='leiden_skin',
    flavor='igraph',
    n_iterations=2,
    directed=False,
)

# Score each cell by its mean expression over the panel genes present in the
# data, then target the cluster with the highest mean score.
panel = ['fgf8', 'wnt5b', 'wnt3a', 'sox2', 'sox10', 'ctgfa', 'mki67']
g = [x for x in panel if x in skin.var_names]
if g:
    # NOTE(review): this rescales skin.X in place, so the marker ranking below
    # runs on the rescaled matrix only when at least one panel gene is present.
    # Confirm that this is intended.
    sc.pp.scale(skin, max_value=10, copy=False)
    s = np.asarray(skin[:, g].X.mean(axis=1)).ravel()
    skin.obs['roc_score'] = s
    # observed=True: only clusters that actually contain cells are considered.
    tgt = skin.obs.groupby('leiden_skin', observed=True)['roc_score'].mean().idxmax()
else:
    # No panel gene available: fall back to the largest cluster.
    tgt = skin.obs['leiden_skin'].value_counts().idxmax()

# Rank marker genes per cluster with two independent methods.
sc.tl.rank_genes_groups(skin, 'leiden_skin', method='logreg', key_added='rank_logreg')
sc.tl.rank_genes_groups(skin, 'leiden_skin', method='wilcoxon', key_added='rank_wilcoxon')

# Top 12 genes of the target cluster from each method; the consensus keeps the
# logreg ordering and retains only genes that are also in the Wilcoxon top 12.
names1 = pd.DataFrame(skin.uns['rank_logreg']['names'])[tgt].iloc[:12].tolist()
names2 = pd.DataFrame(skin.uns['rank_wilcoxon']['names'])[tgt].iloc[:12].tolist()
cons = [x for x in names1 if x in names2]

# show=False keeps the figure open after plotting. With the default, scanpy
# shows the figure first, and on an inline notebook backend that closes it, so
# the savefig call below would write an empty canvas.
sc.pl.dotplot(skin, cons or names1, groupby='leiden_skin', show=False)
plt.savefig('roc_consensus.png', bbox_inches='tight')
# Display after saving so the plot still appears in the notebook output.
plt.show()
plt.close()

In [ ]:
from google.colab import files

# Hand both saved figures to the browser for download, in the order they were
# produced by the notebook.
for figure_name in ('umap_clusters.png', 'roc_consensus.png'):
    files.download(figure_name)