# scCODEX preview - Clustering

In [None]:
# Build the AnnData object consumed by the clustering steps below.
# Inspect which markers worked first; antibodies that did not work are listed
# here so they are dropped before clustering.
failed_antibodies = ['GranzymeB', 'Tbet', 'PD-L1', 'PD1', 'IDO-I', 'ICOS']
adata = hf_makeAnndata(
    df_nn=df_nn,
    col_sum=41,  # index of the column holding the last protein feature
    nonFuncAb_list=failed_antibodies,
)
adata

In [None]:
# First clustering pass. The call's result is not assigned back to adata;
# adata itself is inspected right afterwards.
tl_clustering(
    adata,
    clustering='leiden',  # Louvain can be chosen instead
    n_neighbors=10,
    res=1,
    reclustering=False,  # True would skip computing the neighbors
)
adata

In [None]:
# UMAP embedding colored by the 'leiden_1' cluster labels
# (presumably the obs column written by the res=1 clustering run — confirm
# against tl_clustering).
sc.pl.umap(adata, color='leiden_1')

In [None]:
# Re-cluster at resolution 0.4 and draw the labels directly on the UMAP.
tl_clustering(
    adata,
    clustering='leiden',  # Louvain can be chosen instead
    n_neighbors=10,
    res=0.4,
    reclustering=True,  # True: the neighbors are not recomputed
)
sc.pl.umap(adata, color='leiden_0.4', legend_loc='on data')

In [None]:
# Dot plot of every marker kept in adata, grouped by the leiden_0.4 clusters.
marker_names = adata.var_names.to_list()
sc.pl.dotplot(adata, var_names=marker_names, groupby='leiden_0.4')

In [None]:
# Plot the clusters at each cell's original spatial (x, y) coordinates.
#
# The labels are copied positionally (.values), the same way the 'celltype'
# column is copied into df_nn. Assigning the Series itself would align on
# index labels, and adata.obs carries AnnData's string obs names: with a
# non-matching df_nn index the whole column would silently become NaN.
# Assumes adata rows are in the same order as df_nn rows (adata was built
# from df_nn) — confirm if df_nn is filtered or re-sorted in between.
df_nn['leiden_0.4'] = adata.obs['leiden_0.4'].values

ax = sns.scatterplot(
    x='x',
    y='y',
    data=df_nn,
    hue='leiden_0.4',
    s=1,        # tiny markers: one point per cell
    alpha=0.5,
)
# Put the legend outside the axes so it does not cover the tissue.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
# Flip the y axis (presumably to match image orientation, origin at top-left).
ax.invert_yaxis()

In [None]:
# Cell-type annotation for each leiden_0.4 cluster, listed in cluster order:
# the position in the tuple is the cluster number.
_cluster_labels = (
    'Epithelial',                   # 0
    'Stromal/blood vessels',        # 1
    'Stroma',                       # 2
    'Blood vessels',                # 3
    'Proliferating stroma',         # 4
    'M2 macrophage',                # 5
    'Early exhausted CD8 T cells',  # 6
    'Overexposed junk',             # 7
    'monocyte/macrophage?',         # 8
    'Plasma cells',                 # 9
    'Lymphatics',                   # 10
    'Treg',                         # 11
    'CD4+ (memory) T cells',        # 12
    'DC',                           # 13
    'Epithelial',                   # 14
    'Neutrophil',                   # 15
)
# Keys are the cluster numbers as strings ('0' ... '15').
old_to_new = {
    str(cluster_id): label
    for cluster_id, label in enumerate(_cluster_labels)
}
# Translate each cell's leiden_0.4 cluster into its annotated cell type and
# store the result as a categorical obs column.
mapped_celltypes = adata.obs['leiden_0.4'].map(old_to_new)
adata.obs['celltype'] = mapped_celltypes.astype('category')

In [None]:
# Copy the cell-type labels into df_nn by position (.values drops the
# adata.obs index, so no index alignment takes place).
celltype_values = adata.obs['celltype'].values
df_nn['celltype'] = celltype_values

In [None]:
# Spatial view of the leiden_0.4 clusters, using region_num as the sample column.
pl_mono_cluster_spatial(
    df=df_nn,
    sample_col='region_num',
    cluster_col='leiden_0.4',
    figsize=(20, 20),
)

## Sub-clustering

In [None]:
# Sub-cluster only cluster '0' of leiden_0.4; the refined labels are written
# to their own obs column and then shown on the UMAP.
subcluster_key = 'leiden_0.4_subcluster_0'
sc.tl.leiden(
    adata,
    restrict_to=('leiden_0.4', ['0']),
    resolution=0.13,
    key_added=subcluster_key,
)
sc.pl.umap(adata, color=subcluster_key)

In [None]:
# Dot plot of every marker kept in adata, grouped by the sub-clustered labels.
sc.pl.dotplot(
    adata,
    var_names=adata.var_names.to_list(),
    groupby='leiden_0.4_subcluster_0',
)

## Save anndata

In [None]:
# Save the annotated AnnData. The path is built by plain string
# concatenation, so output_dir must already end with a path separator.
adata_path = output_dir + "NBT_230223_70129_adata.h5ad"
adata.write(adata_path)