In [1]:
import numpy as np
import pandas as pd
import os
import sys
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
import anndata as ad
import scanpy as sc
import squidpy as sq
import spatialdata as sd
import spatialdata_io as sdio
import spatialdata_plot

from joblib import Parallel, delayed

from spatialdata.transformations import (
        Affine,
        Identity,
        MapAxis,
        Scale,
        Sequence,
        Translation,
        get_transformation,
        get_transformation_between_coordinate_systems,
        set_transformation,
    )

# Put the project root (two directories above the working directory) on the
# import path so the local `src` package can be imported.
prjdir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
if prjdir not in sys.path:
    sys.path.append(prjdir)

# Worker count, shared with scanpy's global settings.
n_jobs = sc.settings.n_jobs = 32
figure_params = dict(dpi=100, frameon=True, vector_friendly=True, fontsize=10)
sc.set_figure_params(**figure_params)

# Two-stop colormap: light grey -> medium blue.
from matplotlib.colors import LinearSegmentedColormap
cmap = LinearSegmentedColormap.from_list('grey_to_blue', ['lightgrey', 'mediumblue'])

# Do not report pandas PerformanceWarning.
from warnings import simplefilter
simplefilter("ignore", pd.errors.PerformanceWarning)

In [2]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# re-imported automatically before code is executed.
%load_ext autoreload
%autoreload 2
# Project-local helper modules from the `src` package.
import src.spatial_helpers.spatial as spp
import src.spatial_helpers.spatialplot as spl
import src.spatial_helpers.sc as scp
# NOTE(review): presumably seeds the random number generators for
# reproducibility -- confirm in src.spatial_helpers.sc.
scp.set_all_seeds()

In [3]:
# Input / output locations, relative to the notebook's working directory.
datadir, resultsdir = '../../data/xenium_merged', '../../data/xenium_results'
ncdir = os.path.join(datadir, 'nichecompass')

# Make sure the results folder exists and let scanpy save its figures there.
os.makedirs(resultsdir, exist_ok=True)
sc.settings.figdir = resultsdir

## Niche analysis

In [None]:
# Read the AnnData file stored with the 'delaunayr50' NicheCompass model.
nc_adata_path = os.path.join(ncdir, 'delaunayr50', 'model', 'adata.h5ad')
adata = sc.read_h5ad(nc_adata_path)

In [None]:
# Read the annotated object and attach every obs column that `adata` does not
# carry yet (left join on the obs index).
adata_anno = sc.read_h5ad(os.path.join(datadir, 'crc_ffpe_anno.h5ad'))
is_new_column = ~adata_anno.obs.columns.isin(adata.obs.columns)
adata.obs = adata.obs.merge(
    adata_anno.obs.loc[:, is_new_column],
    how='left',
    left_index=True,
    right_index=True,
)
# Take over a copy of the colour definitions stored with the annotations.
adata.uns['colors'] = adata_anno.uns['colors'].copy()

In [None]:
# Plot the 'delaunayr50' embedding coloured by the `name`, niche-cluster,
# `tissue_region` and `celltype` obs columns and save it as a JPEG.
# `nc_key` is assigned here because this cell runs before the cell that
# originally defined it; without the assignment a fresh
# "Restart Kernel -> Run All" stops with a NameError on the plotting call.
nc_key = 'delaunayr50_nb_leiden_0.15'
sc.pl.embedding(adata, basis='delaunayr50', color=['name', nc_key, 'tissue_region', 'celltype'], show=False)
plt.savefig(os.path.join(resultsdir, 'nc_latent.jpg'), dpi=300, bbox_inches='tight')

In [None]:
# Keep the raw niche-cluster labels as strings and derive 1-based niche names
# ('CN1', 'CN2', ...) from them.
nc_key = 'delaunayr50_nb_leiden_0.15'
adata.obs[nc_key] = adata.obs[nc_key].astype(str)
cn_number = adata.obs[nc_key].astype('int') + 1
adata.obs['CN'] = 'CN' + cn_number.astype('str')

In [None]:
# Hex colour assigned to each cellular niche (CN1 .. CN11).
cn_colors = dict(
    CN1='#44a685',
    CN2='#7EC8E3',
    CN3='#b86c8c',
    CN4='#79b9f7',
    CN5='#6495ED',
    CN6='#4e6bb5',
    CN7='#5096e6',
    CN8='#3650ad',
    CN9='#ffd000',
    CN10='#4169E1',
    CN11='#1C39BB',
)

In [None]:
# Cell-type composition of every niche, first as absolute counts (rel=False)
# and then as relative shares (rel=True).
colors = spl.get_obs_colors(adata, obs_key='celltype', colors_key='colors')
composition_style = dict(colors=colors, fontsize=18, width=12, height=6)
for rel, composition_file in ((False, 'CN_composition_abs.jpg'), (True, 'CN_composition.pdf')):
    scp.plot_composition(
        adata.obs,
        'CN',
        'celltype',
        rel=rel,
        save=os.path.join(resultsdir, composition_file),
        **composition_style,
    )

### Niche marker genes

In [None]:
# Niche markers computed over all cell types together (Wilcoxon test per CN),
# followed by a dotplot of the top genes for CN9.
sc.tl.rank_genes_groups(
    adata,
    groupby='CN',
    method='wilcoxon',
    key_added='CN_rank',
)
sc.pl.rank_genes_groups_dotplot(
    adata,
    key='CN_rank',
    groups=['CN9'],
    n_genes=10,
    layer='norm',
    min_logfoldchange=0.5,
    swap_axes=True,
    dendrogram=False,
    save='CN_markers.png',
)

In [None]:
# Niche markers computed separately within each cell type; one dotplot file
# is saved per cell type.
for ct in adata.obs['celltype'].unique():
    is_ct = adata.obs['celltype'] == ct
    adata_sub = adata[is_ct, :].copy()
    sc.tl.rank_genes_groups(
        adata_sub,
        groupby='CN',
        method='wilcoxon',
        key_added='CN_rank',
    )
    dotplot_file = 'CN_markers_' + ct + '.png'
    sc.pl.rank_genes_groups_dotplot(
        adata_sub,
        key='CN_rank',
        n_genes=6,
        layer='norm',
        min_logfoldchange=0.5,
        dendrogram=False,
        save=dotplot_file,
    )

In [None]:
# Fibroblasts: rank genes between niches, restricted to the genes returned by
# spp.filter_bg_genes for this cell type.
adata_fibro = adata[(adata.obs['celltype']=='Fibroblast'),:].copy()
test_genes = spp.filter_bg_genes(adata, list(adata.var.index), target_celltype='Fibroblast', expr_thres=0.2, corr_thres=0.6)
# Materialise the gene subset. rank_genes_groups stores its result in `.uns`,
# and writing to an AnnData view forces an implicit copy plus an
# ImplicitModificationWarning; the explicit copy makes that step visible.
adata_fibro = adata_fibro[:, test_genes].copy()
sc.tl.rank_genes_groups(adata_fibro, layer='norm', key_added='CN_fibro_rank', groupby='CN', method='wilcoxon')
# adata_fibro.write_h5ad(os.path.join(datadir, 'adata_fibro.h5ad'))

In [None]:
# adata_fibro = sc.read_h5ad(os.path.join(datadir, 'adata_fibro.h5ad'))
# Draw the CN9 fibroblast marker dotplot once per output file; the return
# value of the last call is kept in `fibro_genes`.
for fibro_plot_file in ('crca_fibro_niches.png', 'crca_fibro_niches.pdf'):
    fibro_genes = scp.marker_dotplot(
        adata_fibro,
        group_by='CN',
        group='CN9',
        rank_key='CN_fibro_rank',
        save=os.path.join(resultsdir, fibro_plot_file),
    )

In [None]:
# Cancer cells: rank genes between niches, restricted to the genes returned by
# spp.filter_bg_genes for this cell type.
adata_cancer = adata[(adata.obs['celltype']=='Cancer cell'),:].copy()
test_genes = spp.filter_bg_genes(adata, list(adata.var.index), target_celltype='Cancer cell', expr_thres=0.2, corr_thres=0.6)
# Materialise the gene subset. rank_genes_groups stores its result in `.uns`,
# and writing to an AnnData view forces an implicit copy plus an
# ImplicitModificationWarning; the explicit copy makes that step visible.
adata_cancer = adata_cancer[:, test_genes].copy()
sc.tl.rank_genes_groups(adata_cancer, layer='norm', key_added='CN_cancer_rank', groupby='CN', method='wilcoxon')
# adata_cancer.write_h5ad(os.path.join(datadir, 'adata_cancer.h5ad'))

In [None]:
# adata_cancer = sc.read_h5ad(os.path.join(datadir, 'adata_cancer.h5ad'))
# Draw the CN9 cancer-cell marker dotplot once per output file; the return
# value of the last call is kept in `cancer_genes`.
for cancer_plot_file in ('crca_cancer_niches.png', 'crca_cancer_niches.pdf'):
    cancer_genes = scp.marker_dotplot(
        adata_cancer,
        group_by='CN',
        group='CN9',
        rank_key='CN_cancer_rank',
        save=os.path.join(resultsdir, cancer_plot_file),
    )

In [None]:
# T cells: rank genes between niches, restricted to the genes returned by
# spp.filter_bg_genes for this cell type.
adata_tcells = adata[(adata.obs['celltype']=='T cell') & (adata.obs['CN']!='CN11'),:].copy() # CN11 too few cells
test_genes = spp.filter_bg_genes(adata, list(adata.var.index), target_celltype='T cell', expr_thres=0.2, corr_thres=0.6)
# Materialise the gene subset. rank_genes_groups stores its result in `.uns`,
# and writing to an AnnData view forces an implicit copy plus an
# ImplicitModificationWarning; the explicit copy makes that step visible.
adata_tcells = adata_tcells[:, test_genes].copy()
sc.tl.rank_genes_groups(adata_tcells, layer='norm', key_added='CN_tcells_rank', groupby='CN', method='wilcoxon')
# adata_tcells.write_h5ad(os.path.join(datadir, 'adata_tcells.h5ad'))

In [None]:
# adata_tcells = sc.read_h5ad(os.path.join(datadir, 'adata_tcells.h5ad'))
# Draw the CN9 T-cell marker dotplot once per output file; the return value
# of the last call is kept in `tcell_genes`.
for tcell_plot_file in ('crca_tcell_niches.png', 'crca_tcell_niches.pdf'):
    tcell_genes = scp.marker_dotplot(
        adata_tcells,
        group_by='CN',
        group='CN9',
        rank_key='CN_tcells_rank',
        save=os.path.join(resultsdir, tcell_plot_file),
    )

In [None]:
# Expression of these genes in other celltypes of the same niche
group = 'CN9'
# Merge the three marker lists, dropping duplicates while keeping first-seen
# order. A plain set() would order the gene names differently from one kernel
# session to the next, so the dotplot rows would not be reproducible.
genes = list(dict.fromkeys(list(cancer_genes) + list(fibro_genes) + list(tcell_genes)))
adata_cn = adata[(adata.obs['CN'] == group), :].copy()
font_size = 18
with mpl.rc_context({'font.size': font_size,
                     'axes.labelsize': font_size,
                     'axes.titlesize': font_size,
                     'xtick.labelsize': font_size,
                     'ytick.labelsize': font_size,
                     'legend.fontsize': font_size,
                    }):
    # With return_fig=True scanpy hands back a DotPlot object that builds its
    # own figure when it is rendered or saved. The former plt.figure() call
    # was never drawn on and only left an empty figure behind, so it is gone.
    fig = sc.pl.dotplot(adata_cn, layer='norm', var_names=genes, groupby='celltype', dendrogram=False, swap_axes=True, return_fig=True)
    fig.legend(colorbar_title='Expression', size_title='% of cells ', width=2.5)
    fig.savefig(os.path.join(resultsdir, 'crca_neutro_niche_celltype.jpg'), bbox_inches='tight', dpi=300)

In [None]:
# Write the annotated object, including the niche labels, to disk.
niches_h5ad = os.path.join(datadir, 'crc_ffpe_niches.h5ad')
adata.write_h5ad(niches_h5ad)

## Cluster analysis

In [None]:
# Keys of the niche clustering column and of the neighbour graph in `.obsp`.
nc_key = 'delaunayr50_nb_leiden_0.15'
neighbor_key = 'delaunayr50_connectivities'
# Largest row sum of the connectivity matrix.
connectivities = adata.obsp[neighbor_key]
connectivities.sum(axis=1).max()

In [None]:
# Neighbour information per cell type, taken from the delaunayr50 graph.
celltype_neighbors_df = spp.get_neighbors(
    adata,
    obs_key='celltype',
    neighbors_key='delaunayr50_connectivities',
    n_jobs=n_jobs,
)

In [None]:
# Store the 'Neutrophil' column of the neighbour table on the cells, as a
# copy of its underlying values.
neutrophil_neighbor_values = celltype_neighbors_df['Neutrophil'].values
adata.obs['neutrophil_neighbors'] = neutrophil_neighbor_values.copy()

In [None]:
# find aggregates
adata = spp.find_aggregates(adata, celltype='Neutrophil', n_neighbors=3, max_iterations=10, n_neighbors_key='neutrophil_neighbors',
                            aggr_key='neutrophil_aggregate', neighbors_key=neighbor_key)
# Only the last expression of a cell is displayed, so the seed counts are
# printed explicitly; previously this value was computed and thrown away.
print(adata.obs['neutrophil_aggregate_seed'].value_counts())
adata.obs['neutrophil_aggregate'].value_counts()

In [None]:
# Label every cell: Neutrophils flagged as aggregate members are 'clustered',
# the remaining Neutrophils 'dispersed', and all other cells 'other celltype'.
is_neutrophil = adata.obs['celltype'] == 'Neutrophil'
in_aggregate = adata.obs['neutrophil_aggregate'] == True
adata.obs['Neutrophil'] = 'other celltype'
adata.obs.loc[is_neutrophil, 'Neutrophil'] = 'dispersed'
adata.obs.loc[is_neutrophil & in_aggregate, 'Neutrophil'] = 'clustered'
adata.obs['Neutrophil'].value_counts()

In [None]:
# Hex colour for each Neutrophil state label.
cluster_colors = dict([
    ('clustered', '#7d7ddb'),
    ('dispersed', '#b8b8de'),
    ('other celltype', '#e0e0e0'),
])

In [None]:
# Stacked bar chart: for every niche, the share of Neutrophils in each
# Neutrophil state (rows are normalised to sum to 1).
fontsize = 18
group_key = 'CN'
composition_key = 'Neutrophil'

# Neutrophils only.
is_neutrophil_cell = adata.obs['Neutrophil'] != 'other celltype'
df = adata[is_neutrophil_cell].obs
state_counts = df.groupby([group_key, composition_key], observed=True).size().unstack()
dfplot = state_counts.div(state_counts.sum(axis=1), axis=0)

# One colour per plotted column; unknown labels fall back to light grey.
color_mapping = {category: cluster_colors.get(category, '#ECECEC') for category in dfplot.columns}
plotcolors = [color_mapping.get(col, '#ECECEC') for col in dfplot.columns]

text_size_keys = (
    'font.size',
    'axes.labelsize',
    'axes.titlesize',
    'xtick.labelsize',
    'ytick.labelsize',
    'legend.fontsize',
)
with mpl.rc_context(dict.fromkeys(text_size_keys, fontsize)):
    fig, ax = plt.subplots(1, 1, figsize=(8, 5))
    dfplot.plot.bar(stacked=True, ax=ax, width=0.7, color=plotcolors)
    legend = ax.legend(bbox_to_anchor=(1, 1), labelspacing=0.15, frameon=False, loc="upper left")
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.grid(False)
    ax.set_ylim(0, 1)
    ax.tick_params(axis='both', which='major')
    plt.tight_layout()
    # Same figure written as JPEG and as PDF.
    for composition_plot_file in ('Neutrophil_cluster_composition.jpg', 'Neutrophil_cluster_composition.pdf'):
        plt.savefig(os.path.join(resultsdir, composition_plot_file), bbox_inches='tight', pad_inches=0, dpi=500)

In [None]:
# Colour key for the niches: one unit-length horizontal bar per CN with its
# label written to the right of the bar.
legend_fig, legend_ax = plt.subplots(figsize=(8, 6))
for row, (cn_label, cn_color) in enumerate(cn_colors.items()):
    legend_ax.barh(row, 1, color=cn_color)
    legend_ax.text(1.05, row, cn_label, va='center', ha='left', fontsize=12)
legend_ax.axis('off')
plt.show()

In [None]:
# Three-way label: Neutrophils that are clustered AND located in CN9, all
# remaining Neutrophils, and every other cell type.
adata.obs['Neutrophil_cluster_niche'] = 'other celltype'
adata.obs.loc[(adata.obs['celltype'] == 'Neutrophil'), 'Neutrophil_cluster_niche'] = 'other Neutrophil'
# The 'Neutrophil' column only holds 'clustered', 'dispersed' and
# 'other celltype'. The earlier comparison against 'in cluster' never matched,
# so 'Neutrophil in cluster & niche' was never assigned to any cell.
adata.obs.loc[(adata.obs['Neutrophil'] == 'clustered') & (adata.obs['CN'] == 'CN9'), 'Neutrophil_cluster_niche'] = 'Neutrophil in cluster & niche'
adata.obs['Neutrophil_cluster_niche'].value_counts()

In [None]:
# add plotting colors
# Keys must equal the labels stored in adata.obs['Neutrophil'] ('clustered',
# 'dispersed', 'other celltype'). The previous keys 'in cluster' and
# 'not in cluster' matched none of them, so colour lookups by label would
# have missed both Neutrophil states.
cluster_colors = {
    'clustered': '#ffbb00',
    'dispersed': '#4d4df0',
    'other celltype': '#ECECEC'
}

# NOTE(review): no 'Neutrophil_CD74' obs column is created in the visible part
# of this notebook -- confirm it exists wherever these colours are used.
CD74_colors = {
    'Neutrophil_CD74-lo': '#4d4df0',
    'Neutrophil_CD74-hi': '#ffbb00',
    'other celltype': '#ECECEC'
}

neutro_colors = {
    'Neutrophil in cluster & niche': '#ffbb00',
    'other Neutrophil': '#4d4df0',
    'other celltype': '#ECECEC'
}

# Replace the stored colour definitions: keep the existing cell-type colours
# and add one label -> colour mapping per obs column defined above.
adata.uns['colors'] = {'celltype': adata.uns['colors']['celltype'],
                       'CN': cn_colors,
                       'Neutrophil': cluster_colors,
                       'Neutrophil_CD74': CD74_colors,
                       'Neutrophil_cluster_niche': neutro_colors}

In [None]:
# Write the object with the Neutrophil labels and colour definitions to disk.
niches_h5ad = os.path.join(datadir, 'crc_ffpe_niches.h5ad')
adata.write_h5ad(niches_h5ad)

In [None]:
# Attach `adata` as the 'niches' table of the SpatialData object read from
# 'crca_xenium.zarr' and write that table into the store.
sdata = sd.read_zarr(os.path.join(datadir, 'crca_xenium.zarr'))
sdata['niches'] = adata
# NOTE(review): match_ids is called with table_key='anno', not with the
# 'niches' table added on the previous line -- confirm this is intended.
sdata = spp.match_ids(sdata, ['cell_boundaries'], table_key='anno')
# Mark every row of the 'niches' table as annotating 'cell_boundaries'.
sdata.tables['niches'].obs['region'] = 'cell_boundaries'
sdata.set_table_annotates_spatialelement('niches', region_key='region', region='cell_boundaries')
# Delete the stored 'niches' element, then write the current one.
# NOTE(review): presumably the delete call needs 'niches' to exist on disk
# already -- verify the behaviour on the first run against a fresh store.
sdata.delete_element_from_disk('niches')
sdata.write_element('niches', overwrite=True)