# Visualization

2024-09-29

In [None]:
# load libraries
import os
import numpy as np
import pandas as pd
import seaborn as sns
import scanpy as sc
import squidpy as sq
import anndata as ad
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from tqdm.notebook import tqdm

from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

In [None]:
# Global scanpy figure parameters (PDF output, dpi 150).
# NOTE: the scanpy figure directory is deliberately NOT set in this cell.
# `fig_path` is only defined in the "Input" section below, so referencing it
# here raised NameError on a fresh kernel (Restart & Run All).
# `sc.settings.figdir = fig_path` is assigned in the "Heatmap" section, which
# runs before the first scanpy plot that uses `save=`.
sc.set_figure_params(format='pdf', dpi=150)

## Input

In [None]:
# define IO paths (everything is derived from `base_path`)
base_path = './path/to/dataset'
output_path = os.path.join(base_path, 'output')
expr_path = os.path.join(base_path, 'expr')
fig_path = os.path.join(base_path, "figures")
# makedirs(exist_ok=True) is idempotent and also creates missing parent
# directories; the previous exists()/mkdir() pair raised FileNotFoundError
# when `base_path` itself did not exist yet.
os.makedirs(fig_path, exist_ok=True)

In [None]:
# load datasets: the raw combined object plus the two dated "level3" objects,
# all read from `expr_path`
raw_cdata_file = os.path.join(expr_path, 'combined-raw.h5ad')
cdata_file = os.path.join(expr_path, '2024-09-29-combined-level3-bk.h5ad')
rdata_file = os.path.join(expr_path, '2024-09-29-combined-level3-region23-bk.h5ad')

raw_cdata = sc.read_h5ad(raw_cdata_file)
cdata = sc.read_h5ad(cdata_file)
rdata = sc.read_h5ad(rdata_file)

In [None]:
# create unique index of the form "<sample>_<fov_id>_<seg_label>"
id_parts = [raw_cdata.obs[key].astype(str) for key in ('sample', 'fov_id', 'seg_label')]
raw_cdata.obs['unique_index'] = id_parts[0] + '_' + id_parts[1] + '_' + id_parts[2]
# the column is kept in obs and is also used as the obs index
raw_cdata.obs.index = raw_cdata.obs['unique_index']

In [None]:
# subset raw data
# QC metrics are computed on the full raw object, before it is subset.
sc.pp.calculate_qc_metrics(raw_cdata, inplace=True, percent_top=None)
# Keep only the observations present in `rdata` (matched on the obs index).
# Indexing an AnnData yields a view; .copy() materialises it so the layer
# assignment below writes to a real object instead of forcing anndata to copy
# the view implicitly (ImplicitModificationWarning).
raw_cdata = raw_cdata[rdata.obs.index, ].copy()
# preserve the untouched matrix in a dedicated layer
raw_cdata.layers['raw'] = raw_cdata.X.copy()
raw_cdata

## Create color palettes

In [None]:
# Switch matplotlib to its 'default' style sheet before drawing the palettes.
# NOTE(review): this presumably also resets rcParams changed by the earlier
# sc.set_figure_params(...) call — confirm that this is intended.
plt.style.use('default')

In [None]:
# reorder categories: give `level_1` the same explicit category order in both objects
level_1_order = ['T cells', 'B cells', 'Macrophages', 'Dendritic cells', 'NA']
for adata in (cdata, rdata):
    level_1_cat = adata.obs['level_1'].astype('category')
    # reorder_categories raises unless the existing categories are exactly these labels
    adata.obs['level_1'] = level_1_cat.cat.reorder_categories(level_1_order)

In [None]:
# level 1: one colour per entry of `level_1_order`, in the same order
level_1_hex = ['#1a5fd6', '#FBB040', '#00A651', '#92278F', '#dbdbdb']
level_1_pl = sns.color_palette(level_1_hex)
# single-colour palette (the 'B cells' colour) for the B-cell-only maps
b_cells_pl = sns.color_palette(['#FBB040'])
level_1_cmap = ListedColormap(level_1_pl.as_hex())

# preview the palette and label each swatch
sns.palplot(level_1_pl)
swatch_ax = plt.gca()
swatch_ax.set_xticks(range(len(level_1_order)))
swatch_ax.set_xticklabels(level_1_order, size=5, rotation=45)
plt.tight_layout()
# saving is currently disabled for this palette:
# plt.savefig(os.path.join(fig_path, 'level_1_palette.pdf'))
plt.show()

In [None]:
# level 2 t cells: labels and their colours, listed in matching order
level_2_t_cells_order = ['CD4+ T cells', 'CD8+ T cells', 'Synthetic T cells', 'T cells']
t_cells_hex = ['#42cf13', '#2a73bd', '#e45ff5', '#ad7307']
level_2_t_cells_pl = sns.color_palette(t_cells_hex)
level_2_t_cells_cmap = ListedColormap(level_2_t_cells_pl.as_hex())

# preview the palette and label each swatch
sns.palplot(level_2_t_cells_pl)
swatch_ax = plt.gca()
swatch_ax.set_xticks(range(len(level_2_t_cells_order)))
swatch_ax.set_xticklabels(level_2_t_cells_order, size=5, rotation=45)
plt.tight_layout()
# saving is currently disabled for this palette:
# plt.savefig(os.path.join(fig_path, 'level_2_t_cells_palette.pdf'))
plt.show()

In [None]:
# level 2 dendritic cells: labels and their colours, listed in matching order
level_2_dc_order = ['cDC1', 'cDC2', 'Other Dendritic cells']
dc_hex = ['#fa6e52', '#4164a6', '#b079f2']
level_2_dc_pl = sns.color_palette(dc_hex)
level_2_dc_cmap = ListedColormap(level_2_dc_pl.as_hex())

# preview the palette, label each swatch and save it as PDF
sns.palplot(level_2_dc_pl)
swatch_ax = plt.gca()
swatch_ax.set_xticks(range(len(level_2_dc_order)))
swatch_ax.set_xticklabels(level_2_dc_order, size=5, rotation=45)
plt.tight_layout()
dc_palette_file = os.path.join(fig_path, 'level_2_dc_palette.pdf')
plt.savefig(dc_palette_file)
plt.show()

In [None]:
# level 2 macrophage: labels and their colours, listed in matching order
level_2_mc_order = ['Macrophages', 'Activated Macrophages', 'Monocytes']
mc_hex = ['#8120f5', '#e0416c', '#13ede6']
level_2_mc_pl = sns.color_palette(mc_hex)
level_2_mc_cmap = ListedColormap(level_2_mc_pl.as_hex())

# preview the palette, label each swatch and save it as PDF
# (tick labels use size 3 here, smaller than in the other palette previews)
sns.palplot(level_2_mc_pl)
swatch_ax = plt.gca()
swatch_ax.set_xticks(range(len(level_2_mc_order)))
swatch_ax.set_xticklabels(level_2_mc_order, size=3, rotation=45)
plt.tight_layout()
mc_palette_file = os.path.join(fig_path, 'level_2_mc_palette.pdf')
plt.savefig(mc_palette_file)
plt.show()

In [None]:
# region: use the 3rd and 4th colours of the 'tab10' palette
tab10_head = sns.color_palette('tab10', 4)
region_pl = tab10_head[2:]

## Spatial map

In [None]:
def plot_regional_sct(cdata, rdata, sample, reigon_bd, base_level, type_list, palette, file_name,
                      spot_size=15, out_dir=None):
    """Draw a cropped spatial scatter map of one sample and save it to disk.

    Every observation of `cdata` inside the crop window is drawn in '#dbdbdb'
    as background; observations of `rdata` inside the same window are drawn on
    top, coloured by the `base_level` column.

    Parameters
    ----------
    cdata, rdata
        AnnData objects whose `.obs` provides the 'sample', 'global_x' and
        'global_y' columns (`rdata.obs` must also provide `base_level`).
    sample
        Value matched against `.obs['sample']`.
    reigon_bd
        Crop window `[x_start, x_stop, y_start, y_stop]`; start bounds are
        inclusive, stop bounds exclusive.
    base_level
        Name of the `rdata.obs` column used as hue.
    type_list
        Labels of `base_level` to draw. When non-empty, only these labels are
        kept and they become the category order of the hue column. When empty,
        all observations in the window are drawn.
    palette
        Palette forwarded to `sns.scatterplot` for the foreground layer.
    file_name
        Output file name, joined onto `out_dir`.
    spot_size
        Marker size used for both layers (default 15).
    out_dir
        Output directory; defaults to the notebook-level `fig_path`.
    """
    if out_dir is None:
        out_dir = fig_path

    def _crop_obs(adata):
        """Return a copy of `adata.obs` restricted to `sample` and the crop window."""
        obs = adata.obs
        x, y = obs['global_x'], obs['global_y']
        # Plain comparisons instead of isin(range(...)): same result for integer
        # coordinates, but non-integer coordinates are no longer silently dropped
        # and float bounds are accepted.
        inside = (
            (obs['sample'] == sample)
            & (x >= reigon_bd[0]) & (x < reigon_bd[1])
            & (y >= reigon_bd[2]) & (y < reigon_bd[3])
        )
        return adata[inside, ].obs.copy()

    current_complete_obs = _crop_obs(cdata)
    current_region_obs = _crop_obs(rdata)
    if type_list:
        keep = current_region_obs[base_level].isin(type_list)
        current_region_obs = current_region_obs.loc[keep, :].copy()
        # set_categories (rather than reorder_categories) tolerates labels of
        # `type_list` that do not occur inside this crop, while still fixing the
        # category order to `type_list`.
        current_region_obs[base_level] = (
            current_region_obs[base_level]
            .astype(object)
            .astype('category')
            .cat.set_categories(type_list)
        )

    fig, ax = plt.subplots(figsize=(5, 5))
    # background layer: rasterized
    sns.scatterplot(x='global_x', y='global_y', data=current_complete_obs, color='#dbdbdb',
                    s=spot_size, linewidth=0, ax=ax, rasterized=True)
    # foreground layer: coloured by `base_level`, not rasterized
    sns.scatterplot(x='global_x', y='global_y', hue=base_level, data=current_region_obs, palette=palette,
                    s=spot_size, linewidth=0, legend=False, ax=ax, rasterized=False, alpha=.8)
    ax.set_aspect('equal')
    ax.axis('off')
    fig.tight_layout()
    fig.savefig(os.path.join(out_dir, file_name), dpi=300)
    plt.show()
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)

In [None]:
# define sample boundary
# Each list is a crop window passed as `reigon_bd` to plot_regional_sct, which
# reads it as [x_start, x_stop, y_start, y_stop] on global_x / global_y.
sample1_bd = [8000, 12000, 9500, 13750]
sample3_bd = [23500, 27500, 15000, 19000]
sample6_bd = [16250, 19500, 4500, 8400]

In [None]:
# Cropped spatial maps: one SVG per (panel, sample).
# Panels are drawn in the order level 1 -> B cells -> T cells -> DC -> MC,
# each for samples 1, 3 and 6.
# NOTE: the 'smaple' spelling in the output file names is kept as-is so the
# names of the generated files do not change.
sample_crops = [('1', sample1_bd), ('3', sample3_bd), ('6', sample6_bd)]
sct_panels = [
    # (obs column, labels to keep, palette, file-name tag)
    ('level_1', [], level_1_pl, 'level_1'),
    ('level_1', ['B cells'], b_cells_pl, 'b_cells'),
    ('level_2', level_2_t_cells_order, level_2_t_cells_pl, 't_cells'),
    ('level_2', level_2_dc_order, level_2_dc_pl, 'dc'),
    ('level_2', level_2_mc_order, level_2_mc_pl, 'mc'),
]
for obs_key, keep_types, panel_pl, tag in sct_panels:
    for sample_no, crop_bd in sample_crops:
        plot_regional_sct(
            cdata, rdata, f'sample{sample_no}', crop_bd,
            obs_key, keep_types, panel_pl,
            f'sct_smaple{sample_no}_{tag}.svg',
        )

In [None]:
# region: cropped maps coloured by `region`, saved as PNG
# (the 'smaple' spelling in the file names is kept unchanged)
for sample_no, crop_bd in (('1', sample1_bd), ('3', sample3_bd), ('6', sample6_bd)):
    plot_regional_sct(
        cdata, rdata, f'sample{sample_no}', crop_bd,
        'region', [], region_pl,
        f'sct_smaple{sample_no}_region_crop.png',
    )

In [None]:
# Whole-sample maps: one PNG per sample, every observation coloured by `region`.
for current_sample in cdata.obs['sample'].unique():
    print(current_sample)
    current_complete_obs = cdata.obs.loc[cdata.obs['sample'] == current_sample, :]

    # figure size in inches grows with the coordinate maxima (1 inch per 1000 units)
    fig_size = np.array([current_complete_obs['global_x'].max(), current_complete_obs['global_y'].max()]) / 1000
    fig, ax = plt.subplots(figsize=fig_size)
    sns.scatterplot(x='global_x', y='global_y', hue='region', data=current_complete_obs, palette='tab10', s=1, linewidth=0, ax=ax)
    ax.set_aspect('equal')
    ax.axis('off')
    fig.tight_layout()
    fig.savefig(os.path.join(fig_path, f"sct_{current_sample}_region.png"), dpi=300)
    plt.show()
    # close explicitly: these figures can be large and would otherwise pile up
    # across loop iterations
    plt.close(fig)

## Heatmap

In [None]:
# change scanpy plot setting
# Point scanpy's figure directory at `fig_path` and set its figure parameters
# (PDF format, dpi 150) for the dot plots below, which all pass `save=`.
sc.settings.figdir = fig_path
sc.set_figure_params(format='pdf', dpi=150)

### level 1

In [None]:
# create subset without nas: drop observations whose level_1 label is 'NA'
has_level_1 = rdata.obs['level_1'] != 'NA'
pdata = rdata[has_level_1, ]

In [None]:
# create gene dict for visualization
# Maps a group label to the list of gene names shown under that label; it is
# passed as the variable-names argument of sc.pl.dotplot below.
level_1_gene_dict = {
 'T cells': ['Ccr7', 'Cd3d', 'Cd3e', 'Cd3g', 'Lef1'],
 'B cells': ['Cd19', 'Cd22', 'Cd79a', 'Ms4a1'],
 'Macrophages': ['Adgre1', 'Cd68', 'Csf1r', 'Itgam', 'Lyz2'],
 'Dendritic cells': ['Irf4', 'Irf8', 'Itgax', 'Sirpa', 'Xcr1']
}

In [None]:
# plot dot plots: first without standard_scale, then with standard_scale set
# to 'var' and to 'group'; each variant is saved under its own suffix
level_1_dot_kwargs = dict(groupby='level_1', dendrogram=False, use_raw=True, cmap='Reds', swap_axes=False)
sc.pl.dotplot(pdata, level_1_gene_dict, save='level_1_h', **level_1_dot_kwargs)
for scale_mode in ('var', 'group'):
    sc.pl.dotplot(
        pdata, level_1_gene_dict,
        standard_scale=scale_mode,
        save=f'level_1_{scale_mode}_h',
        **level_1_dot_kwargs,
    )

### level 2

In [None]:
# create subset without nas
# .copy() turns the AnnData view into an independent object: a later cell
# assigns to pdata.obs['level_2'], and writing to a view forces anndata to copy
# it implicitly (ImplicitModificationWarning).
pdata = rdata[rdata.obs['level_1'] != 'NA', ].copy()

In [None]:
# create gene dict for visualization
# Maps a level-2 label to the gene names shown under that label in the dot
# plots. The key order matters: the next cell uses list(keys()) as the category
# order of `level_2`.
level_2_gene_dict = {
 'T cells': ['Cd3e', 'Cd3g', 'Cd3d'],
 'CD4+ T cells': ['Cd4'],
 'CD8+ T cells': ['Cd8a'],
 'Synthetic T cells': ['Ccr7'],
 'B cells': ['Cd19', 'Cd22', 'Cd79a', 'Ms4a1'],
 'Macrophages': ['Adgre1', 'Itgam'],
 'Activated Macrophages': ['Cd68'],
 'Monocytes': ['Csf1r', 'Lyz2'],
 'cDC1': ['Irf8'],
 'cDC2': ['Irf4'],
 'Other Dendritic cells': ['Itgax', 'Xcr1'],
}

In [None]:
# reorder categories: `level_2` follows the key order of `level_2_gene_dict`
level_2_order = list(level_2_gene_dict)
level_2_cat = pdata.obs['level_2'].astype('category')
# reorder_categories raises unless the existing categories are exactly these labels
pdata.obs['level_2'] = level_2_cat.cat.reorder_categories(level_2_order)

In [None]:
# plot dot plots: first without standard_scale, then with standard_scale set
# to 'var' and to 'group'; each variant is saved under its own suffix
level_2_dot_kwargs = dict(groupby='level_2', dendrogram=False, use_raw=True, cmap='Reds', swap_axes=False)
sc.pl.dotplot(pdata, level_2_gene_dict, save='level_2_h', **level_2_dot_kwargs)
for scale_mode in ('var', 'group'):
    sc.pl.dotplot(
        pdata, level_2_gene_dict,
        standard_scale=scale_mode,
        save=f'level_2_{scale_mode}_h',
        **level_2_dot_kwargs,
    )

### level 3 T cells

In [None]:
# create subset: observations whose level_2 label is CD4+ or CD8+ T cells
# .copy() materialises the AnnData view so the obs assignment below writes to a
# real object instead of triggering an implicit copy of the view.
pdata = rdata[rdata.obs['level_2'].isin(['CD4+ T cells', 'CD8+ T cells']), ].copy()
# display order of the level_3 labels in the dot plots
current_order = ['CD4+ T cells', 'CD8+ T cells', 'Treg', 'Th1', 'Th2', 'Th17', 'Naive CD4+ T cells', 'Naive CD8+ T cells', 'PD-1+ T cells']
pdata.obs['level_3'] = pdata.obs['level_3'].astype('category')
# reorder_categories raises unless the existing categories are exactly `current_order`
pdata.obs['level_3'] = pdata.obs['level_3'].cat.reorder_categories(current_order)

In [None]:
# create gene dict for visualization
# Maps a group label to the gene names shown under that label; it is passed as
# the variable-names argument of sc.pl.dotplot below.
level_3_t_gene_dict = {
 'T cells': ['Cd3d', 'Cd3e', 'Cd3g'],
 'CD4 T cells': ['Cd4'],
 'CD8 T cells': ['Cd8a'],
 'Treg': ['Foxp3', 'Il2ra'],
 'Th1': ['Ifng', 'Tbx21'],
 'Th2': ['Il4'],
 'Th17': ['Il17a'],
 'Naïve T cells': ['Ccr7', 'Lef1', 'Sell'],
 'PD-1+ T cells': ['Pdcd1']}

In [None]:
# plot dot plots: first without standard_scale, then with standard_scale set
# to 'var' and to 'group'; each variant is saved under its own suffix
level_3_dot_kwargs = dict(groupby='level_3', dendrogram=False, use_raw=True, cmap='Reds', swap_axes=False)
sc.pl.dotplot(pdata, level_3_t_gene_dict, save='level_3_t_cells_h', **level_3_dot_kwargs)
for scale_mode in ('var', 'group'):
    sc.pl.dotplot(
        pdata, level_3_t_gene_dict,
        standard_scale=scale_mode,
        save=f'level_3_t_cells_{scale_mode}_h',
        **level_3_dot_kwargs,
    )

### Differential expression (DEG): dendritic cells

In [None]:
# copy annotations to raw data (values are aligned on the obs index)
for annotation in ('level_2', 'condition'):
    raw_cdata.obs[annotation] = rdata.obs[annotation]

In [None]:
# create subset and new labels for visualization
# Keep only the dendritic-cell types of `level_2_dc_order`. .copy() detaches
# the subset from raw_cdata so the new obs column (and the layers added later)
# are written to a real AnnData rather than to a view.
pdata = raw_cdata[raw_cdata.obs['level_2'].isin(level_2_dc_order), ].copy()
# combined label "<level_2>_<condition>" used as the grouping key
pdata.obs['type-condition'] = pdata.obs['level_2'].astype(str) + '_' + pdata.obs['condition'].astype(str)

In [None]:
# reorder categories
# `condition_order` is never assigned in this notebook, so on a fresh kernel
# this cell raised NameError. A value defined earlier in the session is kept;
# otherwise fall back to the sorted condition labels found in the data.
# TODO(review): replace the fallback with the intended explicit condition order.
if 'condition_order' not in globals():
    condition_order = sorted(pdata.obs['condition'].astype(str).unique())

# condition-major order: every DC type for the first condition, then the next, ...
dc_condition_order = [f"{j}_{i}" for i in condition_order for j in level_2_dc_order]
pdata.obs['type-condition'] = pdata.obs['type-condition'].astype('category')
# reorder_categories raises unless every type/condition combination is present
pdata.obs['type-condition'] = pdata.obs['type-condition'].cat.reorder_categories(dc_condition_order)

In [None]:
# Add log layer
# Library-size normalisation is applied to the counts first and the log1p
# transform second. The previous order (log1p, then normalize_total) rescaled
# already-logged values, which is not the log-normalised input that
# rank_genes_groups expects when it derives log fold changes.
pdata.layers['log_raw'] = pdata.layers['raw'].copy()
sc.pp.normalize_total(pdata, layer='log_raw')
pdata.layers['log_raw'] = np.log1p(pdata.layers['log_raw'])

# Find gene markers for each cluster (Wilcoxon test per 'type-condition' group,
# keeping all genes in the result)
sc.tl.rank_genes_groups(pdata, 'type-condition', method='wilcoxon', layer='log_raw', pts=False, use_raw=False, n_genes=pdata.shape[1])

In [None]:
# plot dot plots: log fold changes of the selected genes for each
# 'type-condition' group, on a diverging colour map clipped to [-1, 1]
dotplot_genes = ['Ccr7', 'Cd40', 'Cd83', 'Cd86', 'H2-K1']
sc.pl.rank_genes_groups_dotplot(
    pdata,
    key='rank_genes_groups',
    var_names=dotplot_genes,
    dendrogram=False,
    values_to_plot='logfoldchanges',
    cmap='bwr',
    vmin=-1,
    vmax=1,
    save='level_2_dc_logfc',
)