# Visualization for LN dataset

2024-06-10

In [None]:
# import packages
import os
import numpy as np
import pandas as pd
import seaborn as sns
import scanpy as sc
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

In [None]:
# output figure format: store text in PDF / PostScript files as Type 42
# (TrueType) fonts
import matplotlib
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

In [None]:
# silence all warnings for the rest of the session
# (simplefilter registers the same catch-all 'ignore' rule as
# filterwarnings('ignore') called without message/module patterns)
import warnings
warnings.simplefilter('ignore')

## Input

In [None]:
# define IO path and load data object
base_path = './'
expr_path = os.path.join(base_path, 'data')

# create the output / figure folders if they are missing; exist_ok=True makes
# re-running the cell a no-op and avoids the check-then-create pattern
output_path = os.path.join(base_path, 'output')
os.makedirs(output_path, exist_ok=True)
fig_path = os.path.join(base_path, "figures")
os.makedirs(fig_path, exist_ok=True)

# load object with cell type labels
# NOTE: the file name is built from today's date, so this only finds a file
# whose name carries the current date; to load a file from another day,
# use a fixed file name as in the example below
from datetime import datetime
date = datetime.today().strftime('%Y-%m-%d')
cdata = sc.read_h5ad(os.path.join(output_path, f'{date}-combined-celltyping-LN.h5ad'))

# example
# cdata = sc.read_h5ad(os.path.join(output_path, f'2024-06-24-combined-celltyping-LN.h5ad'))
cdata

## Add molecular region info

In [None]:
# read the table of region labels from SPIN; the first CSV column is used
# as the row index
region_label_file = os.path.join(expr_path, 'LN_SPIN_region_label.csv')
region_label_df = pd.read_csv(region_label_file, index_col=0)
region_label_df

In [None]:
# look up the region label of every row of cdata.obs (matched on the obs
# index) and store it as a new obs column
region_by_cell = region_label_df['region'].loc[cdata.obs.index]
cdata.obs['region'] = region_by_cell
cdata.obs

## Add RBD quantification

In [None]:
# read the RBD quantification table; the first CSV column is used as the
# row index
rbd_counts_file = os.path.join(expr_path, 'LN_RBD_quantification.csv')
rbd_counts_df = pd.read_csv(rbd_counts_file, index_col=0)
rbd_counts_df

In [None]:
# add RBD quantification to the cell typing object
# rows present in both tables (kept as a variable for inspection)
overlap_index = rbd_counts_df.index.intersection(cdata.obs.index)

# align the counts to the rows of cdata.obs in one step: rows without an entry
# in the RBD table get 0, and the column keeps the dtype of the table instead
# of writing the counts into a column pre-filled with integer zeros
cdata.obs['RBD_count'] = rbd_counts_df['count'].reindex(cdata.obs.index, fill_value=0)

# log(1 + count) version of the same column
cdata.obs['RBD_log'] = np.log1p(cdata.obs['RBD_count'])
cdata.obs

## Colors

In [None]:
# set plot style
# NOTE: switching to the 'default' style resets matplotlib's rcParams,
# including the pdf/ps font type that was set at the top of the notebook
plt.style.use('default')

# change scanpy plotting settings
sc.settings.figdir = fig_path
sc.set_figure_params(format='pdf', dpi=150)

# re-apply the Type 42 (TrueType) font setting after the resets above so that
# the figures saved below still use it
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

In [None]:
# color look-up tables (label -> hex color); the key order of each dictionary
# is the order in which create_palette lays out the colors

# level_1 cell types
level_1_dictionary = {
    'T cells': '#1a5fd6',
    'B cells': '#FBB040',
    'Macrophages': '#00A651',
    'Dendritic cells': '#92278F',
    'NK cells': '#386363',
    'Endothelial cells': '#d12852',
    'NA': '#dbdbdb',
}

# subsets of the level_1 colors, taken from the table above so that the same
# cell type always has the same color
b_cell_dictionary = {name: level_1_dictionary[name] for name in ('B cells',)}
rbd_level_1_dictionary = {
    name: level_1_dictionary[name]
    for name in ('B cells', 'Macrophages', 'Dendritic cells')
}

# level_2 cell types, one table per lineage
level_2_t_cells_dictionary = {'CD4+ T cells': '#42cf13', 'CD8+ T cells': '#2a73bd', 'T cells': '#ad7307'}
level_2_dc_dictionary = {'cDC1': '#fa6e52', 'cDC2': '#4164a6', 'Other Dendritic cells': '#b079f2'}
level_2_mc_dictionary = {'Macrophages': '#8120f5', 'Activated Macrophages': '#e0416c', 'Monocytes': '#13ede6'}

# experimental conditions
condition_dictionary = {'LNA': '#C87E77', 'GG': '#DFDEDE', 'PolyC': '#519AD4'}

# region labels (integer keys)
region_dictionary = {0: '#f593e9', 1: '#1cd6d6', 2: '#14cc0a', 3: '#ed8e11'}

# APC vs. non-APC
apc_dictionary = {
    'APC': '#ed3228',
    'Non-APC': '#1c5fed',
}

In [None]:
# create palette function
def create_palette(category_to_color, fig_path=None, fig_name=None, save=False):
    """Build a seaborn palette and a matplotlib colormap from a color dictionary.

    The palette is also drawn as a strip of swatches whose x tick labels are
    the dictionary keys.

    Parameters
    ----------
    category_to_color : dict
        Maps a category label to a color; the dictionary order is the order
        of the colors in the palette.
    fig_path : str, optional
        Folder the swatch figure is saved to; required when `save` is True.
    fig_name : str, optional
        File name of the swatch figure; required when `save` is True.
    save : bool
        If True, write the swatch figure to `fig_path/fig_name`.

    Returns
    -------
    tuple
        `(pl, cmap)`: the seaborn palette and a `ListedColormap` built from
        the palette's hex colors.

    Raises
    ------
    ValueError
        If `save` is True but `fig_path` or `fig_name` is missing.
    """
    # fail early with a clear message instead of a TypeError from os.path.join
    # after the figure has already been drawn
    if save and (fig_path is None or fig_name is None):
        raise ValueError("save=True requires both fig_path and fig_name")

    categories = list(category_to_color)
    pl = sns.color_palette(list(category_to_color.values()))
    cmap = ListedColormap(pl.as_hex())

    # draw the swatches and label each one with its category
    sns.palplot(pl)
    plt.xticks(range(len(categories)), categories, size=5, rotation=45)
    plt.tight_layout()
    if save:
        plt.savefig(os.path.join(fig_path, fig_name))
    plt.show()

    return pl, cmap

In [None]:
# create palette
# one call per color dictionary; each call draws the swatch strip of that
# dictionary and returns (palette, colormap). Nothing is saved to disk here
# because `save` keeps its default of False.
level_1_pl, level_1_cmap = create_palette(level_1_dictionary)
b_cells_pl, b_cells_cmap = create_palette(b_cell_dictionary)
rbd_level_1_pl, rbd_level_1_cmap = create_palette(rbd_level_1_dictionary)
level_2_t_cells_pl, level_2_t_cells_cmap = create_palette(level_2_t_cells_dictionary)
level_2_dc_pl, level_2_dc_cmap = create_palette(level_2_dc_dictionary)
level_2_mc_pl, level_2_mc_cmap = create_palette(level_2_mc_dictionary)
condition_pl, condition_cmap = create_palette(condition_dictionary)
region_pl, region_cmap = create_palette(region_dictionary)
apc_pl, apc_cmap = create_palette(apc_dictionary)

## Spatial map

In [None]:
# plot spatial map function
def plot_sct(cdata, sample, category, subset_list, palette, bg_data=None, file_name=None, org_size=False, spot_size=5, bg_color='#dbdbdb'):
    """Draw the cells of one sample at their `global_x` / `global_y` position.

    Two scatter layers are drawn on the same axes: first all background cells
    in a single color, then the cells of `cdata` colored by `category`.

    Parameters
    ----------
    cdata : object with an `.obs` DataFrame
        Cells to color; `.obs` must contain 'sample', 'global_x', 'global_y'
        and the `category` column.
    sample : value compared against `.obs['sample']` to select the rows.
    category : str
        Name of the obs column used for the hue.
    subset_list : list
        Category values to keep, in plotting order; an empty list keeps all
        cells and leaves the column untouched.
    palette : colors passed to seaborn for the hue levels.
    bg_data : object with an `.obs` DataFrame, optional
        Source of the background layer; defaults to `cdata`.
    file_name : str, optional
        File name inside the module-level `fig_path`; when None the figure is
        only shown, not saved.
    org_size : bool
        If True, the figure size is derived from the maximum coordinates
        (max / 10000 * 3); otherwise the figure is 10 x 10.
    spot_size : marker size of both layers.
    bg_color : color of the background layer.
    """
    # explicit None check: truth-testing the data object itself would depend
    # on how that object defines truthiness
    background = cdata if bg_data is None else bg_data
    current_complete_obs = background.obs.loc[background.obs['sample'] == sample, :].copy()
    current_subset_obs = cdata.obs.loc[cdata.obs['sample'] == sample, :].copy()

    if subset_list:
        # build the mask from the frame that is being filtered; the background
        # frame may come from a different object (bg_data)
        keep = current_subset_obs[category].isin(subset_list)
        current_subset_obs = current_subset_obs.loc[keep, :].copy()
        # fix the category order to subset_list; set_categories (unlike
        # reorder_categories) does not raise when this sample has no cells
        # for one of the requested categories
        current_subset_obs[category] = (
            current_subset_obs[category]
            .astype(object)
            .astype('category')
            .cat.set_categories(subset_list)
        )

    if org_size:
        fig_size = np.array([current_complete_obs['global_x'].max(), current_complete_obs['global_y'].max()]) / 10000 * 3
    else:
        fig_size = (10, 10)

    fig, ax = plt.subplots(figsize=fig_size)
    # background layer
    sns.scatterplot(x='global_x', y='global_y', data=current_complete_obs, color=bg_color, s=spot_size, linewidth=0, ax=ax, rasterized=True)
    # colored layer
    sns.scatterplot(x='global_x', y='global_y', hue=category, data=current_subset_obs, palette=palette, s=spot_size, linewidth=0, legend=False, ax=ax, rasterized=True, alpha=.8)
    ax.set_aspect('equal')
    ax.axis('off')
    fig.tight_layout()
    # file_name defaults to None; only write a file when a name was given
    if file_name is not None:
        fig.savefig(os.path.join(fig_path, file_name), dpi=300)
    plt.show()
    # release the figure so that looping over many samples does not keep
    # every figure open
    plt.close(fig)

In [None]:
# order plotting categories
# make level_1 categorical, with its categories in the key order of the
# level_1 color dictionary
level_1_order = list(level_1_dictionary)
cdata.obs['level_1'] = cdata.obs['level_1'].astype('category').cat.reorder_categories(level_1_order)

In [None]:
# region and level_1 maps: two figures per sample, all cells colored
# (empty subset list)
sample_names = cdata.obs['sample'].cat.categories
for current_sample in sample_names:
    print(current_sample)
    plot_sct(
        cdata, current_sample, 'region', [], region_pl,
        file_name=f'sct_{current_sample}_region.pdf',
    )
    plot_sct(
        cdata, current_sample, 'level_1', [], level_1_pl,
        file_name=f'sct_{current_sample}_level_1.pdf',
        bg_color='#edebeb',
    )

In [None]:
# cells with a positive RBD count, leaving out those labelled 'Monocytes'
# at level_2
has_rbd = cdata.obs['RBD_count'] > 0
not_monocyte = cdata.obs['level_2'] != 'Monocytes'
cdata_rbd = cdata[has_rbd & not_monocyte, ]

# one map per sample: selected cells colored by level_1 on top of all cells
# of the sample as background
for current_sample in cdata.obs['sample'].cat.categories:
    print(current_sample)
    plot_sct(
        cdata_rbd, current_sample, 'level_1',
        ['B cells', 'Macrophages', 'Dendritic cells'], rbd_level_1_pl,
        bg_data=cdata,
        file_name=f'sct_{current_sample}_level_1_rbd.pdf',
        spot_size=10,
        bg_color='#edebeb',
    )

## Gene marker

### level_2

In [None]:
# marker genes per level_2 label (label -> list of gene names); the key order
# is reused below as the category order of level_2
level_2_gene_dict = {
    'T cells': ['Cd3e', 'Cd3g', 'Cd3d'],
    'CD4+ T cells': ['Cd4'],
    'CD8+ T cells': ['Cd8a'],
    'B cells': ['Cd19', 'Cd22', 'Cd79a', 'Ms4a1'],
    'Macrophages': ['Adgre1', 'Itgam'],
    'Activated Macrophages': ['Cd68'],
    'Monocytes': ['Csf1r', 'Lyz2'],
    'cDC1': ['Irf8'],
    'cDC2': ['Irf4'],
    'Other Dendritic cells': ['Itgax', 'Xcr1'],
    'NK cells': ['Ccl5', 'Gzma', 'Nkg7', 'Ptprc'],
    'Endothelial cells': ['Cdh5', 'Lrg1', 'Ly6a', 'Pecam1'],
}

In [None]:
# subset object for visualization
# drop the cells labelled 'NA' at level_1; .copy() turns the subset into an
# independent object because its obs table is modified in the next step
pdata = cdata[cdata.obs['level_1'] != 'NA', ].copy()

In [None]:
# reorder category
# level_2 categories follow the key order of the marker dictionary
level_2_order = list(level_2_gene_dict)
pdata.obs['level_2'] = pdata.obs['level_2'].astype('category').cat.reorder_categories(level_2_order)

In [None]:
# plot dot plot of the level_2 markers, grouped by level_2
sc.pl.dotplot(
    pdata,
    level_2_gene_dict,
    groupby='level_2',
    dendrogram=False,
    use_raw=True,
    cmap='Reds',
    swap_axes=False,
    standard_scale='var',
    save='level_2_var_h',
)

### RBD

In [None]:
# marker dictionaries for the RBD dot plots (group label -> list of names)

# panel 1: one marker per level_2 label; the last entry ('APC') is not a
# level_2 label and is skipped when the category order is derived from the keys
rbd_gene_dict_1 = {
    'B cells': ['Cd79a'],
    'Macrophages': ['Itgam'],
    'Activated Macrophages': ['Cd68'],
    'cDC1': ['Irf8'],
    'cDC2': ['Irf4'],
    'Other Dendritic cells': ['Itgax'],
    'APC': ['H2-K1'],
}

# panel 2
rbd_gene_dict_2 = {'APC': ['Cd86', 'Cd40', 'Ccr7']}

# panel 3
rbd_gene_dict_3 = {'RBD': ['RBD_count']}

In [None]:
# subset object
# keep B cells, dendritic cells and macrophages (level_1) and leave out the
# cells labelled 'Monocytes' at level_2; both conditions are applied in one
# step and .copy() makes the result an independent object, because its obs
# table is modified in the following steps
keep_level_1 = cdata.obs['level_1'].isin(['B cells', 'Dendritic cells', 'Macrophages'])
keep_level_2 = cdata.obs['level_2'] != 'Monocytes'
pdata = cdata[keep_level_1 & keep_level_2, ].copy()

In [None]:
# reorder category
# all keys of the first RBD marker dictionary except the last one ('APC')
level_2_order = list(rbd_gene_dict_1)[:-1]
pdata.obs['level_2'] = pdata.obs['level_2'].astype('category').cat.reorder_categories(level_2_order)

In [None]:
# create new category for visualization
# label of the form '<level_2>_<condition>' for every cell
pdata.obs['type-condition'] = pdata.obs['level_2'].astype(str) + '_' + pdata.obs['condition'].astype(str)

# desired order: cell types in level_2_order, each followed by its conditions
all_combinations = [f"{i}_{j}" for i in level_2_order for j in ['LNA', 'GG', 'PolyC']]

# keep only the combinations that occur in the data: reorder_categories raises
# when asked for a category that has no cells. A label that is in the data but
# not in all_combinations still raises, so unexpected labels are not dropped
# silently.
present = set(pdata.obs['type-condition'])
current_order = [label for label in all_combinations if label in present]
pdata.obs['type-condition'] = pdata.obs['type-condition'].astype('category')
pdata.obs['type-condition'] = pdata.obs['type-condition'].cat.reorder_categories(current_order)

In [None]:
# plot dot plot: one figure per marker dictionary, all with the same settings;
# the figures are saved with the suffixes rbd_var_h_1 .. rbd_var_h_3
dotplot_kwargs = dict(
    groupby='type-condition',
    dendrogram=False,
    use_raw=True,
    cmap='Reds',
    swap_axes=True,
    standard_scale='var',
)
for panel, gene_dict in enumerate((rbd_gene_dict_1, rbd_gene_dict_2, rbd_gene_dict_3), start=1):
    sc.pl.dotplot(pdata, gene_dict, save=f'rbd_var_h_{panel}', **dotplot_kwargs)