# Analysis & Visualization

In [None]:
# Import Packages

%load_ext autoreload
%autoreload 2

import os
import warnings 
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from skimage.filters import threshold_otsu, gaussian
from skimage.morphology import remove_small_objects
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from anndata import AnnData, concat
from tqdm.notebook import tqdm

# Customized packages
from starmap.utilities import *
from starmap.sequencing import *
# from starmap.obj import STARMapDataset, load_data
# import starmap.analyze as anz
# import starmap.viz as viz

import starmap.sc_util as su

# test()

In [None]:
# Get functions 

import colorsys
from random import shuffle

def intervals(parts, start_point, end_point):
    """Return the midpoints of `parts` equal-width bins covering [start_point, end_point].

    parts: number of bins (a value of 0 raises ZeroDivisionError).
    start_point, end_point: numeric bounds of the range being divided.
    Returns a list with one midpoint per bin, in ascending bin order.
    """
    step = (end_point - start_point) / parts
    midpoints = []
    for idx in range(parts):
        left_offset = idx * step
        right_offset = (idx + 1) * step
        # centre of the bin relative to 0, then shifted to the start of the range
        midpoints.append(((left_offset + right_offset) / 2) + start_point)
    return midpoints

## IO

In [None]:
# Set path
# NOTE: `base_path` is an absolute, machine-specific location; edit it when running elsewhere.
base_path = 'Z:/Data/Analyzed/2021-11-23-Hu-MouseBrain/'
out_path = os.path.join(base_path, 'output')    # h5ad / csv outputs
fig_path = os.path.join(base_path, 'figures')   # figures, logs and embeddings

# Create both folders if missing. `exist_ok=True` removes the check-then-create race
# and makes the cell safe to re-run.
for _folder in (out_path, fig_path):
    os.makedirs(_folder, exist_ok=True)

In [None]:
from datetime import datetime

# Run-day stamp (YYYY-MM-DD); later cells prefix output file names with it.
date = f"{datetime.today():%Y-%m-%d}"

In [None]:
sc.settings.figdir = fig_path
sc.set_figure_params(format='tif', dpi=150)

In [None]:
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42

## Input

In [None]:
# Load new data
adata = sc.read_h5ad(os.path.join(out_path, '2022-04-24-Hu-TissueRIBOmap-level3.h5ad'))
adata

## QC

In [None]:
# Load the RIBOmap raw h5ad and label every cell with its protocol
rdata = sc.read_h5ad(os.path.join(base_path, 'RIBOmap', '2022-03-11-RIBOmap-raw.h5ad'))
rdata.obs['protocol'] = 'RIBOmap'

# Calculate QC metrics
sc.pp.calculate_qc_metrics(rdata, percent_top=None, inplace=True)

# Calculate max count for each gene.
# When `X` is a scipy sparse matrix, `.max(axis=0)` returns a 1 x n_genes sparse matrix
# rather than a flat vector, so densify and flatten before storing it as a `var` column.
# For a dense ndarray this is a no-op.
max_counts = rdata.X.max(axis=0)
if hasattr(max_counts, 'toarray'):
    max_counts = max_counts.toarray()
rdata.var['max_counts'] = np.asarray(max_counts).ravel()

In [None]:
from scipy import stats

# Outlier bounds on log1p(total counts): median +/- n * MAD (unscaled MAD, scale=1).
n = 4
log_counts = rdata.obs['log1p_total_counts']

# `stats.median_absolute_deviation` was deprecated in SciPy 1.5 and removed in 1.9.
# `median_abs_deviation` is its replacement; with scale=1 both return the same value.
# The fallback keeps the cell working on older SciPy installs.
_mad_fn = getattr(stats, 'median_abs_deviation', None) or stats.median_absolute_deviation
mad = _mad_fn(log_counts, scale=1)

median_log_counts = log_counts.median()
lower_bd = median_log_counts - n*mad
upper_bd = median_log_counts + n*mad
print(lower_bd)
print(upper_bd)
# Same bounds converted back to the raw count scale
print(np.expm1(lower_bd))

print(np.expm1(upper_bd))

In [None]:
# QC overview: total counts, detected genes, and log1p(total counts) with the MAD bounds
sns.reset_orig()
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))

median_counts = rdata.obs['total_counts'].median()
median_genes = rdata.obs['n_genes_by_counts'].median()

# Panels 1-2: raw distributions with the median marked in red
for panel, metric, center in zip(axs[:2],
                                 ('total_counts', 'n_genes_by_counts'),
                                 (median_counts, median_genes)):
    sns.histplot(rdata.obs[metric], bins=100, ax=panel)
    panel.axvline(center, c='r')
    panel.set_title(f'Median: {center}')

# Panel 3: log-scale distribution with lower / upper filtering bounds
log_panel = axs[2]
sns.histplot(rdata.obs['log1p_total_counts'], bins=100, ax=log_panel)
for bound in (lower_bd, upper_bd):
    log_panel.axvline(bound, c='r')
log_panel.set_title(f'LB: {lower_bd}, UB: {upper_bd}')

# plt.savefig(os.path.join(fig_path, 'qc_histogram_3col.pdf'))
plt.show()

In [None]:
# Coarser (50-bin) view of total counts and detected genes, medians marked in red
sns.reset_orig()
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(13, 5))

median_counts = rdata.obs['total_counts'].median()
median_genes = rdata.obs['n_genes_by_counts'].median()

panels = (
    (axs[0], 'total_counts', median_counts),
    (axs[1], 'n_genes_by_counts', median_genes),
)
for panel, metric, center in panels:
    sns.histplot(rdata.obs[metric], bins=50, ax=panel)
    panel.axvline(center, c='r')
    panel.set_title(f'Median: {center}')

# plt.savefig(os.path.join(fig_path, 'qc_histogram_2col.pdf'))
plt.show()

In [None]:
# Log-scale QC histograms: log1p(total counts) with the MAD bounds, and log1p(genes detected)
sns.reset_orig()
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(13, 5))

sns.histplot(rdata.obs['log1p_total_counts'], bins=100, ax=axs[0])
# red lines mark lower_bd / upper_bd computed in the MAD cell above
axs[0].axvline(lower_bd, c='r')
axs[0].axvline(upper_bd, c='r')
axs[0].set_title(f'LB: {lower_bd}, UB: {upper_bd}')

sns.histplot(rdata.obs['log1p_n_genes_by_counts'], bins=100, ax=axs[1])

# NOTE(review): this file name is the same one used by the commented-out savefig in the
# previous (linear-scale) cell; re-enabling that line would make the two overwrite each other.
plt.savefig(os.path.join(fig_path, 'qc_histogram_2col.pdf'))
plt.show()

## Level 1

### embedding

In [None]:
# Run PCA
# NOTE(review): use_highly_variable=True presumably relies on a 'highly_variable' flag already
# stored in the loaded h5ad (it is not computed anywhere in this notebook) — confirm.
sc.tl.pca(adata, svd_solver='arpack', use_highly_variable=True)

# Plot explained variance (linear scale) to judge how many PCs to keep
sc.pl.pca_variance_ratio(adata, log=False)

In [None]:
%%time
# Computing the neighborhood graph
# These three names are also read by the "Save log" cell below and are reassigned later
# in the sub-level clustering section.
# NOTE(review): the trailing '##' values look like previously tried settings — confirm or remove.
n_neighbors = 50
n_pcs = 30 ## 30
min_dist = 0.15 ## 0.05

sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pcs)

# Run UMAP (fixed random_state for a repeatable layout)
sc.tl.umap(adata, min_dist=min_dist, random_state=0, spread=1) ## 0.5 1

In [None]:
# Save log
# Records the embedding parameters set in the neighbors/UMAP cell next to the figures.
# Opened in 'w' mode, so an existing log is overwritten on every run.
with open(f'{fig_path}/log_level_1.txt', 'w') as f:
    f.write(f"""Number of neighbor: {n_neighbors}
Number of PC: {n_pcs}
Min-distance: {min_dist}""")
    
# save embeddings
# Writes the UMAP coordinates as comma-separated values with no header or cell ids
np.savetxt(f'{fig_path}/embedding_level_1_umap.csv', adata.obsm['X_umap'], delimiter=",")

In [None]:
# Plot single meta UMAP
sc.pl.umap(adata, color='protocol')

# One fixed-size figure per QC covariate, each saved by scanpy with a '_<covariate>' suffix
for covariate in ('total_counts', 'n_genes'):
    fig, ax = plt.subplots(figsize=(7,5))
    sc.pl.umap(adata, color=covariate, save=f'_{covariate}', ax=ax)

In [None]:
# Get colormap
# Builds the level-1 palette from the colours already stored in adata.uns['level_1_colors'];
# the commented lines are alternative ways of choosing the colours.
# level_1_colors = ['', '']
# level_1_pl = sns.color_palette("hls", adata.obs['level_1'].nunique())
level_1_pl = sns.color_palette(adata.uns['level_1_colors'])
level_1_cmap = ListedColormap(level_1_pl.as_hex())
# Preview the palette as a strip of colour swatches
sns.palplot(level_1_pl)

In [None]:
# Save plots
# Plot UMAP with cluster labels w/ new color
sc.pl.umap(adata, color='level_1', legend_loc='on data',
           legend_fontsize=12, legend_fontoutline=2, frameon=False, 
           title='Level 1 clustering', palette=level_1_pl, save=False)

In [None]:
adata.uns['level_1_color_list'] = adata.uns['level_1_colors']
adata.uns['level_1_order'] = ['Neuron', 'Glia']

In [None]:
# Switch scanpy's saved-figure format to PDF (it was set to 'tif' in the IO section)
sc.set_figure_params(format='pdf', dpi=150)

# Level-1 UMAP without a legend, with larger semi-transparent dots
# NOTE(review): legend_loc=False is used here to hide the legend; scanpy documents
# 'none' / None for that purpose — confirm False is accepted by the installed version.
fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_1', frameon=False, legend_loc=False, ax=ax, title='', size=10, alpha=.5,
           palette=level_1_pl, save='_level_1_no_legend_larger')

In [None]:
sc.set_figure_params(format='pdf', dpi=150)

# Same level-1 UMAP rendered twice: once without a legend, once with it in the right margin
legend_variants = (
    (False, '_level_1_no_legend'),
    ('right margin', '_level_1_legend'),
)
for legend_position, file_suffix in legend_variants:
    fig, ax = plt.subplots(figsize=(10, 7))
    sc.pl.umap(adata, color='level_1', frameon=False, legend_loc=legend_position, ax=ax, title='',
               palette=level_1_pl, save=file_suffix)

### gene markers

In [None]:
# Get markers for each cluster
sc.tl.rank_genes_groups(adata, 'level_1', method='wilcoxon', pts=True)

# Filter markers
sc.tl.filter_rank_genes_groups(adata, min_fold_change=1)

In [None]:
current_group = 'Glia'

current_df = sc.get.rank_genes_groups_df(adata, group=current_group, key='rank_genes_groups')
current_df.head(10)

In [None]:
# Dot plot mean expression (##)
sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups_filtered', n_genes=15, dendrogram=False)

In [None]:
# Other type of plots
# Plot z-score heatmap: first from the unfiltered ranking (default key), then from the filtered one
heatmap_kwargs = dict(
    n_genes=15, groupby='level_1', min_logfoldchange=1, use_raw=False, swap_axes=True,
    vmin=-5, vmax=5, cmap='bwr', show_gene_labels=True,
    dendrogram=False, figsize=(30, 20), save=False,
)

sc.pl.rank_genes_groups_heatmap(adata, **heatmap_kwargs)

sc.pl.rank_genes_groups_heatmap(adata, key='rank_genes_groups_filtered', **heatmap_kwargs)

In [None]:
# gene_list = ['Slc17a7', 'Gad1', 'Gad2', 'Sst', 'Pvalb', 'Slc1a3', 'Aqp4', 'Gja1', 'Ctss', 'Plp1', 'Mobp', 'Pdgfra', 'Dcn', 'Myh9', 'Vtn']
gene_list = ['Slc17a7', 'Gad1', 'Sst', 'Pvalb', 'Slc1a3', 'Aqp4', 'Ctss', 'Plp1', 'Mobp', 'Pdgfra', 'Dcn', 'Vtn']

# 3 x 4 grid: one UMAP panel per marker gene (12 genes fill the grid exactly)
fig, axs = plt.subplots(nrows=3, ncols=4, figsize=(20, 10))
axs = axs.flatten()
for panel, gene in zip(axs, gene_list):
    # show=False keeps scanpy from rendering each panel separately
    sc.pl.umap(adata, color=gene, title=gene, ax=panel, show=False)

plt.tight_layout(pad=0)
plt.savefig(os.path.join(fig_path, 'markers_on_umap.pdf'))
plt.show()

In [None]:
sc.set_figure_params(format='pdf', dpi=150)
sc.pl.rank_genes_groups_heatmap(adata, n_genes=30, groupby='level_1', min_logfoldchange=1, use_raw=False, swap_axes=True, 
                                vmin=-5, vmax=5, cmap='bwr', show_gene_labels=True, key='rank_genes_groups', 
                                dendrogram=False, figsize=(30, 20), save='_level_1')

In [None]:
marker_df = sc.get.rank_genes_groups_df(adata, group=['Neuron', 'Glia'], key='rank_genes_groups', log2fc_min=1)
marker_df.to_csv(f'{fig_path}/markers_level_1.csv')

### spatial map

In [None]:
# Spatial map of all cells coloured by level-1 label ('column' / 'row' are the obs coordinates)
fig, ax = plt.subplots(figsize=(10, 10))
# Backdrop layer of every cell.
# NOTE(review): '#1111' is 4-digit hex shorthand (#RGBA), presumably a near-transparent dark
# grey — confirm it is intended; the per-cluster cell below uses '#d4d4d4' for its backdrop.
sns.scatterplot(x='column', y='row', data=adata.obs, color='#1111', s=2, legend=False, ax=ax)
sns.scatterplot(x='column', y='row', hue='level_1', data=adata.obs, palette=level_1_pl, s=2, legend=False, ax=ax)
ax.axis('off')
plt.show()

In [None]:
# One spatial map per level-1 cluster: all cells in light grey, the cluster's cells on top
for current_group in adata.obs['level_1'].cat.categories:

    in_group = adata.obs['level_1'] == current_group
    group_obs = adata.obs.loc[in_group, :]

    fig, ax = plt.subplots(figsize=(10, 10))
    # grey backdrop of every cell
    sns.scatterplot(x='column', y='row', data=adata.obs, color='#d4d4d4', s=2, legend=False, ax=ax)
    # highlighted cluster, drawn larger
    sns.scatterplot(x='column', y='row', hue='level_1', data=group_obs, palette=level_1_pl, s=4, legend=False, ax=ax)
    ax.title.set_text(current_group)
    ax.axis('off')
    plt.show()

## Level 2

### embedding

In [None]:
level_2_colors_dict = {

    'Excitatory neuron': '#75db57', 
    'Inhibitory neuron': '#db5f57', 
    'Astrocyte': '#dbd057', 
    'Oligodendrocyte': '#57dbdb', 
    'Oligodendrocytes precursor cell': '#667872', 
    'Microglia': '#8597c6', 
    'Vascular cell': '#5e0737', 
    'Unknown': '#cccccc',
    
}

In [None]:
# Derive the level-2 colour list and label order from the insertion order of level_2_colors_dict.
# NOTE(review): the palette is passed to the plots positionally, so this presumably assumes the
# categories of adata.obs['level_2'] are in the same order as the dict keys — no reorder is
# done in this section; confirm.
level_2_colors = list(level_2_colors_dict.values())
level_2_pl = sns.color_palette(level_2_colors)
level_2_cmap = ListedColormap(level_2_pl.as_hex())

level_2_order = list(level_2_colors_dict.keys())
# Preview the palette with each swatch labelled by its cell type
sns.palplot(level_2_pl)
plt.xticks(range(len(level_2_order)), level_2_order, size=10, rotation=45)
plt.tight_layout()
plt.show()

In [None]:
adata.uns['level_2_color_list'] = level_2_colors
adata.uns['level_2_order'] = level_2_order

In [None]:
fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_2', frameon=False, legend_loc=False, ax=ax, title='', size=10, alpha=.5,
           palette=level_2_pl, save='_level_2_no_legend_larger')

In [None]:
fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_2', frameon=False, legend_loc=False, ax=ax, title='',
           palette=level_2_pl, save='_level_2_no_legend')

sc.set_figure_params(format='pdf', dpi=150)
fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_2', frameon=False, legend_loc='right margin', ax=ax, title='',
           palette=level_2_pl, save='_level_2_legend')

### gene markers

In [None]:
adata.raw.X.max()

In [None]:
# Get markers for each cluster
sc.tl.rank_genes_groups(adata, 'level_2', method='wilcoxon', pts=True)

# Filter markers
sc.tl.filter_rank_genes_groups(adata, min_fold_change=1)

In [None]:
marker_df = sc.get.rank_genes_groups_df(adata, group=adata.uns['level_2_order'], key='rank_genes_groups_filtered', log2fc_min=1)
marker_df.to_csv(f'{fig_path}/level_2_markers.csv')

In [None]:
current_group = 'Astrocyte'

current_df = sc.get.rank_genes_groups_df(adata, group=current_group, key='rank_genes_groups')
current_df.head(10)

In [None]:
# Dot plot mean expression (##)
sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups_filtered', n_genes=15, dendrogram=False)

In [None]:
# Other type of plots
# Plot z-score heatmap
sc.pl.rank_genes_groups_heatmap(adata, n_genes=10, groupby='level_2', min_logfoldchange=1, use_raw=False, swap_axes=True, 
                                vmin=-5, vmax=5, cmap='bwr', show_gene_labels=True,
                                dendrogram=False, figsize=(30, 20), save=False)

sc.pl.rank_genes_groups_heatmap(adata, n_genes=10, groupby='level_2', min_logfoldchange=1, use_raw=False, swap_axes=True, 
                                vmin=-5, vmax=5, cmap='bwr', show_gene_labels=True, key='rank_genes_groups_filtered', 
                                dendrogram=False, figsize=(30, 20), save=False)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=10, groupby='level_2', min_logfoldchange=1, use_raw=False, swap_axes=True, 
                                vmin=-5, vmax=5, cmap='bwr', show_gene_labels=True, key='rank_genes_groups_filtered', 
                                dendrogram=False, figsize=(30, 20), save='_level_2')

### spatial map

In [None]:
fig, ax = plt.subplots(figsize=(10, 10))
sns.scatterplot(x='column', y='row', data=adata.obs, color='#1111', s=2, legend=False, ax=ax)
sns.scatterplot(x='column', y='row', hue='level_2', data=adata.obs, palette=level_2_pl, s=2, legend=False, ax=ax)
ax.axis('off')
plt.show()

In [None]:
# One spatial map per level-2 cluster; the cluster name is printed instead of used as a title
for current_group in adata.obs['level_2'].cat.categories:
    
    # Plot single cluster
    print(current_group)
    fig, ax = plt.subplots(figsize=(10, 10))
    # backdrop of every cell ('#1111' is 4-digit #RGBA hex shorthand)
    sns.scatterplot(x='column', y='row', data=adata.obs, color='#1111', s=2, legend=False, ax=ax)
    # cells of the current cluster only, drawn larger on top
    sns.scatterplot(x='column', y='row', hue='level_2', data=adata.obs.loc[adata.obs['level_2'] == current_group, :], palette=level_2_pl, s=4, legend=False, ax=ax)
    # ax.title.set_text(current_group)
    ax.axis('off')
    plt.show()

## Level 3

In [None]:
# Construct label dict based on 2022-03-19 discussion
# Maps old level-3 cluster names (keys) to revised names (values).
# - Several keys share a target (e.g. 'Ex L2/3 1' and 'Ex Mix 4' -> 'Ex L2'), which merges clusters.
# - Some targets are also keys (e.g. 'Ex L4', 'Ex L6'), so the mapping must be applied in a
#   single pass rather than one pair at a time.
# NOTE(review): `level_3_dict` is reassigned by the 2022-03-29 cell below before the
# "change label" cell reads it, so in a top-to-bottom run this mapping is never applied.
level_3_dict = {

    'Ex L2/3 1': 'Ex L2', 
    'Ex L2/3 2': 'Ex L2/3',
    'Ex L2/3 3': 'Ex L4',
    'Ex L4': 'Ex L4/5',
    'Ex L4/5 1': 'Ex L5',
    'Ex L6': 'Ex L6 EP 1',
    'Ex L6a 1': 'Ex L6',
    'Ex CTXsp': 'Ex L6 EP 2', 
    'Ex MEA': 'Ex NLOT', 
    'Ex Mix 1': 'Ex Mix', 
    'Ex Mix 2': 'Ex CTX', 
    'Ex Mix 3': 'Ex RSP', 
    'Ex Mix 4': 'Ex L2', 
    'Ex Mix 5': 'Ex RSP',
    'Inh Mix 1': 'Inh Amygdala', 
    'Inh Mix 2': 'Inh Amygdala',
    'Vascular leptomeningeal cell 1': 'Pericytes/Vascular endothelial cell 1', 
    'Vascular leptomeningeal cell 2': 'Vascular leptomeningeal cell 1', 
    'Vascular leptomeningeal cell 3': 'Vascular smooth muscle cell', 
    'Vascular leptomeningeal cell 4': 'Vascular leptomeningeal cell 2',
    'Pericytes/Vascular endothelial cell 1': 'Pericytes/Vascular endothelial cell 2', 
    'Pericytes/Vascular endothelial cell 2': 'Pericytes/Vascular endothelial cell 3', 

}

In [None]:
adata.uns['level_3_order']

In [None]:
# set order 
# Display order of the level-3 labels after the 2022-03-19 relabelling, using the long-form
# glia / vascular names: excitatory, inhibitory, glia, vascular, then 'Unknown' last.
# NOTE(review): `level_3_order` is reassigned (abbreviated names) by a later cell before the
# "change label" cell reads it, so this list is not used in a top-to-bottom run.
level_3_order = [
    'Ex L2', 'Ex L2/3', 'Ex L4', 'Ex L4/5', 'Ex L5', 'Ex L6', 'Ex L6 EP 1', 'Ex L6 EP 2',
    'Ex CTX', 'Ex RSP', 'Ex PIR', 'Ex CA1', 'Ex CA2', 'Ex CA3', 'Ex DG', 'Ex NLOT', 'Ex MH',
    'Ex TH 1', 'Ex TH 2', 'Ex TH 3', 'Ex Mix',
    'Inh Npy', 'Inh Pvalb 1', 'Inh Pvalb 2', 'Inh Sst', 'Inh HY 1', 'Inh HY 2', 'Inh HY 3',
    'Inh HY 4', 'Inh HY 5', 'Inh LH/HY 1', 'Inh LH/HY 2', 'Inh LH/HY 3', 'Inh STR 1', 'Inh STR 2', 'Inh STR 3', 'Inh STR 4', 'Inh STR 5',
    'Inh Amygdala',
    'Astrocyte 1', 'Astrocyte 2', 'Astrocyte 3',
    'Oligodendrocyte 1', 'Oligodendrocyte 2', 'Oligodendrocyte 3', 'Oligodendrocyte 4', 'Oligodendrocytes precursor cell',
    'Microglia', 
    'Vascular leptomeningeal cell 1', 'Vascular leptomeningeal cell 2', 'Vascular smooth muscle cell',
    'Pericytes/Vascular endothelial cell 1', 'Pericytes/Vascular endothelial cell 2', 'Pericytes/Vascular endothelial cell 3',
    'Chorid plexus epithelial cells', 'Ependymal cells',
    'Unknown',
]

In [None]:
# Construct label dict based on 2022-03-29 discussion
level_3_dict = {

    'Astrocyte 1': 'Astro 1', 
    'Astrocyte 2': 'Astro 2',
    'Astrocyte 3': 'Astro 3',
    'Oligodendrocyte 1': 'Oligo 1', 
    'Oligodendrocyte 2': 'Oligo 2',
    'Oligodendrocyte 3': 'Oligo 3', 
    'Oligodendrocyte 4': 'Oligo 4',
    'Oligodendrocytes precursor cell': 'OPC', 
    'Microglia': 'Micro',
    'Vascular leptomeningeal cell 1': 'VLMC 1', 
    'Vascular leptomeningeal cell 2': 'VLMC 2',
    'Vascular smooth muscle cell': 'VSMC',
    'Pericytes/Vascular endothelial cell 1': 'Peri/VEC 1',
    'Pericytes/Vascular endothelial cell 2': 'Peri/VEC 2',
    'Pericytes/Vascular endothelial cell 3': 'Peri/VEC 3',
    'Chorid plexus epithelial cells': 'CHOR', 
    'Ependymal cells': 'EPEN'

}

In [None]:
# set order 
level_3_order = [
    'Ex L2', 'Ex L2/3', 'Ex L4', 'Ex L4/5', 'Ex L5', 'Ex L6', 'Ex L6 EP 1', 'Ex L6 EP 2',
    'Ex CTX', 'Ex RSP', 'Ex PIR', 'Ex CA1', 'Ex CA2', 'Ex CA3', 'Ex DG', 'Ex NLOT', 'Ex MH',
    'Ex TH 1', 'Ex TH 2', 'Ex TH 3', 'Ex Mix',
    'Inh Npy', 'Inh Pvalb 1', 'Inh Pvalb 2', 'Inh Sst', 'Inh HY 1', 'Inh HY 2', 'Inh HY 3',
    'Inh HY 4', 'Inh HY 5', 'Inh LH/HY 1', 'Inh LH/HY 2', 'Inh LH/HY 3', 'Inh STR 1', 'Inh STR 2', 'Inh STR 3', 'Inh STR 4', 'Inh STR 5',
    'Inh Amygdala',
    'Astro 1', 'Astro 2', 'Astro 3',
    'Oligo 1', 'Oligo 2', 'Oligo 3', 'Oligo 4', 'OPC',
    'Micro', 
    'VLMC 1', 'VLMC 2', 'VSMC',
    'Peri/VEC 1', 'Peri/VEC 2', 'Peri/VEC 3',
    'CHOR', 'EPEN',
    'Unknown',
]

In [None]:
# change label
# Rename level-3 labels via `level_3_dict` (labels without an entry are kept), then restore
# the categorical dtype with the categories in `level_3_order`.
relabelled = adata.obs['level_3'].astype(object).replace(level_3_dict)
adata.obs['level_3'] = relabelled
# reorder_categories requires level_3_order to list exactly the labels present after renaming
adata.obs['level_3'] = adata.obs['level_3'].astype('category').cat.reorder_categories(level_3_order)

In [None]:
adata

In [None]:
# backup
# Write the relabelled object to a date-stamped h5ad in the output folder
from datetime import datetime

date = f"{datetime.today():%Y-%m-%d}"
backup_file = f"{out_path}/{date}-Hu-TissueRIBOmap-level3.h5ad"
adata.write_h5ad(backup_file)

### embedding

In [None]:
level_3_colors_dict = {

    'Ex L2': '#c4ff45', 
    'Ex L2/3': '#9ee800', 
    'Ex L4': '#05ba02', 
    'Ex L4/5': '#32a630', 
    'Ex L5': '#256b00', 
    'Ex L6': '#316e10', 
    'Ex L6 EP 1': '#406e27', 
    'Ex L6 EP 2': '#496e36',
    'Ex CTX': '#6fa66f', 
    'Ex Mix': '#c5fcc5',
    'Ex RSP': '#00e67a', 
    'Ex CA1': '#77ed8f', 
    'Ex CA2': '#28330b', 
    'Ex CA3': '#ffd438', 
    'Ex DG': '#295029', 
    'Ex NLOT': '#317531', 
    'Ex PIR': '#82ad2d', 
    'Ex MH': '#d93f3c',
    'Ex TH 1': '#fa8380', 
    'Ex TH 2': '#ed5e5b', 
    'Ex TH 3': '#a84240', 
    'Inh Npy': '#ffdcbd', 
    'Inh Sst': '#fead65', 
    'Inh Pvalb 1': '#ee750a', 
    'Inh Pvalb 2': '#b76319', 
    'Inh HY 1': '#f29ed8', 
    'Inh HY 2': '#ee90d1', 
    'Inh HY 3': '#e883c9',
    'Inh HY 4': '#e377c1', 
    'Inh HY 5': '#d863b4', 
    'Inh LH/HY 1': '#cc52a7', 
    'Inh LH/HY 2': '#be4298', 
    'Inh LH/HY 3': '#a34085', 
    'Inh STR 1': '#7aecf8', 
    'Inh STR 2': '#3cdeef', 
    'Inh STR 3': '#17bdce', 
    'Inh STR 4': '#228b96', 
    'Inh STR 5': '#266065',
    'Inh Amygdala': '#7c4d24',
    'Astro 1': '#eaeaa2', 
    'Astro 2': '#bcbc5e', 
    'Astro 3': '#a6a64d',
    'Oligo 1': '#a8e1eb',
    'Oligo 2': '#9ed9e4', 
    'Oligo 3': '#7dc7d5', 
    'Oligo 4': '#61b2c1', 
    'OPC': '#667872',
    'Micro': '#8597c6', 
    'VLMC 1': '#1f76b3', 
    'VLMC 2': '#246693', 
    'VSMC': '#774d44',
    'Peri/VEC 1': '#d3a59c', 
    'Peri/VEC 2': '#c49c94', 
    'Peri/VEC 3': '#b2847b',
    'CHOR': '#7f52a9', 
    'EPEN': '#c4b0d4',
    'Unknown': '#cccccc',
    
}

In [None]:
adata

In [None]:
# Get colormap
# The colours and label order are read back from adata.uns (as stored in the loaded h5ad);
# the commented lines are the alternatives: a generated 'hls' palette, or the
# level_3_colors_dict defined above.
# level_3_colors = sns.color_palette("hls", adata.obs['level_3'].nunique() - 1)
# level_3_colors.append((0.8, 0.8, 0.8))
level_3_colors = adata.uns['level_3_color_list']
# level_3_colors = list(level_3_colors_dict.values())
level_3_pl = sns.color_palette(level_3_colors)
level_3_cmap = ListedColormap(level_3_pl.as_hex())

# level_3_order = list(level_3_colors_dict.keys())
level_3_order = adata.uns['level_3_order']
# Preview the palette with each swatch labelled by its level-3 name
sns.palplot(level_3_pl)
plt.xticks(range(len(level_3_order)), level_3_order, size=10, rotation=45)
plt.tight_layout()
plt.show()

In [None]:
adata.obs['level_3'] = adata.obs['level_3'].cat.reorder_categories(level_3_order)

In [None]:
# set order (legacy labels)
# Category order for the ORIGINAL level-3 cluster names, i.e. the names used before the
# relabelling dictionaries above were applied.
legacy_level_3_order = [
    'Ex L2/3 1', 'Ex L2/3 2', 'Ex L2/3 3', 'Ex L4', 'Ex L4/5 1', 'Ex L6', 'Ex L6a 1',
    'Ex CTXsp', 'Ex PIR', 'Ex CA1', 'Ex CA2', 'Ex CA3', 'Ex DG', 'Ex MEA', 'Ex MH',
    'Ex TH 1', 'Ex TH 2', 'Ex TH 3', 'Ex Mix 1', 'Ex Mix 2', 'Ex Mix 3', 'Ex Mix 4', 'Ex Mix 5',
    'Inh Npy', 'Inh Pvalb 1', 'Inh Pvalb 2', 'Inh Sst', 'Inh HY 1', 'Inh HY 2', 'Inh HY 3',
    'Inh HY 4', 'Inh HY 5', 'Inh LH/HY 1', 'Inh LH/HY 2', 'Inh LH/HY 3', 'Inh STR 1', 'Inh STR 2', 'Inh STR 3', 'Inh STR 4', 'Inh STR 5',
    'Inh Mix 1', 'Inh Mix 2',
    'Astrocyte 1', 'Astrocyte 2', 'Astrocyte 3',
    'Oligodendrocyte 1', 'Oligodendrocyte 2', 'Oligodendrocyte 3', 'Oligodendrocyte 4', 'Oligodendrocytes precursor cell',
    'Microglia', 
    'Vascular leptomeningeal cell 1', 'Vascular leptomeningeal cell 2', 'Vascular leptomeningeal cell 3', 'Vascular leptomeningeal cell 4',
    'Pericytes/Vascular endothelial cell 1', 'Pericytes/Vascular endothelial cell 2', 'Chorid plexus epithelial cells', 'Ependymal cells',
    'Unknown',
]

# Apply the legacy order only when the data still carries exactly these labels.
# Assigning `level_3_order` unconditionally replaced the current order with stale names
# (it is later stored in adata.uns['level_3_order'] and used to label the marker table),
# and `reorder_categories` raises ValueError when the category sets differ.
if set(adata.obs['level_3'].cat.categories) == set(legacy_level_3_order):
    level_3_order = legacy_level_3_order
    adata.obs['level_3'] = adata.obs['level_3'].cat.reorder_categories(level_3_order)
else:
    print('level_3 categories differ from the legacy label set; keeping the current order.')

In [None]:
# Save plots
# Plot UMAP with cluster labels w/ new color
sc.pl.umap(adata, color='level_3', legend_loc='on data',
           legend_fontsize=12, legend_fontoutline=2, frameon=False, 
           title='Level 3 clustering', palette=level_3_pl, save=False)

fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_3', frameon=False, legend_loc='right margin', ax=ax,
           palette=level_3_pl, save=False)

In [None]:
fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_3', frameon=False, legend_loc='right margin', ax=ax, title='', s=3,
           palette=level_3_pl, save=False)

In [None]:
fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_3', frameon=False, legend_loc=False, ax=ax, title='', size=10, alpha=.5,
           palette=level_3_pl, save='_no_legend_larger')

In [None]:
# Level-3 UMAP without a legend (saved with suffix '_no_legend')
fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_3', frameon=False, legend_loc=False, ax=ax, title='',
           palette=level_3_pl, save='_no_legend')

# NOTE(review): the output format is set only after the first figure above has been saved,
# so that figure uses whatever format was active before this cell — confirm that is intended.
sc.set_figure_params(format='pdf', dpi=150)
# Same UMAP with the legend in the right margin (saved with suffix '_legend')
fig, ax = plt.subplots(figsize=(10, 7))
sc.pl.umap(adata, color='level_3', frameon=False, legend_loc='right margin', ax=ax, title='',
           palette=level_3_pl, save='_legend')

In [None]:
# Integer code of each level-3 category (its position in the category order), stored as a
# categorical column so the numbers can be drawn on the UMAP as compact labels
adata.obs['level_3_code'] = adata.obs['level_3'].cat.codes.astype('category')

sc.set_figure_params(format='pdf', dpi=150)
fig, ax = plt.subplots(figsize=(10, 7))
# Codes are drawn on the data; the level-3 palette is reused for the code categories
sc.pl.umap(adata, color='level_3_code', frameon=False, legend_loc='on data', ax=ax, title='', legend_fontsize=4, legend_fontoutline=1,
           palette=level_3_pl, save='_code')

In [None]:
adata.uns['level_3_color_list'] = level_3_colors
adata.uns['level_3_order'] = level_3_order

### gene markers

In [None]:
# Get markers for each cluster
sc.tl.rank_genes_groups(adata, 'level_3', method='wilcoxon', pts=True)

# Filter markers
sc.tl.filter_rank_genes_groups(adata, min_fold_change=1)

In [None]:
current_group = 'Ex L2'

current_df = sc.get.rank_genes_groups_df(adata, group=current_group, key='rank_genes_groups_filtered')
current_df.head(10)

In [None]:
# Print markers
# Build a per-cluster table of the top marker genes, before and after filtering, and save it.
n_top = 15

names_df = pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(n_top)
filtered_names_df = pd.DataFrame(adata.uns['rank_genes_groups_filtered']['names']).head(n_top)

# Take the group labels from the ranking result itself: the columns of the `names` record
# array are named after the groups. Pairing the columns positionally with the separately
# maintained `level_3_order` mislabels clusters whenever the two orders differ.
groups = names_df.columns.to_list()

markers = [' '.join(names_df[group].to_list()) for group in groups]

# Genes removed by the filter are stored as NaN, so keep only real gene names
filtered_markers = [
    ' '.join(gene for gene in filtered_names_df[group].to_list() if isinstance(gene, str))
    for group in groups
]

markers_df = pd.DataFrame({'level_3': groups, 'markers': markers, 'filtered_markers': filtered_markers})
date = datetime.today().strftime('%Y-%m-%d')
markers_df.to_csv(f"{out_path}/{date}-level-3-gene-markers.csv")
markers_df


In [None]:
# Dot plot mean expression (##)
sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups_filtered', n_genes=10, dendrogram=False, save=True)

In [None]:
inh_group = [    'Inh Npy', 'Inh Pvalb 1', 'Inh Pvalb 2', 'Inh Sst', 'Inh HY 1', 'Inh HY 2', 'Inh HY 3',
    'Inh HY 4', 'Inh HY 5', 'Inh LH/HY 1', 'Inh LH/HY 2', 'Inh LH/HY 3', 'Inh STR 1', 'Inh STR 2', 'Inh STR 3', 'Inh STR 4', 'Inh STR 5',
    'Inh Amygdala',]
sc.pl.rank_genes_groups_dotplot(adata, groups=inh_group, key='rank_genes_groups_filtered', n_genes=5, dendrogram=False, save='Inh')

In [None]:
# Other type of plots
# Plot z-score heatmap
# 1) unfiltered ranking (default key), top 10 genes per cluster, shown only
sc.pl.rank_genes_groups_heatmap(adata, n_genes=10, groupby='level_3', min_logfoldchange=1, use_raw=False, swap_axes=True, 
                                vmin=-5, vmax=5, cmap='bwr', show_gene_labels=True,
                                dendrogram=False, figsize=(30, 20), save=False)

# 2) filtered ranking, top 5 genes per cluster, gene labels hidden, saved with suffix '_level_3'
# NOTE(review): min_logfoldchange is .1 here but 1 in every other heatmap call — confirm
# this is deliberate and not a typo.
sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, groupby='level_3', min_logfoldchange=.1, use_raw=False, swap_axes=True, 
                                vmin=-5, vmax=5, cmap='bwr', show_gene_labels=False, key='rank_genes_groups_filtered', 
                                dendrogram=False, figsize=(30, 20), save='_level_3')

### spatial map

In [None]:
sns.set(rc={'axes.facecolor':'#fefff5', 'figure.facecolor':'#fefff5'})

In [None]:
# Full-tissue spatial map coloured by level-3 label, saved as TIFF
# NOTE: this switches scanpy's figure format to 'tif' for any cell run afterwards.
sc.set_figure_params(format='tif', dpi=150)
fig, ax = plt.subplots(figsize=(18, 23))
# backdrop of every cell ('#1111' is 4-digit #RGBA hex shorthand)
sns.scatterplot(x='column', y='row', data=adata.obs, color='#1111', s=2, legend=False, ax=ax)
# coloured layer: larger dots, marker outline disabled
sns.scatterplot(x='column', y='row', hue='level_3', data=adata.obs, palette=level_3_pl, s=12, edgecolor=None, legend=False, ax=ax)

ax.axis('off')
plt.tight_layout(pad=0)
plt.savefig(os.path.join(fig_path, 'spatial-map-level-3.tif')) # fdf7ec
plt.show()

In [None]:
# One spatial map per level-3 cluster: backdrop of all cells, the cluster's cells on top
level_3_labels = adata.obs['level_3']
for current_group in level_3_labels.cat.categories:

    print(current_group)
    cluster_obs = adata.obs.loc[adata.obs['level_3'] == current_group, :]

    fig, ax = plt.subplots(figsize=(10, 10))
    # every cell as backdrop
    sns.scatterplot(x='column', y='row', data=adata.obs, color='#1111', s=2, legend=False, ax=ax)
    # highlighted cluster, drawn larger
    sns.scatterplot(x='column', y='row', hue='level_3', data=cluster_obs, palette=level_3_pl, s=4, legend=False, ax=ax)
    ax.title.set_text(current_group)
    ax.axis('off')
    plt.show()

### sankey diagram

## Output backup

In [None]:
adata

In [None]:
# backup 
# Writes the current `adata` (including the uns entries set above) to a date-stamped h5ad.
# NOTE: the path is identical to the one used by the earlier backup cell in the Level 3
# section, so running both on the same day overwrites that earlier file.
from datetime import datetime
date = datetime.today().strftime('%Y-%m-%d')
adata.write_h5ad(f"{out_path}/{date}-Hu-TissueRIBOmap-level3.h5ad")

## Sub level clustering

### Glia

In [None]:
# Embedding parameters
# Settings keyed by subset name; the neighbour-graph/UMAP and Leiden cells below
# look up the entry for the current sub_id.
emb_dict = {
    'Neuron': {'n_neighbors': 50, 'n_pcs': 10, 'min_dist': .1, 'cluster_resolution': 2},
    'Glia': {'n_neighbors': 50, 'n_pcs': 15, 'min_dist': .1, 'cluster_resolution': 1.2},
}

# When True, the clustering cell writes a parameter log and the UMAP coordinates under fig_path.
save_embedding = True

In [None]:
# Subset
# Select the cells whose level_1 annotation equals sub_id.
sub_id = 'Glia'
curr_cells = adata.obs['level_1'] == sub_id
# Take an explicit copy: slicing an AnnData returns a view, and the PCA / neighbours /
# clustering cells below write into sdata. Copying here makes sdata an independent
# object up front instead of relying on an implicit copy at the first write.
sdata = adata[curr_cells, :].copy()
sdata

In [None]:
# Figure directory for the current subset, nested under fig_path.
sub_level_fig_path = os.path.join(fig_path, sub_id)
# exist_ok=True makes re-running the cell a no-op and avoids the check-then-create race;
# makedirs also creates fig_path itself if it is missing.
os.makedirs(sub_level_fig_path, exist_ok=True)

#### clustering

In [None]:
# Run PCA
# Computed on the subset only, with the ARPACK solver; use_highly_variable=True
# asks scanpy to use the genes flagged as highly variable.
sc.tl.pca(sdata, svd_solver='arpack', use_highly_variable=True)

# Plot explained variance 
# Linear (not log) scale; useful for choosing n_pcs in emb_dict.
sc.pl.pca_variance_ratio(sdata, log=False)

# Plot PCA
# Coloured by the 'protocol' column of sdata.obs.
sc.pl.pca(sdata, color='protocol')

In [None]:
%%time
# Computing the neighborhood graph
n_neighbors = emb_dict[sub_id]['n_neighbors']
n_pcs = emb_dict[sub_id]['n_pcs']
min_dist = emb_dict[sub_id]['min_dist']

sc.pp.neighbors(sdata, n_neighbors=n_neighbors, n_pcs=n_pcs, random_state=0)

# Run UMAP
sc.tl.umap(sdata, min_dist=min_dist, random_state=0)
sc.tl.diffmap(sdata, n_comps=n_pcs, random_state=0)

In [None]:
%%time
# Run leiden cluster
cluster_resolution = emb_dict[sub_id]['cluster_resolution']
sc.tl.leiden(sdata, resolution = cluster_resolution)

# Plot UMAP with cluster labels 
sc.pl.umap(sdata, color='leiden')
sc.pl.diffmap(sdata, color='leiden')
n_clusters = sdata.obs['leiden'].unique().shape[0]

if save_embedding:
    # Save log
    with open(f'{fig_path}/log_{sub_id}.txt', 'w') as f:
        f.write(f"""Number of neighbor: {n_neighbors}
    Number of PC: {n_pcs}
    Resolution: {cluster_resolution}
    Min-distance: {min_dist}
    Number of clusters: {n_clusters}""")

    # save embeddings
    np.savetxt(f'{fig_path}/embedding_{sub_id}_umap.csv', sdata.obsm['X_umap'], delimiter=",")
    # np.savetxt(f'{fig_path}/embedding_{sub_id}_diffmap.csv', sdata.obsm['X_diffmap'], delimiter=",")

In [None]:
# Restrict the stored level_3 order and colour list to the types present in this subset,
# keeping the stored order and the colour found at the same index.
present_types = sdata.obs['level_3'].values
level_3_colors = sdata.uns['level_3_color_list']
ordered_pairs = [
    (cell_type, level_3_colors[idx])
    for idx, cell_type in enumerate(sdata.uns['level_3_order'])
    if cell_type in present_types
]
current_order = [cell_type for cell_type, _color in ordered_pairs]
current_cpl = [color for _cell_type, color in ordered_pairs]

# Show the resulting palette with one tick label per type.
sns.palplot(current_cpl)
plt.xticks(range(len(current_order)), current_order, size=10, rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# On-screen UMAP coloured by level_3: no frame, no title, no legend, not saved.
fig, ax = plt.subplots(figsize=(10, 7))
# 'none' is the string scanpy documents for suppressing the legend; the previous boolean
# False is not a documented legend_loc value, so its effect is version-dependent.
sc.pl.umap(sdata, color='level_3', frameon=False, legend_loc='none', ax=ax, title='',
           save=False)

In [None]:
sc.set_figure_params(format='pdf', dpi=150)

# Save the same UMAP in three legend layouts. Each entry is
# (legend_loc, file-name suffix, extra keyword arguments).
# - 'none' is the documented string for "no legend" (replaces the undocumented False).
# - File names are derived from sub_id rather than a hard-coded 'Glia', so they stay
#   correct if sub_id changes; with sub_id == 'Glia' the names are unchanged.
umap_variants = [
    ('none', 'no_legend', {}),
    ('right margin', 'legend', {}),
    ('on data', 'on_data', {'legend_fontsize': 8, 'legend_fontoutline': 2}),
]

for legend_loc, suffix, extra_kwargs in umap_variants:
    fig, ax = plt.subplots(figsize=(10, 7))
    sc.pl.umap(sdata, color='level_3', frameon=False, legend_loc=legend_loc, ax=ax, title='',
               save=f'_{sub_id}_{suffix}', **extra_kwargs)

In [None]:
# Get markers for each cluster
# Wilcoxon test across the level_3 groups of the subset; pts=True additionally
# requests the fraction of cells expressing each gene.
sc.tl.rank_genes_groups(sdata, 'level_3', method='wilcoxon', pts=True)

# Filter markers
# The filtered result is read by later cells under the key 'rank_genes_groups_filtered'.
sc.tl.filter_rank_genes_groups(sdata, min_fold_change=.1)

In [None]:
# Heatmap of the top 5 filtered marker genes per level_3 group of the subset.
# The save suffix is built from sub_id (instead of a hard-coded 'Glia') so the file name
# always matches the subset being plotted; with sub_id == 'Glia' the name is unchanged.
sc.pl.rank_genes_groups_heatmap(sdata, n_genes=5, groupby='level_3', min_logfoldchange=.1, use_raw=False, swap_axes=True,
                                vmin=-5, vmax=5, cmap='bwr', show_gene_labels=True, key='rank_genes_groups_filtered',
                                dendrogram=False, figsize=(30, 20), save=f'_{sub_id}')

In [None]:
# Collect the filtered markers (log2fc_min=.1) for the groups in current_order as one table.
marker_df = sc.get.rank_genes_groups_df(sdata, group=current_order, key='rank_genes_groups_filtered', log2fc_min=.1)
# File name derived from sub_id so it always matches the subset that was analysed;
# with sub_id == 'Glia' this is the same 'Glia_markers.csv' as before.
marker_df.to_csv(f'{sub_level_fig_path}/{sub_id}_markers.csv')

#### spatial map

In [None]:
# Load the cell-centre table (first CSV column becomes the index). Its 'column' and 'row'
# fields are used as the grey background layer of the spatial maps below.
clustermap_center_path = os.path.join(base_path, 'RIBOmap', 'cell_center_polished.csv')
cell_center_df = pd.read_csv(clustermap_center_path, index_col=0)

In [None]:
save_as = True
sc.set_figure_params(format='tif', dpi=150)

dot_size = 12
fig, ax = plt.subplots(figsize=(18, 23))
# Background: every cell centre in light grey.
sns.scatterplot(x='column', y='row', data=cell_center_df, color='#ededed', s=dot_size, legend=False, edgecolor=None, ax=ax)
# Foreground: cells of the current subset coloured by level_3.
# current_cpl is a plain list, so seaborn matches colours to hue levels by position.
# hue_order=current_order pins colour i to label i, as in the palette preview, instead of
# depending on the category order of sdata.obs['level_3'].
sns.scatterplot(x='column', y='row', hue='level_3', hue_order=current_order, data=sdata.obs, palette=current_cpl, s=dot_size, edgecolor=None, legend=False, ax=ax)
ax.axis('off')
plt.tight_layout(pad=0)

if save_as:
    # File name follows sub_id (unchanged for sub_id == 'Glia').
    plt.savefig(os.path.join(fig_path, f'spatial-map-{sub_id}.tif'))
plt.show()

In [None]:
save_as = True
sc.set_figure_params(format='tif', dpi=150)
dot_size = 20

# One full-size spatial map per cell type of the subset, written to sub_level_fig_path.
for current_type in tqdm(current_order):

    type_obs = sdata.obs.loc[sdata.obs['level_3'] == current_type, :]

    fig, ax = plt.subplots(figsize=(18, 23))
    # Background: every cell centre in light grey.
    sns.scatterplot(x='column', y='row', data=cell_center_df, color='#ededed', s=dot_size, legend=False, edgecolor=None, ax=ax)
    # Foreground: only the current type. The filtered frame still carries every category,
    # so hue_order=current_order is needed to keep colour i of current_cpl tied to label i
    # rather than to the position of the label in the categorical's own ordering.
    sns.scatterplot(x='column', y='row', hue='level_3', hue_order=current_order, data=type_obs, palette=current_cpl, s=dot_size, edgecolor=None, legend=False, ax=ax)
    ax.axis('off')
    plt.tight_layout(pad=0)

    if save_as:
        # '/' occurs in some type names and would otherwise be read as a path separator.
        current_file_name = current_type.replace('/', '_')
        plt.savefig(os.path.join(sub_level_fig_path, f'spatial-map-{current_file_name}.tif'))
    # Close this specific figure so the loop does not accumulate open figures.
    plt.close(fig)
    # plt.show()
    

### Neuron

In [None]:
# Embedding parameters
# Rebinds emb_dict for the neuron sub-level analysis (this replaces the dict defined in the
# Glia section); the cells below look up the entry for the current sub_id.
emb_dict = {
    'Excitatory neuron': {'n_neighbors': 50, 'n_pcs': 15, 'min_dist': .1, 'cluster_resolution': 2},
    'Inhibitory neuron': {'n_neighbors': 50, 'n_pcs': 10, 'min_dist': .1, 'cluster_resolution': 1.5},
}

# When True, the clustering cell writes a parameter log and the UMAP coordinates under fig_path.
save_embedding = True

In [None]:
# Subset
# Select the cells whose level_2 annotation equals sub_id.
sub_id = 'Excitatory neuron'
curr_cells = adata.obs['level_2'] == sub_id
# Take an explicit copy: slicing an AnnData returns a view, and the PCA / neighbours /
# clustering cells below write into sdata. Copying here makes sdata an independent
# object up front instead of relying on an implicit copy at the first write.
sdata = adata[curr_cells, :].copy()
sdata

In [None]:
# Figure directory for the current subset, nested under fig_path.
sub_level_fig_path = os.path.join(fig_path, sub_id)
# exist_ok=True makes re-running the cell a no-op and avoids the check-then-create race;
# makedirs also creates fig_path itself if it is missing.
os.makedirs(sub_level_fig_path, exist_ok=True)

#### clustering

In [None]:
# Run PCA
# Computed on the subset only, with the ARPACK solver; use_highly_variable=True
# asks scanpy to use the genes flagged as highly variable.
sc.tl.pca(sdata, svd_solver='arpack', use_highly_variable=True)

# Plot explained variance 
# Linear (not log) scale; useful for choosing n_pcs in emb_dict.
sc.pl.pca_variance_ratio(sdata, log=False)

# Plot PCA
# Coloured by the 'protocol' column of sdata.obs.
sc.pl.pca(sdata, color='protocol')

In [None]:
%%time
# Computing the neighborhood graph
n_neighbors = emb_dict[sub_id]['n_neighbors']
n_pcs = emb_dict[sub_id]['n_pcs']
min_dist = emb_dict[sub_id]['min_dist']

sc.pp.neighbors(sdata, n_neighbors=n_neighbors, n_pcs=n_pcs, random_state=0)

# Run UMAP
sc.tl.umap(sdata, min_dist=min_dist, random_state=0)
sc.tl.diffmap(sdata, n_comps=n_pcs, random_state=0)

In [None]:
%%time
# Run leiden cluster
cluster_resolution = emb_dict[sub_id]['cluster_resolution']
sc.tl.leiden(sdata, resolution = cluster_resolution)

# Plot UMAP with cluster labels 
sc.pl.umap(sdata, color='leiden')
sc.pl.diffmap(sdata, color='leiden')
n_clusters = sdata.obs['leiden'].unique().shape[0]

if save_embedding:
    # Save log
    with open(f'{fig_path}/log_{sub_id}.txt', 'w') as f:
        f.write(f"""Number of neighbor: {n_neighbors}
    Number of PC: {n_pcs}
    Resolution: {cluster_resolution}
    Min-distance: {min_dist}
    Number of clusters: {n_clusters}""")

    # save embeddings
    np.savetxt(f'{fig_path}/embedding_{sub_id}_umap.csv', sdata.obsm['X_umap'], delimiter=",")
    # np.savetxt(f'{fig_path}/embedding_{sub_id}_diffmap.csv', sdata.obsm['X_diffmap'], delimiter=",")

In [None]:
# Restrict the stored level_3 order and colour list to the types present in this subset,
# keeping the stored order and the colour found at the same index.
present_types = sdata.obs['level_3'].values
level_3_colors = sdata.uns['level_3_color_list']
ordered_pairs = [
    (cell_type, level_3_colors[idx])
    for idx, cell_type in enumerate(sdata.uns['level_3_order'])
    if cell_type in present_types
]
current_order = [cell_type for cell_type, _color in ordered_pairs]
current_cpl = [color for _cell_type, color in ordered_pairs]

# Show the resulting palette with one tick label per type.
sns.palplot(current_cpl)
plt.xticks(range(len(current_order)), current_order, size=10, rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# On-screen UMAP coloured by level_3: no frame, no title, no legend, not saved.
fig, ax = plt.subplots(figsize=(10, 7))
# 'none' is the string scanpy documents for suppressing the legend; the previous boolean
# False is not a documented legend_loc value, so its effect is version-dependent.
sc.pl.umap(sdata, color='level_3', frameon=False, legend_loc='none', ax=ax, title='',
           save=False)

In [None]:
sc.set_figure_params(format='pdf', dpi=150)

# Save the same UMAP in three legend layouts. Each entry is
# (legend_loc, file-name suffix, extra keyword arguments).
# 'none' is the documented string for "no legend" (replaces the undocumented False).
umap_variants = [
    ('none', 'no_legend', {}),
    ('right margin', 'legend', {}),
    ('on data', 'on_data', {'legend_fontsize': 8, 'legend_fontoutline': 2}),
]

for legend_loc, suffix, extra_kwargs in umap_variants:
    fig, ax = plt.subplots(figsize=(10, 7))
    sc.pl.umap(sdata, color='level_3', frameon=False, legend_loc=legend_loc, ax=ax, title='',
               save=f'_{sub_id}_{suffix}', **extra_kwargs)

In [None]:
# Get markers for each cluster
# Wilcoxon test across the level_3 groups of the subset; pts=True additionally
# requests the fraction of cells expressing each gene.
sc.tl.rank_genes_groups(sdata, 'level_3', method='wilcoxon', pts=True)

# Filter markers
# The filtered result is read by later cells under the key 'rank_genes_groups_filtered'.
sc.tl.filter_rank_genes_groups(sdata, min_fold_change=.1)

In [None]:
# Save scanpy figures as PDF at 150 dpi for this heatmap.
sc.set_figure_params(format='pdf', dpi=150)
# Heatmap of the top 5 filtered marker genes per level_3 group of the subset, with genes on
# rows (swap_axes=True) and the colour scale clipped to [-5, 5]; saved with a sub_id suffix.
sc.pl.rank_genes_groups_heatmap(sdata, n_genes=5, groupby='level_3', min_logfoldchange=.1, use_raw=False, swap_axes=True, 
                                vmin=-5, vmax=5, cmap='bwr', show_gene_labels=True, key='rank_genes_groups_filtered', 
                                dendrogram=False, figsize=(30, 20), save=f'_{sub_id}')

#### spatial map

In [None]:
# Load the cell-centre table (first CSV column becomes the index). Its 'column' and 'row'
# fields are used as the grey background layer of the spatial maps below.
clustermap_center_path = os.path.join(base_path, 'RIBOmap', 'cell_center_polished.csv')
cell_center_df = pd.read_csv(clustermap_center_path, index_col=0)

In [None]:
save_as = True
sc.set_figure_params(format='tif', dpi=150)

dot_size = 12
fig, ax = plt.subplots(figsize=(18, 23))
# Background: every cell centre in light grey.
sns.scatterplot(x='column', y='row', data=cell_center_df, color='#ededed', s=dot_size, legend=False, edgecolor=None, ax=ax)
# Foreground: cells of the current subset coloured by level_3.
# current_cpl is a plain list, so seaborn matches colours to hue levels by position.
# hue_order=current_order pins colour i to label i, as in the palette preview, instead of
# depending on the category order of sdata.obs['level_3'].
sns.scatterplot(x='column', y='row', hue='level_3', hue_order=current_order, data=sdata.obs, palette=current_cpl, s=dot_size, edgecolor=None, legend=False, ax=ax)
ax.axis('off')
plt.tight_layout(pad=0)

if save_as:
    plt.savefig(os.path.join(fig_path, f'spatial-map-{sub_id}.tif'))
plt.show()

In [None]:
save_as = True
sc.set_figure_params(format='tif', dpi=150)
dot_size = 20

# One full-size spatial map per cell type of the subset, written to sub_level_fig_path.
for current_type in tqdm(current_order):

    type_obs = sdata.obs.loc[sdata.obs['level_3'] == current_type, :]

    fig, ax = plt.subplots(figsize=(18, 23))
    # Background: every cell centre in light grey.
    sns.scatterplot(x='column', y='row', data=cell_center_df, color='#ededed', s=dot_size, legend=False, edgecolor=None, ax=ax)
    # Foreground: only the current type. The filtered frame still carries every category,
    # so hue_order=current_order is needed to keep colour i of current_cpl tied to label i
    # rather than to the position of the label in the categorical's own ordering.
    sns.scatterplot(x='column', y='row', hue='level_3', hue_order=current_order, data=type_obs, palette=current_cpl, s=dot_size, edgecolor=None, legend=False, ax=ax)
    ax.axis('off')
    plt.tight_layout(pad=0)

    if save_as:
        # '/' occurs in some type names and would otherwise be read as a path separator.
        current_file_name = current_type.replace('/', '_')
        plt.savefig(os.path.join(sub_level_fig_path, f'spatial-map-{current_file_name}.tif'))
    # Close this specific figure so the loop does not accumulate open figures.
    plt.close(fig)
    # plt.show()
    