In [None]:
import os
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgba
import seaborn as sns
import scipy.stats
from tqdm import tqdm

import anndata
import scanpy as sc

from scmg.preprocessing.data_standardization import GeneNameMapper

# Instantiate the project's gene-name mapper once for the notebook session.
# NOTE(review): no other cell visible in this notebook references
# `gene_name_mapper` — confirm it is still needed.
gene_name_mapper = GeneNameMapper()

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# Matplotlib defaults applied before the scanpy figure settings.
plt.rcParams.update({
    "figure.autolayout": False,
    "pdf.fonttype": 42,  # same rc entry that matplotlib.rc('pdf', fonttype=42) sets
    "font.family": "FreeSans",
})

# Scanpy-level figure parameters used for saved output.
sc.set_figure_params(vector_friendly=True, dpi_save=300)

# Applied last, as in the original ordering, so this value is the one in
# effect after the scanpy call above.
plt.rcParams.update({"axes.grid": False})

In [None]:
# Directory that receives the readout gene-module UMAP PDFs written by later cells.
plot_output_path = 'hesc_pseudobulk_readout_gene_module_umaps'
# Create it up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(plot_output_path, exist_ok=True)

In [None]:
# Cluster table for the downstream (readout) genes. Later cells read its
# 'leiden' and 'gene_name' columns; the first CSV column becomes the index.
dg_cluster_df = pd.read_csv('clustering/downstream_gene_clusters_hESC.csv', index_col=0)

In [None]:
# Load the reference-cell AnnData, then normalise each cell to a total of 1e4
# and log1p-transform the values.
adata_ct_ref = sc.read_h5ad('../../manifold_generator/ref_cell_adata_measured_count.h5ad')
sc.pp.normalize_total(adata_ct_ref, target_sum=1e4)
sc.pp.log1p(adata_ct_ref)
# Scaling is intentionally not applied to this object; a gene-renamed copy is
# scaled later in the notebook.

# Per-cell sum of the log-normalised values over all genes. Summing a scipy
# sparse matrix along axis=1 returns a 2-D (n, 1) np.matrix, so flatten the
# result to a plain 1-D vector; this is valid for both sparse and dense X.
adata_ct_ref.obs['total_exp'] = np.asarray(adata_ct_ref.X.sum(axis=1)).ravel()
adata_ct_ref

In [None]:
# Work on a copy so the gene re-indexing below does not modify adata_ct_ref.
named_adata = adata_ct_ref.copy()
# Index genes by the 'human_gene_name' column of var so that later cells can
# match them against the gene names listed in the cluster tables.
# NOTE(review): presumably these names are unique; duplicated names would all
# be selected by the later isin() lookups — confirm.
named_adata.var.index = adata_ct_ref.var['human_gene_name']

In [None]:
# Scale the renamed copy in place with scanpy's pp.scale, passing max_value=10.
# The module scores computed in later cells are means of these scaled values
# and are plotted on a fixed -3..3 colour range.
sc.pp.scale(named_adata, max_value=10)

In [None]:
# Score every readout (downstream) gene module on the reference cells and save
# one UMAP PDF per module.
readout_gc_cols = []  # obs column names, reused for the combined figure

module_ids = np.unique(dg_cluster_df['leiden'])
for module_id in module_ids:
    # Genes assigned to this Leiden cluster.
    in_module = dg_cluster_df['leiden'] == module_id
    module_genes = dg_cluster_df.loc[in_module, 'gene_name'].values

    obs_key = f'readout gene cluster {module_id}'
    readout_gc_cols.append(obs_key)

    # Module score: per-cell mean over the module genes found in var.
    gene_mask = named_adata.var.index.isin(module_genes)
    named_adata.obs[obs_key] = named_adata[:, gene_mask].X.mean(axis=1)

    fig, ax = plt.subplots(figsize=(4, 4), dpi=300)
    sc.pl.umap(
        named_adata,
        color=obs_key,
        title=obs_key,
        cmap='seismic',
        vmin=-3,
        vmax=3,
        ax=ax,
        show=False,
    )
    fig.savefig(f'{plot_output_path}/hESC_readout_g_module_{module_id}_global_umap.pdf')
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Combined overview: every readout module score as a panel of one UMAP figure
# (five panels per row), saved as a single PDF.
fig = sc.pl.umap(
    named_adata,
    color=readout_gc_cols,
    ncols=5,
    cmap='seismic',
    vmin=-3,
    vmax=3,
    return_fig=True,
)
fig.savefig(f'{plot_output_path}/hESC_readout_g_modules_global_umap.pdf')

In [None]:
# Cluster table for the perturbed genes. Later cells read its 'leiden' and
# 'perturbed_gene_name' columns; the first CSV column becomes the index.
pg_cluster_df = pd.read_csv('clustering/perturbed_gene_clusters_hESC.csv', index_col=0)
# Display the table as the cell output for a quick visual check.
pg_cluster_df

In [None]:
# Output directory for the perturbed gene-module UMAPs, named once here
# instead of repeating the literal (mirrors how the readout section uses
# `plot_output_path`).
perturbed_plot_output_path = 'hesc_pseudobulk_perturbed_gene_module_umaps'
os.makedirs(perturbed_plot_output_path, exist_ok=True)

perturbed_gc_cols = []  # obs column names, reused for the combined figure

# Score every perturbed-gene module on the reference cells and save one UMAP
# PDF per module.
for pg_module in np.unique(pg_cluster_df['leiden']):
    # Genes assigned to this Leiden cluster.
    selected_genes = pg_cluster_df.loc[
        pg_cluster_df['leiden'] == pg_module, 'perturbed_gene_name'
    ].values

    perturbed_gc = f'perturbed gene cluster {pg_module}'
    perturbed_gc_cols.append(perturbed_gc)

    # Module score: per-cell mean over the module genes found in var.
    # NOTE(review): if none of the module's genes are present in var, the mean
    # is taken over zero columns — confirm every module has matches.
    gene_mask = named_adata.var.index.isin(selected_genes)
    named_adata.obs[perturbed_gc] = named_adata[:, gene_mask].X.mean(axis=1)

    fig, ax = plt.subplots(figsize=(4, 4), dpi=300)
    sc.pl.umap(named_adata, color=perturbed_gc, cmap='seismic', vmin=-3, vmax=3,
               title=perturbed_gc, ax=ax, show=False)
    fig.savefig(f'{perturbed_plot_output_path}/hESC_perturbed_g_module_{pg_module}_global_umap.pdf')
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Combined overview: every perturbed-gene module score as a panel of one UMAP
# figure (five panels per row), saved as a single PDF.
fig = sc.pl.umap(
    named_adata,
    color=perturbed_gc_cols,
    ncols=5,
    cmap='seismic',
    vmin=-3,
    vmax=3,
    return_fig=True,
)
# Plain string literal: the path contains no placeholders.
fig.savefig('hesc_pseudobulk_perturbed_gene_module_umaps/hESC_perturbed_g_modules_global_umap.pdf')