In [1]:
import scanpy as sc
import anndata as ad
import numpy as np 
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
import os

In [2]:
# Turn on scanpy's "vector friendly" plotting flag through the public `sc.settings`
# object (the same settings instance the private `sc._settings` module exposes).
# NOTE(review): presumably this makes scanpy rasterize dense scatter layers inside
# PDF/SVG output so the saved files stay small — confirm against the scanpy docs.
sc.settings._vector_friendly = True

In [None]:
# Read the AnnData object written after the CellANOVA step; the file is looked up
# relative to the notebook's working directory.
post_cellanova_h5ad = "04B_post_CellANOVA.h5ad"
adata_prep = sc.read_h5ad(post_cellanova_h5ad)

In [None]:
# Display the per-observation metadata table of the loaded object. The plotting
# cells further down colour the UMAP by the `cell_or_nuclei`,
# `consistent_cell_type`, `study` and `technology` columns of this table.
adata_prep.obs

In [None]:
%%time
integrated = ad.AnnData(adata_prep.layers['denoised'], dtype=np.float32)
integrated.obs = adata_prep.obs.copy()
integrated.var_names = adata_prep.var_names
sc.pp.neighbors(integrated, n_neighbors=15, n_pcs=30)
sc.tl.umap(integrated)

### Examine the UMAP embedding with respect to several batch effects and biological effects:

Batch effects
- cell vs. nuclei (very large)
- study (also quite substantial)

Biological effects
- cell type

In [None]:
# Folder that receives every PDF saved by the plotting cells below. The trailing
# slash is intentional: those cells build file paths by appending a file name to
# this string.
human_heart_sn_vs_sc_plots = "CellANOVA_plots/"

# Create the folder on first run; exist_ok=True makes re-running this cell harmless.
os.makedirs(name=human_heart_sn_vs_sc_plots, exist_ok=True)

In [None]:
# UMAP coloured by the `cell_or_nuclei` obs column, saved as a PDF and shown inline.
with plt.rc_context():  # any rcParams changed inside the block are restored on exit
    # show=False keeps the figure open so it can be written to disk before display.
    sc.pl.umap(integrated, color="cell_or_nuclei", show=False)
    # bbox_inches="tight" expands the saved canvas to include the legend drawn
    # beside the axes; without it the legend can be cut off in the PDF.
    plt.savefig(
        os.path.join(human_heart_sn_vs_sc_plots, "UMAP_cell_vs_nuclei.pdf"),
        bbox_inches="tight",
    )
    plt.show()

In [None]:
# UMAP coloured by the `consistent_cell_type` obs column, with the category labels
# drawn directly on the embedding (legend_loc='on data'); saved as a PDF and shown.
cell_type_pdf = os.path.join(human_heart_sn_vs_sc_plots, "UMAP_cell_type.pdf")
with plt.rc_context():  # any rcParams changed inside the block are restored on exit
    sc.pl.umap(
        integrated,
        color="consistent_cell_type",
        legend_loc='on data',
        show=False,  # keep the figure open so it can be saved first
    )
    plt.savefig(cell_type_pdf)
    plt.show()

In [None]:
# UMAP coloured by the `study` obs column, saved as a PDF and shown inline.
with plt.rc_context():  # any rcParams changed inside the block are restored on exit
    # show=False keeps the figure open so it can be written to disk before display.
    sc.pl.umap(integrated, color="study", show=False)
    # bbox_inches="tight" expands the saved canvas to include the legend drawn
    # beside the axes; without it the legend can be cut off in the PDF.
    plt.savefig(
        os.path.join(human_heart_sn_vs_sc_plots, "UMAP_study.pdf"),
        bbox_inches="tight",
    )
    plt.show()

In [None]:
# UMAP coloured by the `technology` obs column, saved as a PDF and shown inline.
with plt.rc_context():  # any rcParams changed inside the block are restored on exit
    # show=False keeps the figure open so it can be written to disk before display.
    sc.pl.umap(integrated, color="technology", show=False)
    # bbox_inches="tight" expands the saved canvas to include the legend drawn
    # beside the axes; without it the legend can be cut off in the PDF.
    plt.savefig(
        os.path.join(human_heart_sn_vs_sc_plots, "UMAP_technology.pdf"),
        bbox_inches="tight",
    )
    plt.show()