In [1]:
# Integration of organoids within subsets
import anndata, numpy as np, pandas as pd, imp, lpy, scanpy as sc
sc.logging.print_versions()

scanpy==1.5.1 anndata==0.7.4 umap==0.4.6 numpy==1.19.1 scipy==1.5.2 pandas==1.0.5 scikit-learn==0.23.2 statsmodels==0.11.1 python-igraph==0.8.2 louvain==0.6.1 leidenalg==0.8.1


In [2]:
# Load the integrated organoid dataset (AnnData stored as h5ad); every later
# cell reads from and writes back into this `adata` object.
adata = anndata.read_h5ad("N1-integrated_organoids.h5ad")

In [3]:
# Boolean cell mask for the bulk-organoid subset: genotypes U1 / U2 / B44,
# lane 12, cells not flagged in "filtered_cells", and log2p1_count above 13.
# NOTE(review): `filter` shadows the Python builtin; the name is kept because
# later cells pass it to lpy.scvi_prepare.
filter = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    & (adata.obs["Lane"] == 12)
    & (adata.obs["filtered_cells"] == False)
    & (adata.obs["log2p1_count"] > 13)
)
# Subsampled version of the same mask, grouped by "sample_names".
# NOTE(review): presumably 600 is a per-sample cap -- confirm in lpy.subsample.
subsample_filter = lpy.subsample(filter, 600, adata.obs["sample_names"])

In [4]:
# Use genotypes ("demultiplexed") for batch correction, no subsampling.
# doinspect=True makes the helper print its own source before executing.
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=filter,
    doinspect=True,
)

[35;46;1mPrepare Data for Scvi/TotalVi[0m[34m
def scvi_prepare(anndatapath, field, cellfilter = None, nbgenes = 5000, genes_to_filter= None, use_ccfilter_prefix=None, citeseqkey = "protein_expression", use_raw_slot_instead =None, min_cell_threshold= 0, doinspect=False):
    if doinspect is True: print("\033[35;46;1mPrepare Data for Scvi/TotalVi\033[0m\033[34m"); print(inspect.getsource(scvi_prepare));print("\033[31;43;1mExecution:\033[0m")
    if use_ccfilter_prefix is not None :
        genes_to_filter = [use_ccfilter_prefix + x for x in ["HMGB2","CDK1","NUSAP1","UBE2C","BIRC5","TPX2","TOP2A","NDC80","CKS2","NUF2","CKS1B","MKI67","TMPO","CENPF","TACC3","FAM64A","SMC4","CCNB2","CKAP2L","CKAP2","AURKB","BUB1","KIF11","ANP32E","TUBB4B","GTSE1","KIF20B","HJURP","CDCA3","HN1","CDC20","TTK","CDC25C","KIF2C","RANGAP1","NCAPD2","DLGAP5","CDCA2","CDCA8","ECT2","KIF23","HMMR","AURKA","PSRC1","ANLN","LBR","CKAP5","CENPE","CTCF","NEK2","G2E3","GAS2L3","CBX5","CENPA","MCM5","PCNA","TYMS","FEN1"

In [5]:
# Train scVI on the prepared dataset and fetch the latent representation of
# every cell. Per the helper source printed below, the defaults are
# nbstep=500 epochs and n_latent=64; doinspect=True prints that source.
latent = lpy.runSCVI(das["dataset"], doinspect=True)

[35;46;1mRun scvi[0m[34m
def runSCVI(dataset, nbstep = 500, n_latent = 64, doinspect= False):
    if doinspect is True: print("\033[35;46;1mRun scvi\033[0m\033[34m"); print(inspect.getsource(runSCVI));print("\033[31;43;1mExecution:\033[0m")
    vae = VAE(dataset.nb_genes, n_batch= dataset.n_batches, n_labels= dataset.n_labels, n_latent = n_latent)
    trainer = UnsupervisedTrainer(vae, dataset, train_size=0.9, frequency=5, use_cuda=True)
    trainer.train(n_epochs=nbstep)
    full = trainer.create_posterior(trainer.model, dataset, indices=np.arange(len(dataset)))
    return(full.sequential().get_latent()[0])

[31;43;1mExecution:[0m
[2020-11-02 18:59:58,836] INFO - scvi.inference.inference | KL warmup for 400 epochs


HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…




In [6]:
# Store the latent variables back in the object, and produce UMAP coordinates
# and leiden clusters. tsne_key=None: no t-SNE key is requested.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_bulkorg",
    umap_key="X_umap_scvi_genotype_bulkorg",
    tsne_key=None,
    leiden_key="leiden_scvi_genotype_bulkorg",
    cellnames=das["names"],
    doinspect=True,
)

[35;46;1mCompute Clusters and Reduces representations[0m[34m
def insertLatent(adata, latent , latent_key= "latent", umap_key= "X_umap", tsne_key = "X_tsne", leiden_key = "leiden", rename_cluster_key= None,cellfilter = None, cellnames =None, leiden_resolution=1.0,doinspect=False):
    if doinspect is True: print("\033[35;46;1mCompute Clusters and Reduces representations\033[0m\033[34m"); print(inspect.getsource(insertLatent));print("\033[31;43;1mExecution:\033[0m")


    if cellnames is None:
        #order of full must match
        assert latent.shape[0] == len(adata.obs_names),  "cell names need for be provided if size of latent mismatches adata"
        map = range(len(adata.obs_names))
        if latent_key is not None:
            adata.obsm[latent_key] = latent
    else:
        print("defining permutation")
        dalist = list(adata.obs_names)
        tmap = {}
        for i in range(len(adata.obs_names)):
            tmap.update( {adata.obs_names[i] : i})
# for i in range(

In [7]:
# No batch correction (field=None), no subsampling: same cell mask as the
# genotype-corrected run, results stored under the "nobatch_bulkorg" keys.
das = lpy.scvi_prepare(adata, field=None, cellfilter=filter)
latent = lpy.runSCVI(das["dataset"])
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_nobatch_bulkorg",
    umap_key="X_umap_scvi_nobatch_bulkorg",
    tsne_key=None,
    leiden_key="leiden_scvi_nobatch_bulkorg",
    cellnames=das["names"],
)

[2020-11-02 19:09:49,333] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-02 19:09:49,337] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-02 19:09:50,717] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-02 19:09:50,926] INFO - scvi.dataset.dataset | Downsampled from 17632 to 17632 cells
[2020-11-02 19:09:50,932] INFO - scvi.dataset.dataset | Merging datasets. Input objects are modified in place.
[2020-11-02 19:09:50,934] INFO - scvi.dataset.dataset | Gene names and cell measurement names are assumed to have a non-null intersection between datasets.
[2020-11-02 19:09:50,962] INFO - scvi.dataset.dataset | Keeping 27569 genes
[2020-11-02 19:09:52,537] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-02 19:09:52,750] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-02 19:09:52,753] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-02 19:09:53,

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id


In [8]:
# Use genotypes for batch correction, with subsampling (subsample_filter).
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=subsample_filter,
)
latent = lpy.runSCVI(das["dataset"])
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_bulkorg_subsmpl",
    umap_key="X_umap_scvi_genotype_bulkorg_subsmpl",
    tsne_key=None,
    leiden_key="leiden_scvi_genotype_bulkorg_subsmpl",
    cellnames=das["names"],
)

[2020-11-02 19:19:58,305] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-02 19:19:58,463] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-02 19:19:58,465] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-02 19:19:58,786] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-02 19:19:58,836] INFO - scvi.dataset.dataset | Downsampled from 1071 to 1071 cells
[2020-11-02 19:19:58,842] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-02 19:19:58,971] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-02 19:19:58,974] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-02 19:19:59,226] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-02 19:19:59,267] INFO - scvi.dataset.dataset | Downsampled from 867 to 867 cells
[2020-11-02 19:19:59,273] INFO - scvi.

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id


In [9]:
# No batch correction (field=None), with subsampling (subsample_filter).
das = lpy.scvi_prepare(adata, field=None, cellfilter=subsample_filter)
latent = lpy.runSCVI(das["dataset"])
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_nobatch_bulkorg_subsmpl",
    umap_key="X_umap_scvi_nobatch_bulkorg_subsmpl",
    tsne_key=None,
    leiden_key="leiden_scvi_nobatch_bulkorg_subsmpl",
    cellnames=das["names"],
)

[2020-11-02 19:22:01,928] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-02 19:22:02,312] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-02 19:22:02,315] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-02 19:22:03,204] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-02 19:22:03,342] INFO - scvi.dataset.dataset | Downsampled from 2905 to 2905 cells
[2020-11-02 19:22:03,353] INFO - scvi.dataset.dataset | Merging datasets. Input objects are modified in place.
[2020-11-02 19:22:03,355] INFO - scvi.dataset.dataset | Gene names and cell measurement names are assumed to have a non-null intersection between datasets.
[2020-11-02 19:22:03,382] INFO - scvi.dataset.dataset | Keeping 27569 genes
[2020-11-02 19:22:04,317] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-02 19:22:04,675] INFO - scvi.dataset.dataset | Remapping labels to

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id


In [10]:
# Genotypes A and D, cells not flagged in "filtered_cells"; genotype is used
# as the batch field.
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=(
        [x in ["A", "D"] for x in adata.obs["demultiplexed"]]
        & (adata.obs["filtered_cells"] == False)
    ),
)
latent = lpy.runSCVI(das["dataset"])
# First pass: latent coordinates, UMAP and leiden clusters at the default
# resolution.
# NOTE(review): the leiden keys in this cell contain "scvi_scvi", unlike the
# bulkorg keys -- kept as-is because they are runtime column names.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_E001",
    umap_key="X_umap_scvi_genotype_E001",
    tsne_key=None,
    leiden_key="leiden_scvi_scvi_genotype_E001",
    cellnames=das["names"],
)
# Second pass: latent/UMAP keys are None, so (per the logged steps) only
# leiden clustering is run, this time at resolution 2.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_scvi_scvi_genotype_E001",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-10 20:30:37,693] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-10 20:30:37,695] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-10 20:30:39,073] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-10 20:30:39,278] INFO - scvi.dataset.dataset | Downsampled from 16474 to 16474 cells
[2020-11-10 20:30:39,796] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-10 20:30:39,799] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-10 20:30:41,363] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-10 20:30:41,598] INFO - scvi.dataset.dataset | Downsampled from 20847 to 20847 cells
[2020-11-10 20:30:41,604] INFO - scvi.dataset.dataset | Merging datasets. Input objects are modified in place.
[2020-11-10 20:30:41,605] INFO - scvi.dataset.dataset | Gene names and cell measurement names are assumed to have a non-null intersection between datasets.
[20

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [11]:
# Genotype B44 only: cells not flagged in "filtered_cells", Inhibitor == "Ctrl"
# and IsClonal == True.
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=(
        (adata.obs["demultiplexed"] == "B44")
        & (adata.obs["filtered_cells"] == False)
        & (adata.obs["Inhibitor"] == "Ctrl")
        & (adata.obs["IsClonal"] == True)
    ),
)
latent = lpy.runSCVI(das["dataset"])
# First pass: latent coordinates, UMAP and default-resolution leiden clusters.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_noinhibit_B44clone",
    umap_key="X_umap_scvi_genotype_noinhibit_B44clone",
    tsne_key=None,
    leiden_key="leiden_scvi_scvi_genotype_noinhibit_B44clone",
    cellnames=das["names"],
)
# Second pass: clustering only (latent/UMAP keys are None), at resolution 2.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_scvi_scvi_genotype_noinhibit_B44clone",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-10 20:58:47,949] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-10 20:58:47,951] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-10 20:58:49,616] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-10 20:58:49,855] INFO - scvi.dataset.dataset | Downsampled from 17420 to 17420 cells
[2020-11-10 20:58:49,861] INFO - scvi.dataset.dataset | Merging datasets. Input objects are modified in place.
[2020-11-10 20:58:49,863] INFO - scvi.dataset.dataset | Gene names and cell measurement names are assumed to have a non-null intersection between datasets.
[2020-11-10 20:58:49,890] INFO - scvi.dataset.dataset | Keeping 27569 genes
[2020-11-10 20:58:51,731] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-10 20:58:51,975] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-10 20:58:51,978] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-10 20:58:52,

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [12]:
# Genotypes A, D and B44: cells not flagged in "filtered_cells",
# Inhibitor == "Ctrl" and IsClonal == True.
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=(
        [x in ["A", "D", "B44"] for x in adata.obs["demultiplexed"]]
        & (adata.obs["filtered_cells"] == False)
        & (adata.obs["Inhibitor"] == "Ctrl")
        & (adata.obs["IsClonal"] == True)
    ),
)
latent = lpy.runSCVI(das["dataset"])
# First pass: latent coordinates, UMAP and default-resolution leiden clusters.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_noinhibit_clones",
    umap_key="X_umap_scvi_genotype_noinhibit_clones",
    tsne_key=None,
    leiden_key="leiden_scvi_scvi_genotype_noinhibit_clones",
    cellnames=das["names"],
)
# Second pass: clustering only (latent/UMAP keys are None), at resolution 2.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_scvi_scvi_genotype_noinhibit_clones",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-10 21:11:36,399] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-10 21:11:36,401] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-10 21:11:37,773] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-10 21:11:37,971] INFO - scvi.dataset.dataset | Downsampled from 16474 to 16474 cells
[2020-11-10 21:11:38,489] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-10 21:11:38,492] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-10 21:11:40,048] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-10 21:11:40,274] INFO - scvi.dataset.dataset | Downsampled from 20847 to 20847 cells
[2020-11-10 21:11:40,814] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-10 21:11:40,816] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-10 21:11:42,481] INFO - scvi.dataset.dataset | Computing the library size for the new data

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [13]:
# Genotypes A and D, cells not flagged in "filtered_cells", restricted to
# cells that are either hormone-treated (Hormones != "Ctrl") or from day 0.
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=(
        [x in ["A", "D"] for x in adata.obs["demultiplexed"]]
        & (adata.obs["filtered_cells"] == False)
        & ((adata.obs["Hormones"] != "Ctrl") | (adata.obs["Days"] == 0))
    ),
)
latent = lpy.runSCVI(das["dataset"])
# First pass: latent coordinates, UMAP and default-resolution leiden clusters.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_E001_hormones",
    umap_key="X_umap_scvi_genotype_E001_hormones",
    tsne_key=None,
    leiden_key="leiden_scvi_scvi_genotype_E001_hormones",
    cellnames=das["names"],
)
# Second pass: clustering only (latent/UMAP keys are None), at resolution 2.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_scvi_scvi_genotype_E001_hormones",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-16 14:04:16,437] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-16 14:04:16,438] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-16 14:04:17,835] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-16 14:04:18,046] INFO - scvi.dataset.dataset | Downsampled from 13399 to 13399 cells
[2020-11-16 14:04:18,484] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-16 14:04:18,487] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-16 14:04:19,653] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-16 14:04:19,832] INFO - scvi.dataset.dataset | Downsampled from 10316 to 10316 cells
[2020-11-16 14:04:19,837] INFO - scvi.dataset.dataset | Merging datasets. Input objects are modified in place.
[2020-11-16 14:04:19,842] INFO - scvi.dataset.dataset | Gene names and cell measurement names are assumed to have a non-null intersection between datasets.
[20

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [14]:
# Bulk-organoid subset (genotypes U1 / U2 / B44, lane 12, unflagged cells,
# log2p1_count > 13) further restricted to cells that are hormone-treated
# (Hormones != "Ctrl") or from day 0.
#
# The masks get their own names instead of rebinding `filter` and
# `subsample_filter`: those globals are read by the earlier bulk-organoid
# cells, and overwriting them here meant that re-running any of those cells
# afterwards silently used this hormone-restricted mask while still writing
# to the original "bulkorg" keys.
bulkorg_hormones_filter = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    & (adata.obs["Lane"] == 12)
    & (adata.obs["filtered_cells"] == False)
    & (adata.obs["log2p1_count"] > 13)
    & ((adata.obs["Hormones"] != "Ctrl") | (adata.obs["Days"] == 0))
)
# NOTE(review): presumably 600 is a per-sample cap -- confirm in lpy.subsample.
bulkorg_hormones_subsample_filter = lpy.subsample(
    bulkorg_hormones_filter, 600, adata.obs["sample_names"]
)

das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=bulkorg_hormones_subsample_filter,
)
latent = lpy.runSCVI(das["dataset"])
# First pass: latent coordinates, UMAP and default-resolution leiden clusters.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_bulkorg_hormones",
    umap_key="X_umap_scvi_genotype_bulkorg_hormones",
    tsne_key=None,
    leiden_key="leiden_scvi_genotype_bulkorg_hormones",
    cellnames=das["names"],
)
# Second pass: clustering only (latent/UMAP keys are None), at resolution 2.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_scvi_genotype_bulkorg_hormones",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-16 14:22:12,048] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-16 14:22:12,194] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-16 14:22:12,195] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-16 14:22:12,464] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-16 14:22:12,503] INFO - scvi.dataset.dataset | Downsampled from 592 to 592 cells
[2020-11-16 14:22:12,510] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-16 14:22:12,636] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-16 14:22:12,638] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-16 14:22:12,869] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-16 14:22:12,904] INFO - scvi.dataset.dataset | Downsampled from 516 to 516 cells
[2020-11-16 14:22:12,910] INFO - scvi.da

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [15]:
# Genotypes A and D, strictly hormone-treated cells (Hormones != "Ctrl", no
# day-0 exception), not flagged in "filtered_cells", percent_mito below 0.15.
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=(
        [x in ["A", "D"] for x in adata.obs["demultiplexed"]]
        & (adata.obs["Hormones"] != "Ctrl")
        & (adata.obs["filtered_cells"] == False)
        & (adata.obs["percent_mito"] < 0.15)
    ),
)
latent = lpy.runSCVI(das["dataset"])
# First pass: latent coordinates, UMAP and default-resolution leiden clusters.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_E001_strictlyhormones",
    umap_key="X_umap_scvi_genotype_E001_strictlyhormones",
    tsne_key=None,
    leiden_key="leiden_scvi_scvi_genotype_E001_strictlyhormones",
    cellnames=das["names"],
)
# Second pass: clustering only (latent/UMAP keys are None), at resolution 2.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_scvi_scvi_genotype_E001_strictlyhormones",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-21 12:48:34,141] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-21 12:48:34,144] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-21 12:48:35,640] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-21 12:48:35,855] INFO - scvi.dataset.dataset | Downsampled from 19450 to 19450 cells
[2020-11-21 12:48:36,305] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-21 12:48:36,307] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-21 12:48:37,620] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-21 12:48:37,811] INFO - scvi.dataset.dataset | Downsampled from 15705 to 15705 cells
[2020-11-21 12:48:37,817] INFO - scvi.dataset.dataset | Merging datasets. Input objects are modified in place.
[2020-11-21 12:48:37,818] INFO - scvi.dataset.dataset | Gene names and cell measurement names are assumed to have a non-null intersection between datasets.
[20

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [16]:
# Add cell-cycle annotation to `adata`. No gene lists are passed, so the helper
# falls back to its built-in S and G2M defaults (Seurat's cell-cycle genes,
# per the printed source); doinspect=True prints that source.
adata = lpy.addCycleCycleAnnotation(adata, doinspect=True)

[35;46;1mAdd Cell Cycle annotation to anndata object[0m[34m
def addCycleCycleAnnotation(adata, s_genes = None, g2m_genes = None, geneprefix = "", use_raw_data= True, doinspect=False):
    if doinspect is True: print("\033[35;46;1mAdd Cell Cycle annotation to anndata object\033[0m\033[34m"); print(inspect.getsource(addCycleCycleAnnotation));print("\033[31;43;1mExecution:\033[0m")

    # uses Seurat Cell Cycles default genes by default
    if s_genes is None: # "MLF1IP"
        s_genes = ["MCM5","PCNA","TYMS","FEN1","MCM2","MCM4","RRM1","UNG","GINS2","MCM6","CDCA7","DTL","PRIM1","UHRF1","HELLS","RFC2","RPA2","NASP","RAD51AP1","GMNN","WDR76","SLBP","CCNE2","UBR7","POLD3","MSH2","ATAD2","RAD51","RRM2","CDC45","CDC6","EXO1","TIPIN","DSCC1","BLM","CASP8AP2","USP1","CLSPN","POLA1","CHAF1B","BRIP1","E2F8"]
    if g2m_genes is None: #use default list
        g2m_genes =["HMGB2","CDK1","NUSAP1","UBE2C","BIRC5","TPX2","TOP2A","NDC80","CKS2","NUF2","CKS1B","MKI67","TMPO","CENPF","TACC3","FAM64A

In [17]:
# Genotypes A and D, cells not flagged in "filtered_cells", hormone-treated or
# day-0 cells, with percent_mito below 0.15.
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=(
        [x in ["A", "D"] for x in adata.obs["demultiplexed"]]
        & (adata.obs["filtered_cells"] == False)
        & ((adata.obs["Hormones"] != "Ctrl") | (adata.obs["Days"] == 0))
        & (adata.obs["percent_mito"] < 0.15)
    ),
)
latent = lpy.runSCVI(das["dataset"])
# First pass: latent coordinates, UMAP and default-resolution leiden clusters.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_E001_LMT_hormones",
    umap_key="X_umap_scvi_genotype_E001_LMT_hormones",
    tsne_key=None,
    leiden_key="leiden_scvi_genotype_E001_LMT_hormones",
    cellnames=das["names"],
)
# Second pass: clustering only (latent/UMAP keys are None), at resolution 2.
# NOTE(review): this key spells "LMT_E001" while the other keys of this run
# use "E001_LMT"; left unchanged because it is a runtime column name.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_scvi_genotype_LMT_E001_hormones",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-23 17:43:36,154] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-23 17:43:36,156] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-23 17:43:37,220] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-23 17:43:37,346] INFO - scvi.dataset.dataset | Downsampled from 9801 to 9801 cells
[2020-11-23 17:43:37,701] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-23 17:43:37,704] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-23 17:43:38,677] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-23 17:43:38,824] INFO - scvi.dataset.dataset | Downsampled from 12514 to 12514 cells
[2020-11-23 17:43:38,829] INFO - scvi.dataset.dataset | Merging datasets. Input objects are modified in place.
[2020-11-23 17:43:38,831] INFO - scvi.dataset.dataset | Gene names and cell measurement names are assumed to have a non-null intersection between datasets.
[2020

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [18]:
# Genotypes A and D, cells not flagged in "filtered_cells", percent_mito below
# 0.15 (no restriction on hormone treatment).
das = lpy.scvi_prepare(
    adata,
    field="demultiplexed",
    cellfilter=(
        [x in ["A", "D"] for x in adata.obs["demultiplexed"]]
        & (adata.obs["filtered_cells"] == False)
        & (adata.obs["percent_mito"] < 0.15)
    ),
)
latent = lpy.runSCVI(das["dataset"])
# First pass: latent coordinates, UMAP and default-resolution leiden clusters.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_E001_LMT",
    umap_key="X_umap_scvi_genotype_E001_LMT",
    tsne_key=None,
    leiden_key="leiden_scvi_genotype_E001_LMT",
    cellnames=das["names"],
)
# Second pass: clustering only (latent/UMAP keys are None), at resolution 2.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_scvi_genotype_E001_LMT",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-23 18:08:05,513] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-23 18:08:05,516] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-23 18:08:07,575] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-23 18:08:07,842] INFO - scvi.dataset.dataset | Downsampled from 15705 to 15705 cells
[2020-11-23 18:08:08,575] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-23 18:08:08,577] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-23 18:08:10,859] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-23 18:08:11,162] INFO - scvi.dataset.dataset | Downsampled from 19450 to 19450 cells
[2020-11-23 18:08:11,172] INFO - scvi.dataset.dataset | Merging datasets. Input objects are modified in place.
[2020-11-23 18:08:11,173] INFO - scvi.dataset.dataset | Gene names and cell measurement names are assumed to have a non-null intersection between datasets.
[20

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [18]:
# Keep only the cells assigned to a hand-picked set of leiden clusters from the
# "E001_LMT_hormones" run, then embed that subset on its own.
# NOTE(review): these cluster ids refer to one specific clustering result; if
# the upstream scVI/leiden run is repeated the labels may differ -- verify
# before reusing.
cellfilter = [
    x in ["8", "3", "0", "6", "5"]
    for x in adata.obs["leiden_scvi_genotype_E001_LMT_hormones"]
]
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=cellfilter)
latent = lpy.runSCVI(das["dataset"])
# leiden_key=None: only latent coordinates and UMAP are stored, no clustering.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_genotype_E001_traj",
    umap_key="X_umap_scvi_genotype_E001_traj",
    tsne_key=None,
    leiden_key=None,
    cellnames=das["names"],
)

[2020-12-18 17:32:49,925] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-12-18 17:32:58,529] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-18 17:32:58,593] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-18 17:33:11,753] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-18 17:33:12,232] INFO - scvi.dataset.dataset | Downsampled from 5773 to 5773 cells
[2020-12-18 17:33:12,268] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-12-18 17:33:13,168] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-18 17:33:13,170] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-18 17:33:21,818] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-18 17:33:22,288] INFO - scvi.dataset.dataset | Downsampled from 5370 to 5370 cells
[2020-12-18 17:33:22,321] INFO - scv

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords


In [19]:
# Persist the annotated object (all embeddings and cluster columns added
# above) to a new h5ad file; the input file is left untouched.
adata.write_h5ad("N2-integrated_organoids.h5ad")