In [1]:
# Integration of organoids within subsets
# `lpy` supplies the helpers used throughout this notebook (scvi_prepare,
# runSCVI, insertLatent, applyFactorRename, subsample); presumably a
# project-local module — confirm where it is installed from.
# NOTE(review): `imp` is deprecated since Python 3.4 and removed in Python 3.12;
# no visible cell uses it — confirm before relying on it.
import anndata, numpy as np, pandas as pd, imp, lpy, scanpy as sc
# Print the versions of the main analysis packages used for this run.
sc.logging.print_versions()

scanpy==1.5.1 anndata==0.7.4 umap==0.4.6 numpy==1.19.1 scipy==1.5.2 pandas==1.0.5 scikit-learn==0.23.2 statsmodels==0.11.1 python-igraph==0.8.2 louvain==0.6.1 leidenalg==0.8.1


In [2]:
# Load the integrated organoid AnnData object; the following cells add scVI
# latent spaces, UMAP coordinates and Leiden clusterings to it.
adata = anndata.read_h5ad("N1-integrated_organoids.h5ad")

In [3]:
# Build the cell subset: selected genotypes, cells not flagged in
# `filtered_cells`, inhibitor "Ctrl" samples, and percent_mito below 0.2.
# Named `cell_mask` (not `filter`) so the builtin `filter` is not shadowed.
cell_mask = (
    adata.obs["demultiplexed"].isin(["A", "D", "U1", "U2", "B44"])
    # explicit `== False` kept on purpose: the column dtype is not visible here
    & (adata.obs["filtered_cells"] == False)
    & (adata.obs["Inhibitor"] == "Ctrl")
    & (adata.obs["percent_mito"] < 0.2)
)

# Use genotypes ("demultiplexed") as the batch key for correction, no subsampling
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=cell_mask, doinspect=True)

[35;46;1mPrepare Data for Scvi/TotalVi[0m[34m
def scvi_prepare(anndatapath, field, cellfilter = None, nbgenes = 5000, genes_to_filter= None, use_ccfilter_prefix=None, citeseqkey = "protein_expression", use_raw_slot_instead =None, min_cell_threshold= 0, doinspect=False):
    if doinspect is True: print("\033[35;46;1mPrepare Data for Scvi/TotalVi\033[0m\033[34m"); print(inspect.getsource(scvi_prepare));print("\033[31;43;1mExecution:\033[0m")
    if use_ccfilter_prefix is not None :
        genes_to_filter = [use_ccfilter_prefix + x for x in ["HMGB2","CDK1","NUSAP1","UBE2C","BIRC5","TPX2","TOP2A","NDC80","CKS2","NUF2","CKS1B","MKI67","TMPO","CENPF","TACC3","FAM64A","SMC4","CCNB2","CKAP2L","CKAP2","AURKB","BUB1","KIF11","ANP32E","TUBB4B","GTSE1","KIF20B","HJURP","CDCA3","HN1","CDC20","TTK","CDC25C","KIF2C","RANGAP1","NCAPD2","DLGAP5","CDCA2","CDCA8","ECT2","KIF23","HMMR","AURKA","PSRC1","ANLN","LBR","CKAP5","CENPE","CTCF","NEK2","G2E3","GAS2L3","CBX5","CENPA","MCM5","PCNA","TYMS","FEN1"

In [4]:
# Run scVI on the prepared dataset. Per the helper source echoed in this cell's
# output, the defaults are nbstep=500 training epochs and n_latent=64, and the
# trainer is created with use_cuda=True.
# NOTE(review): no random seed is set in the visible cells, so the latent space
# is presumably not reproducible from run to run — confirm.
latent = lpy.runSCVI(das["dataset"], doinspect=True)

[35;46;1mRun scvi[0m[34m
def runSCVI(dataset, nbstep = 500, n_latent = 64, doinspect= False):
    if doinspect is True: print("\033[35;46;1mRun scvi\033[0m\033[34m"); print(inspect.getsource(runSCVI));print("\033[31;43;1mExecution:\033[0m")
    vae = VAE(dataset.nb_genes, n_batch= dataset.n_batches, n_labels= dataset.n_labels, n_latent = n_latent)
    trainer = UnsupervisedTrainer(vae, dataset, train_size=0.9, frequency=5, use_cuda=True)
    trainer.train(n_epochs=nbstep)
    full = trainer.create_posterior(trainer.model, dataset, indices=np.arange(len(dataset)))
    return(full.sequential().get_latent()[0])

[31;43;1mExecution:[0m
[2020-11-26 13:11:31,951] INFO - scvi.inference.inference | KL warmup for 400 epochs


HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…




In [5]:
# Store the latent variables back in the object, and produce UMAP coordinates
# and Leiden clusters from them. tsne_key is None, so presumably no t-SNE
# embedding is stored.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_noinhib",
    umap_key="X_umap_noinhib",
    tsne_key=None,
    leiden_key="leiden_noinhib",
    cellnames=das["names"],
    doinspect=True,
)

[35;46;1mCompute Clusters and Reduces representations[0m[34m
def insertLatent(adata, latent , latent_key= "latent", umap_key= "X_umap", tsne_key = "X_tsne", leiden_key = "leiden", rename_cluster_key= None,cellfilter = None, cellnames =None, leiden_resolution=1.0,doinspect=False):
    if doinspect is True: print("\033[35;46;1mCompute Clusters and Reduces representations\033[0m\033[34m"); print(inspect.getsource(insertLatent));print("\033[31;43;1mExecution:\033[0m")

    if cellnames is None:
        #order of full must match
        assert latent.shape[0] == len(adata.obs_names),  "cell names need for be provided if size of latent mismatches adata"
        map = range(len(adata.obs_names))
        if latent_key is not None:
            adata.obsm[latent_key] = latent
    else:
        print("defining permutation")
        dalist = list(adata.obs_names)
        tmap = {}
        for i in range(len(adata.obs_names)):
            tmap.update( {adata.obs_names[i] : i})
# for i in range(l

In [6]:
# Second Leiden clustering of the same latent space at resolution 2. The
# latent, UMAP and t-SNE keys are all None; the logged steps show clustering only.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_noinhib",
    cellnames=das["names"],
    leiden_resolution=2,
)

defining permutation
Finding clusters
Inserting Cluster Id


In [7]:
# Subset: genotypes U1/U2/B44, cells not flagged in `filtered_cells`,
# inhibitor "Ctrl", Lane 13, percent_mito below 0.2.
# Named `cell_mask` (not `filter`) so the builtin `filter` is not shadowed.
cell_mask = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    # explicit `== False` kept on purpose: the column dtype is not visible here
    & (adata.obs["filtered_cells"] == False)
    & (adata.obs["Inhibitor"] == "Ctrl")
    & (adata.obs["Lane"] == 13)
    & (adata.obs["percent_mito"] < 0.2)
)
# Use genotypes ("demultiplexed") as the batch key for correction, no subsampling
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=cell_mask)
latent = lpy.runSCVI(das["dataset"])
# First pass stores latent + UMAP + Leiden; second pass only adds a Leiden
# clustering at resolution 2 on the same latent space.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_bulkorg_noinhib",
    umap_key="X_umap_bulkorg_noinhib",
    tsne_key=None,
    leiden_key="leiden_bulkorg_noinhib",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_bulkorg_noinhib",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-27 11:25:35,068] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 11:25:35,071] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 11:25:36,051] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 11:25:36,204] INFO - scvi.dataset.dataset | Downsampled from 16217 to 16217 cells
[2020-11-27 11:25:36,476] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 11:25:36,479] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 11:25:37,157] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 11:25:37,269] INFO - scvi.dataset.dataset | Downsampled from 12593 to 12593 cells
[2020-11-27 11:25:37,275] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-27 11:25:38,339] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 11:25:38,342] INFO - scvi.dataset.dataset | Remapp

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [8]:
# Subset: genotypes U1/U2/B44, cells not flagged in `filtered_cells`,
# hormone-treated cells (Hormones != "Ctrl") plus all day-0 cells,
# percent_mito below 0.2, Lane 13.
# Named `cell_mask` (not `filter`) so the builtin `filter` is not shadowed.
cell_mask = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    # explicit `== False` kept on purpose: the column dtype is not visible here
    & (adata.obs["filtered_cells"] == False)
    & ((adata.obs["Hormones"] != "Ctrl") | (adata.obs["Days"] == 0))
    & (adata.obs["percent_mito"] < 0.2)
    & (adata.obs["Lane"] == 13)
)

# Use genotypes ("demultiplexed") as the batch key for correction, no subsampling
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=cell_mask)
latent = lpy.runSCVI(das["dataset"])
# First pass stores latent + UMAP + Leiden; second pass only adds a Leiden
# clustering at resolution 2 on the same latent space.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_bulkorg_hormones",
    umap_key="X_umap_bulkorg_hormones",
    tsne_key=None,
    leiden_key="leiden_bulkorg_hormones",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_bulkorg_hormones",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-27 11:53:26,251] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 11:53:26,254] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 11:53:27,903] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 11:53:28,177] INFO - scvi.dataset.dataset | Downsampled from 32523 to 32523 cells
[2020-11-27 11:53:28,602] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 11:53:28,605] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 11:53:29,685] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 11:53:29,866] INFO - scvi.dataset.dataset | Downsampled from 22246 to 22246 cells
[2020-11-27 11:53:30,185] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 11:53:30,188] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 11:53:30,917] INFO - scvi.dataset.dataset | Computing the library size for the new data

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


... storing 'leiden_bulkorg_noinhib' as categorical
... storing 'leidenres2_bulkorg_noinhib' as categorical
... storing 'leiden_bulkorg_hormones' as categorical
... storing 'leidenres2_bulkorg_hormones' as categorical


In [9]:
# Subset: genotypes U1/U2/B44, cells not flagged in `filtered_cells`,
# percent_mito below 0.2, Lane 13 (no restriction on treatment).
# Named `cell_mask` (not `filter`) so the builtin `filter` is not shadowed.
cell_mask = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    # explicit `== False` kept on purpose: the column dtype is not visible here
    & (adata.obs["filtered_cells"] == False)
    & (adata.obs["percent_mito"] < 0.2)
    & (adata.obs["Lane"] == 13)
)

# Use genotypes ("demultiplexed") as the batch key for correction, no subsampling
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=cell_mask)
latent = lpy.runSCVI(das["dataset"])
# First pass stores latent + UMAP + Leiden; second pass only adds a Leiden
# clustering at resolution 2 on the same latent space.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_bulkorg",
    umap_key="X_umap_bulkorg",
    tsne_key=None,
    leiden_key="leiden_bulkorg",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_bulkorg",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-11-27 12:48:39,357] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 12:48:39,361] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 12:48:42,263] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 12:48:42,728] INFO - scvi.dataset.dataset | Downsampled from 54954 to 54954 cells
[2020-11-27 12:48:43,393] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 12:48:43,396] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 12:48:45,129] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 12:48:45,423] INFO - scvi.dataset.dataset | Downsampled from 35604 to 35604 cells
[2020-11-27 12:48:45,915] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 12:48:45,918] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 12:48:47,115] INFO - scvi.dataset.dataset | Computing the library size for the new data

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


... storing 'leiden_bulkorg' as categorical
... storing 'leidenres2_bulkorg' as categorical


In [10]:
# Subset: genotypes U1/U2/B44, cells not flagged in `filtered_cells`,
# hormone-treated cells (Hormones != "Ctrl") plus all day-0 cells,
# percent_mito below 0.2, Lane 13.
# Named `cell_mask` (not `filter`) so the builtin `filter` is not shadowed.
cell_mask = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    # explicit `== False` kept on purpose: the column dtype is not visible here
    & (adata.obs["filtered_cells"] == False)
    & ((adata.obs["Hormones"] != "Ctrl") | (adata.obs["Days"] == 0))
    & (adata.obs["percent_mito"] < 0.2)
    & (adata.obs["Lane"] == 13)
)

# Use sample names ("sample_names"), not genotypes, as the batch key for
# correction; no subsampling
das = lpy.scvi_prepare(adata, field="sample_names", cellfilter=cell_mask)
latent = lpy.runSCVI(das["dataset"])
# First pass stores latent + UMAP + Leiden; second pass only adds a Leiden
# clustering at resolution 2 on the same latent space.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_samplecorr_bulkorg_hormones",
    umap_key="X_umap_samplecorr_bulkorg_hormones",
    tsne_key=None,
    leiden_key="leiden_samplecorr_bulkorg_hormones",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_samplecorr_bulkorg_hormones",
    cellnames=das["names"],
    leiden_resolution=2,
)
# Intermediate checkpoint: the last cell of the notebook writes this same file again.
adata.write_h5ad("N2-integrated_organoids.h5ad")

[2020-11-27 14:55:23,384] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-27 14:55:24,134] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 14:55:24,136] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 14:55:25,705] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 14:55:25,962] INFO - scvi.dataset.dataset | Downsampled from 5310 to 5310 cells
[2020-11-27 14:55:25,997] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-27 14:55:27,005] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 14:55:27,007] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 14:55:29,206] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 14:55:29,678] INFO - scvi.dataset.dataset | Downsampled from 7425 to 7425 cells
[2020-11-27 14:55:29,725] INFO - scv

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


... storing 'leiden_samplecorr_bulkorg_hormones' as categorical
... storing 'leidenres2_samplecorr_bulkorg_hormones' as categorical


In [11]:
# Subset: genotypes U1/U2/B44, cells not flagged in `filtered_cells`,
# percent_mito below 0.2, Lane 13 (no restriction on treatment).
# Named `cell_mask` (not `filter`) so the builtin `filter` is not shadowed.
cell_mask = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    # explicit `== False` kept on purpose: the column dtype is not visible here
    & (adata.obs["filtered_cells"] == False)
    & (adata.obs["percent_mito"] < 0.2)
    & (adata.obs["Lane"] == 13)
)
# Use sample names ("sample_names"), not genotypes, as the batch key for
# correction; no subsampling
das = lpy.scvi_prepare(adata, field="sample_names", cellfilter=cell_mask)
latent = lpy.runSCVI(das["dataset"])
# First pass stores latent + UMAP + Leiden; second pass only adds a Leiden
# clustering at resolution 2 on the same latent space.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_samplecorr_bulkorg",
    umap_key="X_umap_samplecorr_bulkorg",
    tsne_key=None,
    leiden_key="leiden_samplecorr_bulkorg",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_samplecorr_bulkorg",
    cellnames=das["names"],
    leiden_resolution=2,
)


[2020-11-27 15:54:54,593] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-27 15:54:55,450] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 15:54:55,452] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 15:54:57,271] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 15:54:57,584] INFO - scvi.dataset.dataset | Downsampled from 6652 to 6652 cells
[2020-11-27 15:54:57,614] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-11-27 15:54:58,306] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-11-27 15:54:58,308] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-11-27 15:54:59,856] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-11-27 15:55:00,106] INFO - scvi.dataset.dataset | Downsampled from 5310 to 5310 cells
[2020-11-27 15:55:00,131] INFO - scv

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [12]:
# Subsample to balance samples that share identical conditions (some are
# replicates): replicate sample names are first merged under one condition
# label, then the label is combined with the genotype to form the partition.
replicate_labels = {
    ("WSSS_END9397527", "WSSS_END9397533"): "Day6 E+P Ctrl",
    ("WSSS_END9397529", "WSSS_END9397534"): "Day6 E+P NOTCHi",
    ("WSSS_END9397531", "WSSS_END9397535"): "Day6 E+P WNTi",
}
conditions = lpy.applyFactorRename(adata.obs["sample_names"], replicate_labels, doinspect=True)
cond_and_geno = np.array(
    [cond + geno for cond, geno in zip(conditions, adata.obs["demultiplexed"])]
)
# Candidate cells: not flagged in `filtered_cells`, percent_mito below 0.2, Lane 13.
qc_lane13 = (
    (adata.obs["filtered_cells"] == False)
    & (adata.obs["percent_mito"] < 0.2)
    & (adata.obs["Lane"] == 13)
)
# NOTE(review): presumably 1500 is a per-partition size — the helper body is
# truncated in the echoed source, confirm. `subsample` is reused by later cells.
subsample = lpy.subsample(qc_lane13, 1500, partition=cond_and_geno, doinspect=True)
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=subsample)
latent = lpy.runSCVI(das["dataset"])
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_bulksmpl",
    umap_key="X_umap_bulksmpl",
    tsne_key=None,
    leiden_key="leiden_bulksmpl",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_bulksmpl",
    cellnames=das["names"],
    leiden_resolution=2,
)

[35;46;1mRename classes in list[0m[34m
def applyFactorRename(input, dico, doinspect = False):
    if doinspect is True: print("\033[35;46;1mRename classes in list\033[0m\033[34m"); print(inspect.getsource(applyFactorRename));
    import numpy as np
    tmp = np.array(input, dtype=object)
    out = tmp.copy()
    for k,x in dico.items():
        if isinstance(k, tuple):
            for l in k:
                out[tmp == l] = x
        else:
            out[tmp == k] = x
    leftover_categories = set(out)
    leftover_categories = leftover_categories.difference(set(dico.values()))
    categories = list(dico.values())
    categories = categories + list(leftover_categories)
    return(pd.Categorical(out, categories, ordered=True))

[35;46;1mSample a subset of a defined size[0m[34m
def subsample(truefalse_vector, subsamplesize, partition = None, doinspect = False):
    if doinspect is True: print("\033[35;46;1mSample a subset of a defined size\033[0m\033[34m"); print(inspect.getsource

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [13]:
# Reuse the balanced `subsample` mask from the earlier cell, restricted to
# hormone-treated cells (Hormones != "Ctrl") plus all day-0 cells.
hormone_or_day0 = (adata.obs["Hormones"] != "Ctrl") | (adata.obs["Days"] == 0)
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=subsample & hormone_or_day0)
latent = lpy.runSCVI(das["dataset"])
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_bulksmpl_hormones",
    umap_key="X_umap_bulksmpl_hormones",
    tsne_key=None,
    leiden_key="leiden_bulksmpl_hormones",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_bulksmpl_hormones",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-12-01 15:59:48,377] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-01 15:59:48,379] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-01 15:59:48,794] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-01 15:59:48,867] INFO - scvi.dataset.dataset | Downsampled from 9597 to 9597 cells
[2020-12-01 15:59:49,105] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-01 15:59:49,108] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-01 15:59:49,707] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-01 15:59:49,804] INFO - scvi.dataset.dataset | Downsampled from 10500 to 10500 cells
[2020-12-01 15:59:50,062] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-01 15:59:50,065] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-01 15:59:50,716] INFO - scvi.dataset.dataset | Computing the library size for the new data
[

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [14]:
# Reuse the balanced `subsample` mask from the earlier cell, restricted to
# cells whose Inhibitor value is "Ctrl".
no_inhibitor = adata.obs["Inhibitor"] == "Ctrl"
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=subsample & no_inhibitor)
latent = lpy.runSCVI(das["dataset"])
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_bulksmpl_noinhib",
    umap_key="X_umap_bulksmpl_noinhib",
    tsne_key=None,
    leiden_key="leiden_bulksmpl_noinhib",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_bulksmpl_noinhib",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-12-01 16:18:38,641] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-12-01 16:18:39,418] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-01 16:18:39,420] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-01 16:18:41,104] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-01 16:18:41,376] INFO - scvi.dataset.dataset | Downsampled from 5932 to 5932 cells
[2020-12-01 16:18:41,409] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-12-01 16:18:42,380] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-01 16:18:42,382] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-01 16:18:44,470] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-01 16:18:44,812] INFO - scvi.dataset.dataset | Downsampled from 7500 to 7500 cells
[2020-12-01 16:18:44,855] INFO - scv

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [15]:
# Subset: genotypes U1/U2/B44, cells not flagged in `filtered_cells`,
# hormone-treated cells (Hormones != "Ctrl") plus all day-0 cells,
# inhibitor "Ctrl", percent_mito below 0.2, Lane 13.
# Named `cell_mask` (not `filter`) so the builtin `filter` is not shadowed.
cell_mask = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    # explicit `== False` kept on purpose: the column dtype is not visible here
    & (adata.obs["filtered_cells"] == False)
    & ((adata.obs["Hormones"] != "Ctrl") | (adata.obs["Days"] == 0))
    & (adata.obs["Inhibitor"] == "Ctrl")
    & (adata.obs["percent_mito"] < 0.2)
    & (adata.obs["Lane"] == 13)
)

# Use genotypes ("demultiplexed") as the batch key for correction, no subsampling
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=cell_mask)
latent = lpy.runSCVI(das["dataset"])
# First pass stores latent + UMAP + Leiden; second pass only adds a Leiden
# clustering at resolution 2 on the same latent space.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_bulkorg_hormnoinhib",
    umap_key="X_umap_bulkorg_hormnoinhib",
    tsne_key=None,
    leiden_key="leiden_bulkorg_hormnoinhib",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_bulkorg_hormnoinhib",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-12-04 11:43:00,651] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-04 11:43:00,654] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-04 11:43:01,334] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-04 11:43:01,439] INFO - scvi.dataset.dataset | Downsampled from 10592 to 10592 cells
[2020-12-04 11:43:01,445] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-12-04 11:43:02,942] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-04 11:43:02,944] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-04 11:43:05,553] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-04 11:43:05,975] INFO - scvi.dataset.dataset | Downsampled from 8839 to 8839 cells
[2020-12-04 11:43:06,025] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-12-04 11:43:06,850] INFO - s

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [16]:
# Subset: genotypes U1/U2/B44, cells not flagged in `filtered_cells`,
# strictly hormone-treated cells (Hormones != "Ctrl"; day-0 cells are NOT
# added back here), inhibitor "Ctrl", percent_mito below 0.2, Lane 13.
# Named `cell_mask` (not `filter`) so the builtin `filter` is not shadowed.
cell_mask = (
    adata.obs["demultiplexed"].isin(["U1", "U2", "B44"])
    # explicit `== False` kept on purpose: the column dtype is not visible here
    & (adata.obs["filtered_cells"] == False)
    & (adata.obs["Hormones"] != "Ctrl")
    & (adata.obs["Inhibitor"] == "Ctrl")
    & (adata.obs["percent_mito"] < 0.2)
    & (adata.obs["Lane"] == 13)
)

# Use genotypes ("demultiplexed") as the batch key for correction, no subsampling
das = lpy.scvi_prepare(adata, field="demultiplexed", cellfilter=cell_mask)
latent = lpy.runSCVI(das["dataset"])
# First pass stores latent + UMAP + Leiden; second pass only adds a Leiden
# clustering at resolution 2 on the same latent space.
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key="scvi_bulkorg_strhormnoinhib",
    umap_key="X_umap_bulkorg_strhormnoinhib",
    tsne_key=None,
    leiden_key="leiden_bulkorg_strhormnoinhib",
    cellnames=das["names"],
)
adata = lpy.insertLatent(
    adata,
    latent,
    latent_key=None,
    umap_key=None,
    tsne_key=None,
    leiden_key="leidenres2_bulkorg_strhormnoinhib",
    cellnames=das["names"],
    leiden_resolution=2,
)

[2020-12-05 12:05:51,437] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-12-05 12:05:52,020] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-05 12:05:52,023] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-05 12:05:53,099] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-05 12:05:53,270] INFO - scvi.dataset.dataset | Downsampled from 3754 to 3754 cells
[2020-12-05 12:05:53,285] INFO - scvi.dataset.anndataset | Dense size under 1Gb, casting to dense format (np.ndarray).
[2020-12-05 12:05:54,207] INFO - scvi.dataset.dataset | Remapping labels to [0,N]
[2020-12-05 12:05:54,209] INFO - scvi.dataset.dataset | Remapping batch_indices to [0,N]
[2020-12-05 12:05:56,200] INFO - scvi.dataset.dataset | Computing the library size for the new data
[2020-12-05 12:05:56,523] INFO - scvi.dataset.dataset | Downsampled from 7148 to 7148 cells
[2020-12-05 12:05:56,549] INFO - scv

HBox(children=(FloatProgress(value=0.0, description='training', max=500.0, style=ProgressStyle(description_wid…


defining permutation
Inserting Latent coords
computing UMAP
Inserting Umap coords
Finding clusters
Inserting Cluster Id
defining permutation
Finding clusters
Inserting Cluster Id


In [17]:
# Turn "Days" into an ordered categorical with levels 0 < 2 < 6.
adata.obs["Days"] = pd.Categorical(adata.obs["Days"], categories=[0, 2, 6], ordered=True)

# Fixed colour lists stored under "<column>_colors" in .uns.
# NOTE(review): presumably one colour per category, in category order — confirm
# the list lengths against the categories of each column.
palettes = {
    "Days_colors": ["#00CCFF", "#559900", "#FF0000"],
    "Inhibitor_colors": ["#CC0000", "#AAFFAA", "#8888FF"],
    "Hormones_colors": ["#00CCFF", "#00CC00", "#EEAA00"],
    "demultiplexed_colors": ["#AA0000", "#CCCC00", "#00DD00", "#FF0000", "#CC0000", "#00CCCC", "#888888"],
}
for uns_key, colors in palettes.items():
    adata.uns[uns_key] = colors

# Persist the annotated object, including every embedding and clustering added above.
adata.write_h5ad("N2-integrated_organoids.h5ad")

... storing 'leiden_bulkorg_strhormnoinhib' as categorical
... storing 'leidenres2_bulkorg_strhormnoinhib' as categorical
