In [1]:
# Curating organoids cell types and projection with in vivo data
import anndata, numpy as np, pandas as pd, imp, lpy, scanpy as sc, random
sc.logging.print_header()
%load_ext rpy2.ipython
%matplotlib inline



scanpy==1.6.0 anndata==0.7.5 umap==0.3.10 numpy==1.18.1 scipy==1.5.4 pandas==1.0.1 scikit-learn==0.22.2 statsmodels==0.11.1 python-igraph==0.8.0 louvain==0.6.1 leidenalg==0.8.3


In [4]:
# load organoid and subset to the E001 clone clusters are PGR+ 
adata = anndata.read_h5ad("N2-integrated_organoids.h5ad")
adatasub = adata[ [ x in ["8", "3", "0", "6", "5"] for x in adata.obs["leiden_scvi_genotype_E001_LMT_hormones"] ] ,: ]

In [54]:
# run Palentir on the subset, picks a start cell that is in the "highest entropy" area, detected from a previous run of palentir
which = lambda lst:list(np.where(lst)[0])
startcell = random.sample(which(adatasub.obs["leiden_scvi_genotype_E001_LMT_hormones"] == "8"), 1)
print(startcell)
imp.reload(lpy)
res = lpy.runPalantir(adatasub, adatasub.obs_names[startcell], doinspect=True)

[1830]
[35;46;1mCompute Trajectory using Palantir[0m[34m
def runPalantir(adata, startcell, doinspect = False):
    if doinspect is True: print("\033[35;46;1mCompute Trajectory using Palantir\033[0m\033[34m"); print(inspect.getsource(runPalantir));print("\033[31;43;1mExecution:\033[0m")
    import palantir
    #import harmony
    if 'highly_variable' not in adata.var.keys():
        sc.pp.highly_variable_genes(adata,inplace=True)
    pca_projections, _ = palantir.utils.run_pca(adata)
    dm_res = palantir.utils.run_diffusion_maps(pca_projections, n_components=5)
    ms_data = palantir.utils.determine_multiscale_space(dm_res)
    pr_res = palantir.core.run_palantir(ms_data, startcell, num_waypoints=500)
    

    return(pr_res)

[31;43;1mExecution:[0m
Determing nearest neighbor graph...
Sampling and flocking waypoints...
Time for determining waypoints: 0.009520912170410156 minutes
Determining pseudotime...
Shortest path distances using 30-nearest neighbor graph...
Time for shortest 

In [None]:

adata.obs["pseudotime"] = res.pseudotime
adata.obs["entropy"] = res.entropy
adata.obs["prob1"] = res.branch_probs[res.branch_probs.keys()[0]]
adata.obs["prob2"] = res.branch_probs[res.branch_probs.keys()[1]]
#import palantir
#palantir.plot.plot_palantir_results(res, adata.obsm["X_umap_scvi_genotype_E001_LMT_hormones"])
#fig, ax = palantir.plot.plot_tsne(adata.obsm["X_umap_scvi_genotype_E001_LMT_hormones"])

In [None]:
sc.set_figure_params(figsize= [6,6])
#adata =  adata_primary[ [ x not in ["filtered", "doublets", "Low QC"] for x in  adata_primary.obs["subcluster_epithelial_balanced"] ], :]
adata.obsm["umap"] = adata.obsm["X_umap_scvi_genotype_E001_traj"]
sc.pl.umap(adata, color=["pseudotime", "entropy", "prob1", "prob2"], save= "color.pdf", color_map = "viridis",size=10)

In [None]:
# run Palentir on the subset, picks a start cell that is in the "highest entropy" area, detected from a previous run of palentir
startcell = random.sample((adata.obsm["umap"][:,0] > 7)&(adata.obsm["umap"][:,0] < 8)&(adata.obsm["umap"][:,1] >0.5), 1)
res = lpy.runPalantir(adata, adata.obs_names[startcell], doinspect=False)
adata.obs["pseudotime"] = res.pseudotime
adata.obs["entropy"] = res.entropy

In [56]:
adata.obs["pseudotime"] = res.pseudotime
adata.obs["entropy"] = res.entropy

adata.obs["prob1"] = res.branch_probs[res.branch_probs.keys()[0]]
adata.obs["prob2"] = res.branch_probs[res.branch_probs.keys()[1]]
#del adata.obs["prob3"] # = res.branch_probs[res.branch_probs.keys()[2]]
sc.pl.umap(adata, color=["pseudotime", "entropy", "prob1", "prob2"], save= "color.pdf", color_map = "viridis",size=10)

IndexError: index 1 is out of bounds for axis 0 with size 1

In [None]:
adata.write_h5ad("N3-integrated_organoids.h5ad")

In [61]:
which = lambda lst:list(np.where(lst)[0])
while True:
    startcell = random.sample(which(adatasub.obs["leiden_scvi_genotype_E001_LMT_hormones"] == "8"), 1)
    print(startcell)
    imp.reload(lpy)
    res = lpy.runPalantir(adatasub, adatasub.obs_names[startcell], doinspect=False)
    if (res.branch_probs.shape[1] == 2): break

[2078]
Determing nearest neighbor graph...
Sampling and flocking waypoints...
Time for determining waypoints: 0.010150388876597086 minutes
Determining pseudotime...
Shortest path distances using 30-nearest neighbor graph...
Time for shortest paths: 0.2811689257621765 minutes
Iteratively refining the pseudotime...
Correlation at iteration 1: 1.0000
Entropy and branch probabilities...
Markov chain construction...
Identification of terminal states...
Computing fundamental matrix and absorption probabilities...
Project results to all cells...
[938]
Determing nearest neighbor graph...
Sampling and flocking waypoints...
Time for determining waypoints: 0.010262533028920492 minutes
Determining pseudotime...
Shortest path distances using 30-nearest neighbor graph...
Time for shortest paths: 0.12562367916107178 minutes
Iteratively refining the pseudotime...
Correlation at iteration 1: 0.9999
Entropy and branch probabilities...
Markov chain construction...
Identification of terminal states...
Com

In [62]:
res.branch_probs

Unnamed: 0,organoids7090732_GAGTCCGAGAGCTATA,organoids7090732_GGGCACTCAATCGAAA
organoids7090723_CATCCACAGCAGCGTA,0.944930,0.055070
organoids7090723_CTAACTTGTGGTTTCA,0.300472,0.699528
organoids7090723_CTACATTCAGACTCGC,0.264681,0.735319
organoids7090723_CTGGTCTCACCGTTGG,0.999522,0.000000
organoids7090723_GCGCGATAGCTGCAAG,0.999524,0.000000
...,...,...
organoids7090732_TTTGGTTTCTACTATC,0.999576,0.000000
organoids7090732_TTTGTCAAGTTACCCA,0.046820,0.953180
organoids7090732_TTTGTCACAATGCCAT,0.040513,0.959487
organoids7090732_TTTGTCAGTACCCAAT,0.999984,0.000000


In [63]:
adata.obs["pseudotime"] = res.pseudotime
adata.obs["entropy"] = res.entropy

adata.obs["prob1"] = res.branch_probs[res.branch_probs.keys()[0]]
adata.obs["prob2"] = res.branch_probs[res.branch_probs.keys()[1]]
#del adata.obs["prob3"] # = res.branch_probs[res.branch_probs.keys()[2]]
sc.pl.umap(adata, color=["pseudotime", "entropy", "prob1", "prob2"], save= "color.pdf", color_map = "viridis",size=10)



In [64]:
adata.obs.columns

Index(['souporcell', 'demultiplexed', 'sample_names', 'log2p1_count',
       'percent_mito', 'n_genes', 'batch', 'Lane', 'Days', 'Hormones',
       'Inhibitor', 'IsClonal', 'scrublet_pred', 'scrublet_local_pred',
       'scrublet_score', 'scrublet_cluster_score', 'filtered_cells',
       'leiden_scvi_sampl_cc', 'leiden_scvi_genotype_bulkorg',
       'leiden_scvi_nobatch_bulkorg', 'leiden_scvi_genotype_bulkorg_subsmpl',
       'leiden_scvi_nobatch_bulkorg_subsmpl',
       'leiden_scvi_genotype_bulkorg_subsmpl_renamed',
       'leiden_scvi_scvi_genotype_E001', 'leidenres2_scvi_scvi_genotype_E001',
       'leiden_scvi_scvi_genotype_noinhibit_B44clone',
       'leidenres2_scvi_scvi_genotype_noinhibit_B44clone',
       'leiden_scvi_scvi_genotype_noinhibit_clones',
       'leidenres2_scvi_scvi_genotype_noinhibit_clones',
       'leiden_scvi_scvi_genotype_E001_hormones',
       'leidenres2_scvi_scvi_genotype_E001_hormones',
       'leiden_scvi_genotype_bulkorg_hormones',
       'leidenres2_sc

In [65]:
adata.write_h5ad("N4-integrated_organoids.h5ad")