In [1]:
import warnings

import dandelion as ddl
import milopy.core as milo
import numpy as np
import pandas as pd
import scanpy as sc

warnings.filterwarnings("ignore")
sc.logging.print_header()

scanpy==1.9.3 anndata==0.9.1 umap==0.5.3 numpy==1.24.4 scipy==1.11.2 pandas==1.5.3 scikit-learn==1.3.0 statsmodels==0.14.0 python-igraph==0.10.6 pynndescent==0.5.10


In [2]:
import palantir

print(palantir.__version__)

1.3.1


In [3]:
adata = sc.read(
    "/Users/uqztuong/Google Drive/My Drive/dandelion/demo-pseudobulk.h5ad"
)
adata = ddl.tl.setup_vdj_pseudobulk(adata)
sc.pp.neighbors(adata, use_rep="X_scvi", n_neighbors=50)
milo.make_nhoods(adata)
sc.tl.umap(adata)
pb_adata = ddl.tl.vdj_pseudobulk(
    adata, pbs=adata.obsm["nhoods"], obs_to_take="anno_lvl_2_final_clean"
)
sc.tl.pca(pb_adata)
rootcell = np.argmax(pb_adata.obsm["X_pca"][:, 0])
terminal_states = pd.Series(
    ["CD8+T", "CD4+T"],
    index=pb_adata.obs_names[
        [
            np.argmax(pb_adata.obsm["X_pca"][:, 1]),
            np.argmin(pb_adata.obsm["X_pca"][:, 1]),
        ]
    ],
)
print(pb_adata.obs_names[rootcell], terminal_states.index)

710 Index(['21', '536'], dtype='object')


In [4]:
# Run diffusion maps
pca_projections = pd.DataFrame(pb_adata.obsm["X_pca"], index=pb_adata.obs_names)
dm_res = palantir.utils.run_diffusion_maps(pca_projections, n_components=5)
ms_data = palantir.utils.determine_multiscale_space(dm_res)
# ms_data.index = ms_data.index.astype(str)

In [5]:
pca_projections

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,-0.055858,-0.145181,-0.032804,-0.114320,0.037940,-0.016419,0.031216,0.016197,-0.025701,0.030766,...,0.006208,-0.006244,0.010329,-0.026345,0.003148,0.026965,0.004769,-0.013688,0.007879,-0.018179
1,-0.037712,-0.137408,-0.017241,-0.091450,0.072299,-0.008709,0.005672,0.014643,0.044867,0.050976,...,-0.008164,-0.017086,-0.004317,-0.002775,0.010091,-0.001478,-0.024017,-0.012920,-0.010457,-0.002529
2,0.008326,-0.028569,0.036591,0.079991,-0.044030,-0.001210,-0.002328,-0.046066,0.028894,-0.069599,...,-0.006652,-0.001806,-0.008396,-0.027807,-0.031104,0.014089,0.008205,0.019404,0.020659,-0.008329
3,-0.041681,-0.134724,-0.021765,-0.046041,0.032007,0.008929,0.008891,0.008678,0.034753,0.011824,...,-0.001145,-0.026576,0.014356,-0.026067,0.003057,0.012405,0.002287,-0.003118,0.021542,-0.010855
4,-0.004631,-0.013049,0.001323,-0.002315,-0.036576,0.013827,0.095065,-0.006282,0.043328,-0.053349,...,0.011683,-0.024583,0.000299,-0.055858,0.024469,-0.000483,0.029953,0.012009,0.002888,-0.013951
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1356,-0.078528,0.044726,-0.019651,0.035980,0.064424,-0.024891,-0.002080,0.029024,0.016849,0.030910,...,0.042268,-0.015013,0.003375,-0.006701,0.018098,0.000524,-0.015322,0.019099,0.027247,-0.010091
1357,-0.065456,0.006880,0.014784,0.021931,0.012185,0.079895,0.032993,0.058986,0.012170,0.000746,...,-0.002136,0.035374,0.016673,-0.002240,-0.007103,-0.022382,0.009361,0.000127,0.006327,-0.024816
1358,-0.080839,0.030192,0.073635,0.106279,0.016112,0.041294,0.016036,0.011278,-0.013849,0.016557,...,0.021308,-0.009120,-0.013592,-0.024244,0.012498,0.024430,-0.011851,0.011209,-0.020363,-0.025466
1359,0.002935,0.018682,0.093519,0.048942,-0.024210,0.027070,0.024845,-0.029396,-0.003365,0.020492,...,0.020258,-0.011835,-0.011812,0.002201,-0.011202,0.004280,0.019908,-0.020716,0.000543,0.033543


In [6]:
pca_projections.index

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '1351', '1352', '1353', '1354', '1355', '1356', '1357', '1358', '1359',
       '1360'],
      dtype='object', length=1361)

In [7]:
dm_res

{'T': <1361x1361 sparse matrix of type '<class 'numpy.float64'>'
 	with 61818 stored elements in Compressed Sparse Row format>,
 'EigenVectors':              0         1         2         3         4
 0    -0.027106  0.012012  0.026247  0.008475  0.013737
 1    -0.027106  0.011949  0.025940  0.008112  0.012368
 2    -0.027106  0.009833  0.021182  0.002893 -0.003990
 3    -0.027106  0.011900  0.026068  0.008014  0.012194
 4    -0.027106  0.010721  0.022030  0.002131 -0.003671
 ...        ...       ...       ...       ...       ...
 1356 -0.027106  0.014209 -0.020552  0.002235 -0.005380
 1357 -0.027106  0.010828  0.009601 -0.001959 -0.016282
 1358 -0.027106  0.007161  0.003137 -0.017476 -0.041692
 1359 -0.027106 -0.003575  0.008214 -0.050283 -0.078806
 1360 -0.027106  0.011788  0.023906  0.006714  0.006564
 
 [1361 rows x 5 columns],
 'EigenValues': 0    1.000000
 1    0.990653
 2    0.969741
 3    0.911290
 4    0.794423
 dtype: float64,
 'kernel': <1361x1361 sparse matrix of type '<cla

In [8]:
dm_res["EigenVectors"].index, dm_res["EigenValues"].index

(RangeIndex(start=0, stop=1361, step=1), RangeIndex(start=0, stop=5, step=1))

In [9]:
ms_data

Unnamed: 0,0,1,2
0,1.273057,0.841175,0.087057
1,1.266427,0.831342,0.083328
2,1.042149,0.678855,0.029721
3,1.261223,0.835432,0.082328
4,1.136206,0.706030,0.021888
...,...,...,...
1356,1.505926,-0.658666,0.022958
1357,1.147581,0.307684,-0.020121
1358,0.758919,0.100522,-0.179526
1359,-0.378931,0.263230,-0.516537


In [10]:
ms_data.index

RangeIndex(start=0, stop=1361, step=1)

In [11]:
pr_res = palantir.core.run_palantir(
    ms_data,
    pb_adata.obs_names[rootcell],
    num_waypoints=500,
    terminal_states=terminal_states.index,
)

KeyError: '710'

In [12]:
# run the following as per https://github.com/dpeerlab/Palantir/issues/119#issuecomment-1719904250
# Run diffusion maps
palantir.utils.run_diffusion_maps(pb_adata, n_components=5)
palantir.utils.determine_multiscale_space(pb_adata)
# why does it print?

Unnamed: 0,0,1,2
0,1.273057,0.841175,0.087057
1,1.266427,0.831342,0.083328
2,1.042149,0.678855,0.029721
3,1.261223,0.835432,0.082328
4,1.136206,0.706030,0.021888
...,...,...,...
1356,1.505926,-0.658666,0.022958
1357,1.147581,0.307684,-0.020121
1358,0.758919,0.100522,-0.179526
1359,-0.378931,0.263230,-0.516537


In [13]:
pr_res = palantir.core.run_palantir(
    pb_adata,
    early_cell=pb_adata.obs_names[rootcell],
    num_waypoints=500,
    terminal_states=terminal_states.index,
)

Sampling and flocking waypoints...
Time for determining waypoints: 0.00046378374099731445 minutes
Determining pseudotime...
Shortest path distances using 30-nearest neighbor graph...
Time for shortest paths: 0.08461300134658814 minutes
Iteratively refining the pseudotime...
Correlation at iteration 1: 1.0000
Entropy and branch probabilities...
Markov chain construction...
Computing fundamental matrix and absorption probabilities...
Project results to all cells...
