In [1]:
import scanpy as sc

import warnings
# Silence every warning category for the rest of the session. This keeps the cell
# output compact, but it also hides deprecation notices from the imported libraries.
warnings.filterwarnings("ignore")

In [2]:
import scTenifoldXct as st

In [3]:
# Load the example scRNA-seq dataset from an .h5ad file
# (the path is relative to the notebook's working directory).
adata = sc.read_h5ad('data/adata_merge_example.h5ad')
# Bare last expression: displays the AnnData summary (dimensions, obs/var columns, obsm, layers).
adata

AnnData object with n_obs × n_vars = 4096 × 2608
    obs: 'n_genes', 'percent_mito', 'n_counts', 'louvain', 'patient_id', 'patient_tumorsection', 'NormalvsTumor', 'patientno', 'PNC', 'PIC', 'ViralvsNonViral', 'ident', 'batch'
    var: 'gene_ids', 'n_cells'
    obsm: 'X_pca', 'X_umap'
    layers: 'log1p', 'raw'

In [4]:
# Split the cells by sample origin using the 'NormalvsTumor' column of adata.obs:
# 'N' rows go to the normal subset, 'T' rows to the tumor subset.
normal_mask = adata.obs['NormalvsTumor'] == 'N'
tumor_mask = adata.obs['NormalvsTumor'] == 'T'

# .copy() turns each selection into an independent object rather than a view of adata.
ada_N = adata[normal_mask, :].copy()
ada_T = adata[tumor_mask, :].copy()

# Display (cells, genes) for both subsets as a quick sanity check.
ada_N.shape, ada_T.shape

((2057, 2608), (2039, 2608))

In [5]:
# Build the scTenifoldXct object for the normal-sample subset (ada_N).
xct_N = st.scTenifoldXct(
    data=ada_N,
    # The two cell types of interest; presumably matched against the values of
    # ada_N.obs[obs_label] -- TODO confirm the sender/receiver order with the library docs.
    cell_names=['B cells', 'Fibroblasts'],
    obs_label="ident",
    # With True the GRNs are built during this call; this is the slow step
    # (see the "making pcNet" timings in the recorded output).
    rebuild_GRN=True,
    # Directory associated with the GRN files for this sample
    # (presumably where they are written/read -- confirm).
    GRN_file_dir='Net_example/Net_B2Fib_N/',
    verbose=True,
    # The recorded run reported 16 CPUs for this value; presumably -1 means "use all available".
    n_cpus=-1,
)

(cell, feature): (65, 2608)
(cell, feature): (36, 2608)
selected 249 LR pairs
building GRN of B cells...
ray init, using 16 CPUs
execution time of making pcNet: 36.17 s
building GRN of Fibroblasts...
ray init, using 16 CPUs
execution time of making pcNet: 21.18 s
building correspondence...
concatenating GRNs...
scTenifoldXct init completed


In [7]:
# Build the scTenifoldXct object for the tumor-sample subset (ada_T),
# using the same cell types and label column as the normal-sample object.
xct_T = st.scTenifoldXct(
    data=ada_T,
    cell_names=['B cells', 'Fibroblasts'],
    obs_label="ident",
    # With True the GRNs are built during this call (the slow step).
    rebuild_GRN=True,
    # Separate GRN directory for the tumor sample.
    GRN_file_dir='Net_example/Net_B2Fib_T/',
    verbose=True,
    # NOTE(review): the normal-sample object is built with n_cpus = -1, this one with 8 --
    # confirm the difference is intentional.
    n_cpus=8,
)

(cell, feature): (39, 2608)
(cell, feature): (59, 2608)
selected 338 LR pairs
building GRN of B cells...
ray init, using 8 CPUs
execution time of making pcNet: 23.31 s
building GRN of Fibroblasts...
ray init, using 8 CPUs
execution time of making pcNet: 30.61 s
building correspondence...
concatenating GRNs...
scTenifoldXct init completed


In [8]:
# Merge the two per-condition scTenifoldXct objects into a single object;
# the following cells train on it and run the differential test.
# Tumor is passed first and normal second.
# NOTE(review): confirm which argument the library treats as the reference sample.
XCTs = st.merge_scTenifoldXct(xct_T, xct_N)

merging samples and building correspondence...
merge_scTenifoldXct init completed


In [None]:
# show model architecture (optional)
# XCTs._nn_trainer.arch()

In [None]:
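# load model states saved by an earlier run instead of retraining
# (pass the directory that was given to save_model_states, 'model_mergeXct_B2Fib' in this notebook)
# XCTs.trainer.load_model_states('model_mergeXct_B2Fib')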

In [9]:
# Get the embeddings from the merged object. With train = True a model is trained first
# (1000 iterations, roughly 20 minutes in the recorded run).
emb = XCTs.get_embeds(train = True)
# Display the shape of the returned embeddings.
emb.shape

training...: 100%|████████████████████████████████████████████████████████████████████████████| 1000/1000 [20:14<00:00,  1.21s/it]


(10432, 3)

In [10]:
# Save the trained model states under the 'model_mergeXct_B2Fib' directory
# (the recorded run wrote model_1.th through model_4.th there).
XCTs.trainer.save_model_states('model_mergeXct_B2Fib')

save model to model_mergeXct_B2Fib/model_1.th
save model to model_mergeXct_B2Fib/model_2.th
save model to model_mergeXct_B2Fib/model_3.th
save model to model_mergeXct_B2Fib/model_4.th


In [None]:
# show training losses (only available after get_embeds was run with train = True)
# XCTs.plot_losses()

In [11]:
# Get the ligand-receptor (L-R) pairs that differ significantly between the two merged samples.
# Step 1: compute pair-wise distances from the embeddings. The return value is not used here;
# presumably the result is kept on XCTs for the next call -- confirm.
XCTs.nn_aligned_diff(emb) 
# Step 2: run the differential test (chi-square, judging by the method name) and keep the
# resulting table of enriched pairs, which includes p_val and q_val columns.
xcts_pairs_diff = XCTs.chi2_diff_test()
# Bare last expression: display the result table.
xcts_pairs_diff

computing pair-wise euclidean distances...
computing pair-wise euclidean distances...
merged pair-wise distances

Total enriched: 4 / 6801664


Unnamed: 0,ligand,receptor,dist,correspondence,dist.1,correspondence.1,diff2,FC,p_val,q_val,enriched_rank
VEGFA_NRP1,VEGFA,NRP1,0.01135,0.938058,0.019848,0.0,7.2e-05,15.011393,0.000107,0.01837,1
MDK_SDC2,MDK,SDC2,0.011746,1.418245,0.003256,13.266869,7.2e-05,14.985146,0.000108,0.018579,2
MDK_ITGA6,MDK,ITGA6,0.017074,0.082409,0.009189,1.420393,6.2e-05,12.922403,0.000325,0.040788,3
ANG_PLXNB2,ANG,PLXNB2,0.008679,0.988701,0.016268,0.249772,5.8e-05,11.972731,0.00054,0.049846,4
