In [1]:
# Third-party dependencies; scanpy is used below to read the .h5ad input.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
# scanpy verbosity 0 = report errors only, which keeps cell output quiet.
sc.settings.verbosity = 0
import warnings
# NOTE(review): this silences *every* Python warning for the whole session,
# including deprecation and numerical warnings; consider a narrower filter.
warnings.filterwarnings("ignore")

%load_ext autoreload
%autoreload 2

In [2]:
import GenKI as gk

In [3]:
# Read the AnnData object from the h5ad file (path is relative to the working directory).
ada_WT = sc.read_h5ad('data/microglial_seurat_WT.h5ad')
# Upper-case all gene names so that later lookups can use the upper-case symbol 'TREM2'.
ada_WT.var_names = ada_WT.var_names.str.upper()

# Display the object summary (dimensions, obs and var columns).
ada_WT

AnnData object with n_obs × n_vars = 1139 × 3000
    obs: 'sce_source', 'treatment', 'trem2_genotype', 'snn_cluster', 'nCount_RNA', 'nFeature_RNA'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable'

In [4]:
# Number of cells per value of the 'sce_source' annotation.
ada_WT.obs[['sce_source']].value_counts()

sce_source
WT_12         648
WT_untx       491
dtype: int64

In [5]:
# Sanity check: the knock-out target must be present among the gene names.
# Membership is tested on the Index itself; copying it into a list first only
# adds an unnecessary 3000-element copy and a linear scan.
'TREM2' in ada_WT.var_names

True

In [6]:
# Keep only the cells annotated with sce_source 'WT_12'. The trailing .copy()
# makes the result an independent AnnData object instead of a view.
ada_WT = ada_WT[ada_WT.obs['sce_source'].eq('WT_12'), :].copy()
ada_WT

AnnData object with n_obs × n_vars = 648 × 3000
    obs: 'sce_source', 'treatment', 'trem2_genotype', 'snn_cluster', 'nCount_RNA', 'nFeature_RNA'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable'

In [7]:
# Set up the GenKI object for the TREM2 knock-out on the WT_12 cells.
# With rebuild_GRN = False the log below shows the network being loaded from
# "<GRN_file_dir>/<pcNet_name>.npz".
# NOTE(review): obs_label = 'ident' is not among the obs columns listed for
# ada_WT; it appears to be unused while target_cell is None — confirm.
KO_obj = gk.GenKI(ada_WT, 
                target_gene = ['TREM2'], 
                target_cell = None, 
                obs_label = 'ident',
                GRN_file_dir = 'GRNs',
                rebuild_GRN = False,
                pcNet_name = 'pcNet_Trem2_WT_12',
                verbose = True,
                n_cpus = 8)
# Printing the object summarises the count matrix shape, network shape and target gene.
print(KO_obj)

use all the cells (648) in adata
loading GRN from "GRNs/pcNet_Trem2_WT_12.npz"
init completed

*** Base Object ***
 ._counts: (648, 3000)
 ._net: (3000, 3000)
 ._target_gene: ['TREM2']


In [8]:
# Calling the object with a list of gene names returns their indices
# (here [2999]; presumably positions in var_names — confirm).
KO_obj(['TREM2'])

[2999]

In [9]:
# Build the graph data object for the unperturbed data. The repr below shows
# x of shape [3000, 648] (presumably genes x cells — confirm) and the edge list.
data = KO_obj.data_init()
data

Data(x=[3000, 648], edge_index=[2, 1350000], y=[3000])

In [10]:
# Build the knock-out counterpart of the graph data object. Per the log
# message, the target gene's expression is set to zero and its edges are removed.
data_KO = KO_obj.KO_data_init()
data_KO 

set expression of "['TREM2']" to zeros and remove edges


Data(x=[3000, 648], edge_index=[2, 1345574], y=[3000])

#### Train or load the VGAE model

In [None]:
# Train a VGAE model on the unperturbed graph data. This cell was not executed
# in the saved notebook; a later cell assigns `model` from a saved checkpoint instead.
model = gk.train_VGAEmodel(data)

In [None]:
# Uncomment to save a freshly trained model under the name used by the load cell below:
# gk.save_model(model, 'Microglia_TREM2_WT_12')

In [11]:
# Load previously saved model parameters; the log line below reports the file that was read.
model = gk.load_model(KO_obj, 'Microglia_TREM2_WT_12')

load model parameters from "./model/Microglia_TREM2_WT_12.th"


In [12]:
# Switch the model to evaluation mode; the returned module repr shows its layers.
model.eval()

VGAE(
  (encoder): VariationalGCNEncoder(
    (conv1): GCNConv(648, 4)
    (conv_mu): GCNConv(4, 2)
    (conv_logstd): GCNConv(4, 2)
  )
  (decoder): InnerProductDecoder()
)

In [None]:
# for name, parameter in model.named_parameters():
#     print(name)
#     print(parameter) # bias + coeff
#     print(parameter.shape)
#     print('\n')

In [13]:
# Latent variables (returned as a z_mu / z_std pair) for the unperturbed data.
z_mu, z_std = gk.get_latent_vars(data, model)

In [14]:
# Latent variables for the knock-out data, obtained with the same model.
z_mu_KO, z_std_KO = gk.get_latent_vars(data_KO, model)

### Compute the distance between the WT and KO latent variables

In [15]:
# Distance between the KO and WT latent variables, using by = 'KL'.
# The KO pair is passed first and the WT pair second; the result has one value
# per gene (shape (3000,) below).
dis = gk.get_distance(z_mu_KO, z_std_KO, z_mu, z_std, by = 'KL')
print(dis.shape)

# Distance obtained for the knocked-out gene itself.
dis[KO_obj(['TREM2'])]

(3000,)


array([1.25540649e+28])

In [None]:
# Reverse-direction variant (WT pair first, KO pair second), kept for reference:
# dis = gk.get_distance(z_mu, z_std, z_mu_KO, z_std_KO, by = 'KL')
# print(dis.shape)

# dis[KO_obj(['TREM2'])]

In [16]:
# Permutation null: n = 100 permutations, same 'KL' distance as above.
# The result has one row per permutation and one column per gene ((100, 3000) below).
# NOTE(review): no random seed is set anywhere in this notebook, so the null
# may differ between runs — confirm whether gk.pmt can be seeded.
null = gk.pmt(data, data_KO, model, n = 100, by = 'KL')
null.shape

Permutating: 100%|██████████| 100/100 [01:03<00:00,  1.58it/s]


(100, 3000)

In [17]:
# Rank genes using the observed distances together with the permutation null.
# The resulting table is indexed by gene and has columns dis, index, hit and rank.
res = gk.get_generank(data, dis, null)
# (optional keyword left disabled: save_significant_as = 'gene_list_Microglia_TREM2')
res

Unnamed: 0,dis,index,hit,rank
TREM2,1.255406e+28,2999,100,1
CTSD,4.323314e+25,1105,100,2
APOE,3.548728e+25,2964,100,3
CD74,1.580187e+25,2998,100,4
LYZ2,5.440485e+24,2961,100,5
...,...,...,...,...
COX6A1,1.277547e+15,1476,100,144
ATOX1,1.272084e+15,2027,100,145
QK,1.189416e+15,1426,100,146
CAMK2N1,1.174189e+15,1372,100,147


In [18]:
# Row of the ranking table for the knocked-out gene.
res.loc['TREM2']

dis      1.255406e+28
index    2.999000e+03
hit      1.000000e+02
rank     1.000000e+00
Name: TREM2, dtype: float64

In [19]:
# Gene ranking computed from the distances alone (no permutation null is passed).
# The resulting table has columns dis, rank and dis_norm.
res_gsea = gk.get_generank_gsea(data, dis) 
# (optional keyword left disabled: save_as = 'Microglia_TREM2')
res_gsea

Unnamed: 0,dis,rank,dis_norm
TREM2,1.255406e+28,1,2.287351
CTSD,4.323314e+25,2,2.221377
APOE,3.548728e+25,3,2.218843
CD74,1.580187e+25,4,2.208279
LYZ2,5.440485e+24,5,2.193903
...,...,...,...
CYB5RL,2.996686e-08,2860,-2.462781
NMNAT3,2.412275e-08,2861,-2.500306
ELOVL6,2.342130e-08,2862,-2.505432
TMEM121,2.272594e-08,2863,-2.510672
