In [1]:
import os
import numpy as np
import pandas as pd
import random
import time
import scanpy as sc
import torch

import warnings
warnings.filterwarnings('ignore')
import logging
logging.basicConfig(level=logging.INFO)
import scDualGN

def seed_torch(seed=666):
    """Seed every random number generator used by this notebook.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random`` module, NumPy's
    global RNG and PyTorch (CPU, current CUDA device and all CUDA devices),
    then turns off the cuDNN auto-tuner, requests deterministic cuDNN
    kernels and finally disables cuDNN altogether.

    Parameters
    ----------
    seed : int, default 666
        Value handed unchanged to each seeding routine.
    """
    # Exported as a string; Python reads this variable at interpreter start-up,
    # so it mainly matters for child processes launched from here.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # One seeding call per library; the CUDA variants cover single- and multi-GPU use.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
    cudnn.enabled = False


# Apply the default seed as soon as the cell runs.
seed_torch()

In [2]:
# Load data: read the AnnData file and derive an integer-coded cell-type column.
# NOTE(review): absolute, machine-specific path; adjust DATA_PATH when running elsewhere.
DATA_PATH = '/media/bgi/zhouruilong/paper_test/cluster/our_model/Patient_PBMCs/scp_scanpy.h5ad'

# Explicit label -> integer class code table.
CELLTYPE_CODES = {'T': 0, 'B': 1, 'NK': 2, 'Mono': 3, 'DC': 4}

adata = sc.read_h5ad(DATA_PATH)

# Validate up front: a label missing from the table would otherwise surface as an
# opaque cast error, or be silently cast to a wrong class code if it looks like a number.
celltype_labels = adata.obs['cell_type'].astype(str)
unknown_labels = sorted(set(celltype_labels) - set(CELLTYPE_CODES))
if unknown_labels:
    raise ValueError(
        "adata.obs['cell_type'] contains labels with no integer code: {}".format(unknown_labels)
    )

# `arr` holds one int32 class code per cell, in the same order as adata.obs.
arr = celltype_labels.map(CELLTYPE_CODES).to_numpy(dtype=np.int32)
adata.obs['celltype'] = arr
# Keep the original string labels next to the integer codes.
adata.obs['celltype_str'] = adata.obs['cell_type']

In [3]:
# Preprocessing pipeline applied to `adata` before clustering.

# Quality control: require at least 200 counts per cell and at least 3 cells per gene.
sc.pp.filter_cells(adata, min_counts=200)
sc.pp.filter_genes(adata, min_cells=3)

# Normalise each cell to a total of 1e4 counts, then apply log1p.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Flag the top 2500 highly variable genes and keep only those.
sc.pp.highly_variable_genes(adata, n_top_genes=2500)
# Subsetting returns a view; copy it so the in-place scaling below works on an
# independent AnnData instead of relying on an implicit view-to-copy conversion
# (whose warning is hidden by the global warnings filter).
adata = adata[:, adata.var.highly_variable].copy()

# Per-gene scaling with values clipped at 10.
# (A per-cell min-max scaling to [0, 1] via minmax_scale was tried here and left disabled.)
sc.pp.scale(adata, max_value=10)

In [4]:
# kmeans run: train scDualGN with a fixed number of clusters and time the call.
# No cluster_alg is passed, so the library default is used (presumably k-means; confirm in scDualGN).
kmeans_params = dict(
    n_cluster=5,
    n_z=32,
    n_epoch_update_pq=5,
    n_epochs=30,
    alpha_dualvae=0.025,
    beta_daulvae=20,  # keyword spelling kept exactly as passed to run_scDualGN
    gamma_dualvae=2,
    GPU_id=1,
)

start = time.time()
z_final, y_pred_lab, model = scDualGN.run_scDualGN(adata, **kmeans_params)
end = time.time()

print('running time = {}'.format(end - start))

INFO:scDualGN.run:device: cuda:1
INFO:scDualGN.run:pretrain device: cuda:1
INFO:scDualGN.run:start pretraining...
INFO:scDualGN.run:Epoch 0/50,Overall loss:15001.8027,MSE:727.1671,MSE1:228.9205,KL: 24.7618
INFO:scDualGN.run:Epoch 1/50,Overall loss:10991.0715,MSE:546.8658,MSE1:26.1522,KL: 58.0584
INFO:scDualGN.run:Epoch 2/50,Overall loss:10605.2067,MSE:529.2635,MSE1:9.0019,KL: 77.3024
INFO:scDualGN.run:Epoch 3/50,Overall loss:10475.8142,MSE:523.0767,MSE1:6.0657,KL: 85.9542
INFO:scDualGN.run:Epoch 4/50,Overall loss:10375.3375,MSE:518.1557,MSE1:4.9840,KL: 90.1981
INFO:scDualGN.run:Epoch 5/50,Overall loss:10296.5678,MSE:514.2815,MSE1:4.2905,KL: 94.3113
INFO:scDualGN.run:Epoch 6/50,Overall loss:10241.3780,MSE:511.5771,MSE1:3.6820,KL: 98.8817
INFO:scDualGN.run:Epoch 7/50,Overall loss:10198.6434,MSE:509.4809,MSE1:3.2345,KL: 102.2671
INFO:scDualGN.run:Epoch 8/50,Overall loss:10157.3681,MSE:507.4387,MSE1:2.9991,KL: 103.8146
INFO:scDualGN.run:Epoch 9/50,Overall loss:10115.7431,MSE:505.3712,MSE1:

INFO:scDualGN.run:Epoch 29/30, Loss - overall: 8828.0156,daul_VAE:8827.9804,KL:0.0267,Center:92.0811
INFO:scDualGN.run:Epoch 30/30, Loss - overall: 8824.2541,daul_VAE:8824.2188,KL:0.0266,Center:91.7241
INFO:scDualGN.run:scDualGN train finished.
INFO:scDualGN.evalution:acc=0.9019, nmi=0.7987, ari=0.8511


running time = 331.5812213420868


In [5]:
# leiden run: same training settings as the kmeans cell, but clustering with
# cluster_alg='leiden' on a 50-neighbour graph and without a preset cluster count.
# NOTE(review): the recorded output of this cell reports ari=4.5247, which is outside
# the valid range of the adjusted Rand index (at most 1); re-run to confirm the metric.
leiden_params = dict(
    n_z=32,
    n_epoch_update_pq=5,
    n_epochs=30,
    alpha_dualvae=0.025,
    beta_daulvae=20,  # keyword spelling kept exactly as passed to run_scDualGN
    gamma_dualvae=2,
    GPU_id=1,
    cluster_alg='leiden',
    n_neighbors=50,
)

start = time.time()
z_final, y_pred_lab, model = scDualGN.run_scDualGN(adata, **leiden_params)
end = time.time()

print('running time = {}'.format(end - start))

INFO:scDualGN.run:device: cuda:1
INFO:scDualGN.run:pretrain device: cuda:1
INFO:scDualGN.run:start pretraining...
INFO:scDualGN.run:Epoch 0/50,Overall loss:15001.8027,MSE:727.1671,MSE1:228.9205,KL: 24.7618
INFO:scDualGN.run:Epoch 1/50,Overall loss:10991.0715,MSE:546.8658,MSE1:26.1522,KL: 58.0584
INFO:scDualGN.run:Epoch 2/50,Overall loss:10605.2067,MSE:529.2635,MSE1:9.0019,KL: 77.3024
INFO:scDualGN.run:Epoch 3/50,Overall loss:10475.8142,MSE:523.0767,MSE1:6.0657,KL: 85.9542
INFO:scDualGN.run:Epoch 4/50,Overall loss:10375.3375,MSE:518.1557,MSE1:4.9840,KL: 90.1981
INFO:scDualGN.run:Epoch 5/50,Overall loss:10296.5678,MSE:514.2815,MSE1:4.2905,KL: 94.3113
INFO:scDualGN.run:Epoch 6/50,Overall loss:10241.3780,MSE:511.5771,MSE1:3.6820,KL: 98.8817
INFO:scDualGN.run:Epoch 7/50,Overall loss:10198.6434,MSE:509.4809,MSE1:3.2345,KL: 102.2671
INFO:scDualGN.run:Epoch 8/50,Overall loss:10157.3681,MSE:507.4387,MSE1:2.9991,KL: 103.8146
INFO:scDualGN.run:Epoch 9/50,Overall loss:10115.7431,MSE:505.3712,MSE1:

INFO:scDualGN.run:Epoch 29/30, Loss - overall: 8827.7202,daul_VAE:8827.6948,KL:0.0166,Center:91.5506
INFO:scDualGN.run:Epoch 30/30, Loss - overall: 8823.3218,daul_VAE:8823.2965,KL:0.0164,Center:91.1501
INFO:scDualGN.run:scDualGN train finished.
INFO:scDualGN.evalution:acc=0.7916, nmi=0.6868, ari=4.5247


running time = 447.1059534549713
