In [1]:
import os
import numpy as np
import pandas as pd
import random
import time
import scanpy as sc
import torch

import warnings
warnings.filterwarnings('ignore')
import logging
logging.basicConfig(level=logging.INFO)
import scDualGN

def seed_torch(seed=666):
    """Seed the random number generators used by this notebook.

    Applies ``seed`` to Python's ``random`` module, NumPy's global RNG and
    PyTorch (CPU and CUDA generators), stores it in the ``PYTHONHASHSEED``
    environment variable, and reconfigures the cuDNN backend flags.

    Args:
        seed: Integer seed applied to every generator (default 666).
    """
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so
    # this presumably only influences child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning, request deterministic kernels, then turn the
    # cuDNN backend off altogether.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
    cudnn.enabled = False


seed_torch()

In [2]:
# Load the HIV dataset from disk.
# NOTE(review): absolute, machine-specific path -- consider moving it to a
# configurable constant so the notebook can run elsewhere.
adata = sc.read_h5ad('/media/bgi/zhouruilong/paper_test/cluster/our_model/scRNA-seq数据新/HIV/HIV.h5ad')

# Keep the original annotation under 'celltype_str' before 'celltype' is
# overwritten with integer labels.
celltype_cat = adata.obs['celltype'].astype('category')
adata.obs['celltype_str'] = celltype_cat

# A missing label would silently become code -1 below, so fail loudly instead
# (the previous integer cast also raised on missing values).
if celltype_cat.isna().any():
    raise ValueError("adata.obs['celltype'] contains missing labels")

# Integer-encode the labels as 0..n_categories-1 in category order.
# This replaces two APIs that no longer exist: assigning to `.cat.categories`
# (removed in pandas 2.0) and `np.int` (removed in NumPy 1.24). It also drops
# the hard-coded assumption of exactly 8 cell types.
adata.obs['celltype'] = celltype_cat.cat.codes.astype(int)

In [3]:
adata

AnnData object with n_obs × n_vars = 59286 × 16980
    obs: 'celltype', 'celltype_str'

In [4]:
# Flag the 2500 most variable genes in adata.var['highly_variable'].
# NOTE(review): scanpy's default HVG flavor expects log-normalized input --
# confirm the h5ad already holds log1p-transformed expression values.
sc.pp.highly_variable_genes(adata, n_top_genes=2500)

# Slicing an AnnData returns a view. Copy it explicitly so the in-place
# scaling below works on real data instead of triggering an implicit copy
# (whose warning is hidden by the global warnings filter).
adata = adata[:, adata.var.highly_variable].copy()

# Scale each gene, clipping values above 10.
sc.pp.scale(adata, max_value=10)

In [5]:
adata

AnnData object with n_obs × n_vars = 59286 × 2500
    obs: 'celltype', 'celltype_str'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'hvg'

In [None]:
# kmeans: run scDualGN with a fixed number of clusters and time the call.
# NOTE(review): no cluster_alg is passed, so k-means is presumably the
# library default -- confirm against scDualGN.run_scDualGN.
start = time.time()
z_final, y_pred_lab, model = scDualGN.run_scDualGN(
    adata,
    n_cluster=8,
    n_z=32,
    n_epoch_update_pq=5,
    n_epochs=30,
    beta_daulvae=10,
    gamma_dualvae=4,
    GPU_id=1,
)
end = time.time()

# Wall-clock duration of the run, in seconds.
print('running time = {}'.format(end - start))

INFO:scDualGN.run:device: cuda:1
INFO:scDualGN.run:pretrain device: cuda:1
INFO:scDualGN.run:start pretraining...
INFO:scDualGN.run:Epoch 0/50,Overall loss:16661.7837,MSE:1634.7912,MSE1:78.3318,KL: 54.4616
INFO:scDualGN.run:Epoch 1/50,Overall loss:15400.7522,MSE:1538.9527,MSE1:2.5083,KL: 119.1711
INFO:scDualGN.run:Epoch 2/50,Overall loss:15300.9876,MSE:1529.2079,MSE1:1.8877,KL: 135.8102
INFO:scDualGN.run:Epoch 3/50,Overall loss:15229.1063,MSE:1522.0935,MSE1:1.6949,KL: 139.2111
INFO:scDualGN.run:Epoch 4/50,Overall loss:15162.1329,MSE:1515.4911,MSE1:1.4372,KL: 147.3379
INFO:scDualGN.run:Epoch 5/50,Overall loss:15099.7342,MSE:1509.3171,MSE1:1.2611,KL: 151.8857
INFO:scDualGN.run:Epoch 6/50,Overall loss:15038.0339,MSE:1503.1942,MSE1:1.1323,KL: 156.2815
INFO:scDualGN.run:Epoch 7/50,Overall loss:14975.7000,MSE:1497.0082,MSE1:0.9979,KL: 162.6222
INFO:scDualGN.run:Epoch 8/50,Overall loss:14911.3404,MSE:1490.6234,MSE1:0.8493,KL: 170.8835
INFO:scDualGN.run:Epoch 9/50,Overall loss:14847.7488,MSE:1

In [6]:
# leiden: run scDualGN with Leiden clustering (cluster_alg='leiden',
# n_neighbors=50) and time the call. No n_cluster is passed here.
start = time.time()
z_final, y_pred_lab, model = scDualGN.run_scDualGN(
    adata,
    n_z=32,
    n_epoch_update_pq=5,
    n_epochs=30,
    beta_daulvae=10,
    gamma_dualvae=4,
    cluster_alg='leiden',
    GPU_id=2,
    n_neighbors=50,
)
end = time.time()

# Wall-clock duration of the run, in seconds.
print('running time = {}'.format(end - start))

INFO:scDualGN.run:device: cuda:2
INFO:scDualGN.run:pretrain device: cuda:2
INFO:scDualGN.run:start pretraining...
INFO:scDualGN.run:Epoch 0/50,Overall loss:16661.7837,MSE:1634.7912,MSE1:78.3318,KL: 54.4616
INFO:scDualGN.run:Epoch 1/50,Overall loss:15400.7522,MSE:1538.9527,MSE1:2.5083,KL: 119.1711
INFO:scDualGN.run:Epoch 2/50,Overall loss:15300.9876,MSE:1529.2079,MSE1:1.8877,KL: 135.8102
INFO:scDualGN.run:Epoch 3/50,Overall loss:15229.1063,MSE:1522.0935,MSE1:1.6949,KL: 139.2111
INFO:scDualGN.run:Epoch 4/50,Overall loss:15162.1329,MSE:1515.4911,MSE1:1.4372,KL: 147.3379
INFO:scDualGN.run:Epoch 5/50,Overall loss:15099.7342,MSE:1509.3171,MSE1:1.2611,KL: 151.8857
INFO:scDualGN.run:Epoch 6/50,Overall loss:15038.0339,MSE:1503.1942,MSE1:1.1323,KL: 156.2815
INFO:scDualGN.run:Epoch 7/50,Overall loss:14975.7000,MSE:1497.0082,MSE1:0.9979,KL: 162.6222
INFO:scDualGN.run:Epoch 8/50,Overall loss:14911.3404,MSE:1490.6234,MSE1:0.8493,KL: 170.8835
INFO:scDualGN.run:Epoch 9/50,Overall loss:14847.7488,MSE:1

INFO:scDualGN.run:Epoch 28/30, Loss - overall: 13831.3253,daul_VAE:13831.3000,KL:0.0173,Center:76.6598
INFO:scDualGN.run:Epoch 29/30, Loss - overall: 13792.0892,daul_VAE:13792.0642,KL:0.0172,Center:76.3561
INFO:scDualGN.run:Epoch 30/30, Loss - overall: 13819.0430,daul_VAE:13819.0177,KL:0.0172,Center:76.3791
INFO:scDualGN.run:scDualGN train finished.
INFO:scDualGN.evalution:acc=0.7317, nmi=0.7040, ari=0.6196


running time = 211.85083961486816
