In [1]:
import os
import numpy as np
import pandas as pd
import random
import time
import scanpy as sc
import torch

import warnings
warnings.filterwarnings('ignore')
import logging
logging.basicConfig(level=logging.INFO)
import scDualGN

def seed_torch(seed=666):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch
    (CPU generator plus the current and all CUDA devices) with the same
    value, exports it as ``PYTHONHASHSEED`` and sets the cuDNN backend
    flags (benchmark off, deterministic on, cuDNN disabled).

    Parameters
    ----------
    seed : int, default 666
        Value handed to each seeding call and written, as a string, to the
        ``PYTHONHASHSEED`` environment variable.

    Returns
    -------
    None
    """
    # cuDNN switches are independent of the seeding calls below.
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Seed once, with the default value, before any data loading or training.
seed_torch()

In [2]:
# Load the breast-cancer dataset into an AnnData object.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook anywhere else.
adata = sc.read_h5ad('/media/bgi/zhouruilong/paper_test/cluster/our_model/scRNA-seq数据新/breast cancers/breast_cancer.h5ad')

# Keep the original cell-type labels under 'celltype_str'. The explicit copy
# guarantees the integer recoding below cannot alter them.
celltype_labels = adata.obs['celltype']
adata.obs['celltype_str'] = celltype_labels.copy()

# Recode the i-th category as the integer i and store plain integers.
# - rename_categories() replaces assignment to `.cat.categories`, which
#   pandas 2.0 no longer allows.
# - the builtin `int` replaces the `np.int` alias, which NumPy 1.24 removed.
# - the number of categories is read from the data instead of being
#   hard-coded (the original assumed exactly 9).
n_celltypes = len(celltype_labels.cat.categories)
adata.obs['celltype'] = (
    celltype_labels.cat.rename_categories(list(range(n_celltypes))).astype(int)
)

In [3]:
# Show the AnnData summary (dimensions and annotation columns) right after loading.
adata

AnnData object with n_obs × n_vars = 100064 × 29733
    obs: 'celltype', 'celltype_str'
    var: 'gene_ids'

In [4]:
# Flag the 2500 most highly variable genes in `adata.var['highly_variable']`.
# NOTE(review): nothing in this notebook normalises or log-transforms the
# data first -- confirm the .h5ad file already holds the expected input.
sc.pp.highly_variable_genes(adata, n_top_genes=2500)

# Subsetting an AnnData returns a view. Materialise it with .copy() so the
# in-place scaling below works on a real object rather than relying on
# AnnData's implicit copy-on-modify, whose warning is hidden by the
# notebook's global warnings.filterwarnings('ignore').
adata = adata[:, adata.var.highly_variable].copy()

# Scale each gene in place, passing max_value=10 as in the original cell.
sc.pp.scale(adata, max_value=10)

In [5]:
# Show the AnnData summary again to check the shape and the columns added by the preprocessing cell.
adata

AnnData object with n_obs × n_vars = 100064 × 2500
    obs: 'celltype', 'celltype_str'
    var: 'gene_ids', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'hvg'

In [None]:
# k-means run (per the original cell label): the number of clusters is
# passed explicitly through n_cluster=9.
# NOTE(review): GPU_id=1 is machine-specific. The keyword spelling
# `beta_daulvae` is kept exactly as in the original call, presumably matching
# the library's own parameter name -- confirm before "correcting" it.
start = time.time()
z_final, y_pred_lab, model = scDualGN.run_scDualGN(
    adata,
    n_cluster=9,
    n_z=32,
    n_epoch_update_pq=5,
    n_epochs=30,
    alpha_dualvae=0.02,
    beta_daulvae=20,
    gamma_dualvae=4,
    GPU_id=1,
)
end = time.time()

# Wall-clock duration of the whole run, in seconds.
print('running time = {}'.format(end - start))

INFO:scDualGN.run:device: cuda:1
INFO:scDualGN.run:pretrain device: cuda:1
INFO:scDualGN.run:start pretraining...
INFO:scDualGN.run:Epoch 0/50,Overall loss:18985.0642,MSE:939.7350,MSE1:47.2054,KL: 77.0896
INFO:scDualGN.run:Epoch 1/50,Overall loss:16434.3275,MSE:820.8069,MSE1:3.9349,KL: 122.4608
INFO:scDualGN.run:Epoch 2/50,Overall loss:15437.7873,MSE:771.2764,MSE1:2.3053,KL: 151.9344
INFO:scDualGN.run:Epoch 3/50,Overall loss:14832.1239,MSE:741.1442,MSE1:1.4230,KL: 177.3801
INFO:scDualGN.run:Epoch 4/50,Overall loss:14403.6483,MSE:719.7944,MSE1:0.9458,KL: 198.8441
INFO:scDualGN.run:Epoch 5/50,Overall loss:14090.6812,MSE:704.1865,MSE1:0.6513,KL: 217.3032
INFO:scDualGN.run:Epoch 6/50,Overall loss:13830.3279,MSE:691.1872,MSE1:0.4847,KL: 232.2749
INFO:scDualGN.run:Epoch 7/50,Overall loss:13604.5827,MSE:679.9086,MSE1:0.3751,KL: 245.5167
INFO:scDualGN.run:Epoch 8/50,Overall loss:13404.2581,MSE:669.8961,MSE1:0.2948,KL: 257.8583
INFO:scDualGN.run:Epoch 9/50,Overall loss:13234.0979,MSE:661.3886,M

In [6]:
# Leiden run (per the original cell label): n_cluster is omitted and
# n_neighbors=50 is passed instead.
# NOTE(review): this overwrites z_final, y_pred_lab and model from any earlier
# run. GPU_id=1 is machine-specific. The keyword spelling `beta_daulvae` is
# kept exactly as in the original call, which completed with it.
start = time.time()
z_final, y_pred_lab, model = scDualGN.run_scDualGN(
    adata,
    n_z=32,
    n_epoch_update_pq=5,
    n_epochs=30,
    alpha_dualvae=0.02,
    beta_daulvae=20,
    gamma_dualvae=4,
    GPU_id=1,
    n_neighbors=50,
)
end = time.time()

# Wall-clock duration of the whole run, in seconds.
print('running time = {}'.format(end - start))

INFO:scDualGN.run:device: cuda:1
INFO:scDualGN.run:pretrain device: cuda:1
INFO:scDualGN.run:start pretraining...
INFO:scDualGN.run:Epoch 0/50,Overall loss:18985.0642,MSE:939.7350,MSE1:47.2054,KL: 77.0896
INFO:scDualGN.run:Epoch 1/50,Overall loss:16434.3275,MSE:820.8069,MSE1:3.9349,KL: 122.4608
INFO:scDualGN.run:Epoch 2/50,Overall loss:15437.7873,MSE:771.2764,MSE1:2.3053,KL: 151.9344
INFO:scDualGN.run:Epoch 3/50,Overall loss:14832.1239,MSE:741.1442,MSE1:1.4230,KL: 177.3801
INFO:scDualGN.run:Epoch 4/50,Overall loss:14403.6483,MSE:719.7944,MSE1:0.9458,KL: 198.8441
INFO:scDualGN.run:Epoch 5/50,Overall loss:14090.6812,MSE:704.1865,MSE1:0.6513,KL: 217.3032
INFO:scDualGN.run:Epoch 6/50,Overall loss:13830.3279,MSE:691.1872,MSE1:0.4847,KL: 232.2749
INFO:scDualGN.run:Epoch 7/50,Overall loss:13604.5827,MSE:679.9086,MSE1:0.3751,KL: 245.5167
INFO:scDualGN.run:Epoch 8/50,Overall loss:13404.2581,MSE:669.8961,MSE1:0.2948,KL: 257.8583
INFO:scDualGN.run:Epoch 9/50,Overall loss:13234.0979,MSE:661.3886,M

running time = 275.6020607948303
