In [1]:
import os
import numpy as np
import pandas as pd
import random
import time
import scanpy as sc
import torch

import warnings
warnings.filterwarnings('ignore')
import logging
logging.basicConfig(level=logging.INFO)
import scDualGN

def seed_torch(seed=666):
    """Seed every random number generator used in this notebook.

    The same ``seed`` is applied to Python's ``random`` module, NumPy's
    global generator and PyTorch (CPU generator, current CUDA device and
    all CUDA devices). It is also exported as ``PYTHONHASHSEED`` in the
    process environment, and the cuDNN backend flags are adjusted.

    Parameters
    ----------
    seed : int, default 666
        Value passed to each seeding function.
    """
    # NOTE(review): this only writes the environment variable; whether the
    # already-running interpreter's hashing is affected should be verified.
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # if you are using multi-GPU.
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN: no autotuned kernel selection, deterministic mode on,
    # and the backend itself switched off.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
    cudnn.enabled = False


seed_torch()

In [2]:
# Location of the input AnnData file (absolute path on the original machine).
COVID_H5AD_PATH = '/media/bgi/zhouruilong/genebank_new/tfs11/ST_MCHRI_BIGDATA/zhouruilong/scanpy_plot/covid_6.h5ad'

# Integer code assigned to each cell type label found in `majorType`.
CELLTYPE_CODES = {
    'B': 0,
    'CD8': 1,
    'Mono': 2,
    'CD4': 3,
    'NK': 4,
    'Macro': 5,
    'DC': 6,
    'Plasma': 7,
    'Mega': 8,
    'Epi': 9,
    'Neu': 10,
    'Mast': 11,
}

adata = sc.read_h5ad(COVID_H5AD_PATH)

# Remove the stored `obsm` entries that are not needed below.
for obsm_key in ('X_tsne', 'X_pca', 'har_emb'):
    del adata.obsm[obsm_key]

# Rename `majorType` -> `celltype` and keep a copy of the string labels
# in `celltype_str` before `celltype` is overwritten with integer codes.
adata.obs['celltype'] = adata.obs['majorType']
del adata.obs['majorType']
adata.obs['celltype_str'] = adata.obs['celltype']

# Translate every label through the lookup table in one pass.
# Going through a plain NumPy array first gives an ordinary Series of
# label values regardless of the dtype of the obs column.
celltype_labels = pd.Series(np.array(adata.obs['celltype']))
celltype_codes = celltype_labels.map(CELLTYPE_CODES)

# Labels missing from the table come back as NaN; fail with a message that
# names them instead of a cryptic integer-cast error.
unmapped = celltype_codes.isna()
if unmapped.any():
    raise ValueError(
        'cell type labels without an integer code: {}'.format(
            sorted(map(str, celltype_labels[unmapped].unique()))
        )
    )

adata.obs['celltype'] = celltype_codes.to_numpy(dtype=np.int32)
del celltype_labels, celltype_codes, unmapped
print(adata)

AnnData object with n_obs × n_vars = 1462702 × 2499
    obs: 'celltype', 'sampleID', 'PatientID', 'datasets', 'City', 'Age', 'Sex', 'Sample type', 'CoVID-19 severity', 'Sample time', 'Sampling day (Days after symptom onset)', 'SARS-CoV-2', 'Single cell sequencing platform', 'BCR single cell sequencing', 'TCR single cell sequencing', 'Outcome', 'Comorbidities', 'COVID-19-related medication and anti-microbials', 'Leukocytes [G/L]', 'Neutrophils [G/L]', 'Lymphocytes [G/L]', 'Unpublished', 'celltype_str'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'hvg', 'neighbors', 'pca'
    obsp: 'connectivities', 'distances'


In [None]:
# kmeans
# Hyper-parameters forwarded to scDualGN.run_scDualGN, gathered in one place.
# NOTE(review): the keyword `beta_daulvae` is spelled exactly as in the
# original call; presumably it matches the library signature, so verify
# before "fixing" the spelling.
run_kwargs = dict(
    n_cluster=12,
    n_z=32,
    n_epoch_update_pq=5,
    n_epochs=30,
    alpha_dualvae=0.04,
    beta_daulvae=20,
    gamma_dualvae=4,
    GPU_id=2,
)

# Wall-clock timing of the whole run.
start = time.time()
z_final, y_pred_lab, model = scDualGN.run_scDualGN(adata, **run_kwargs)
end = time.time()

print('running time = {}'.format(end - start))

INFO:scDualGN.run:device: cuda:2
INFO:scDualGN.run:pretrain device: cuda:2
INFO:scDualGN.run:start pretraining...
INFO:scDualGN.run:Epoch 0/50,Overall loss:32022.2834,MSE:1513.4809,MSE1:438.0606,KL: 10.5723
INFO:scDualGN.run:Epoch 1/50,Overall loss:29067.1840,MSE:1394.8473,MSE1:292.3866,KL: 17.2661
INFO:scDualGN.run:Epoch 2/50,Overall loss:26585.6889,MSE:1287.9692,MSE1:206.3464,KL: 22.9920
INFO:scDualGN.run:Epoch 3/50,Overall loss:24712.1004,MSE:1204.6203,MSE1:154.6500,KL: 27.3390
INFO:scDualGN.run:Epoch 4/50,Overall loss:23362.5169,MSE:1143.9201,MSE1:120.7213,KL: 30.7560
INFO:scDualGN.run:Epoch 5/50,Overall loss:22398.9088,MSE:1100.4845,MSE1:96.9687,KL: 33.5856
INFO:scDualGN.run:Epoch 6/50,Overall loss:21701.1238,MSE:1069.0760,MSE1:79.5400,KL: 36.0701
INFO:scDualGN.run:Epoch 7/50,Overall loss:21179.4102,MSE:1045.6500,MSE1:66.2190,KL: 38.3769
