In [1]:
import os
import numpy as np
import pandas as pd
import random
import time
import scanpy as sc
import torch

# Install a global 'ignore' filter so Python warnings are not shown. It is set
# before the imports that follow, so warnings emitted while importing those
# packages are suppressed as well.
import warnings
warnings.filterwarnings('ignore')
# Configure the root logger to emit messages at INFO level and above.
import logging
logging.basicConfig(level=logging.INFO)
import scDualGN
import scanpy.external as sce

def seed_torch(seed=666):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch
    (CPU generator, current CUDA device and all CUDA devices) with the same
    value, exports ``PYTHONHASHSEED`` and switches cuDNN off.

    Parameters
    ----------
    seed : int, default 666
        Value handed to each seeding function and written (as a string) to
        the ``PYTHONHASHSEED`` environment variable.

    Returns
    -------
    None
    """
    # Note: changing PYTHONHASHSEED at runtime does not re-seed the hash
    # function of the already-running interpreter; it is only inherited by
    # processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for Python, NumPy, and PyTorch on CPU / current GPU / all GPUs.
    for seed_fn in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    # No autotuning, deterministic kernels requested, and cuDNN disabled.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
    cudnn.enabled = False
# Fix all RNG seeds (default seed 666) before any stochastic step runs.
seed_torch()

# scanpy logging verbosity level 3.
sc.settings.verbosity = 3
# Set both resolutions in ONE call. set_figure_params() has non-None defaults
# (dpi=80, dpi_save=150), so calling it a second time with only dpi_save=300
# would silently reset the on-screen dpi from 100 back to the default.
sc.settings.set_figure_params(dpi=100, dpi_save=300)

In [None]:
# dataset5: 9 annotated cell types (the "15476" in the original note is
# presumably the number of cells — TODO confirm).
# NOTE(review): absolute, machine-specific path; adjust for the local setup.
adata = sc.read_h5ad('/media/bgi/zhouruilong/deepMNN/data/dataset5.h5ad')
# Keep the human-readable annotation (used for plotting) before `celltype`
# is overwritten with integer ids below.
adata.obs['celltype_str'] = adata.obs['celltype']

# Integer id assigned to each annotated cell type.
celltype_to_id = {
    'CD4 T cell': 0,
    'Monocyte_CD14': 1,
    'CD8 T cell': 2,
    'B cell': 3,
    'NK cell': 4,
    'Monocyte_FCGR3A': 5,
    'Plasmacytoid dendritic cell': 6,
    'Megakaryocyte': 7,
    'Hematopoietic stem cell': 8,
}

# Cast to object first so the lookup behaves the same whether the column is
# stored as categorical or as plain strings; labels missing from the mapping
# come back as NaN.
celltype_ids = adata.obs['celltype'].astype(object).map(celltype_to_id)

# Fail with the offending labels spelled out instead of an opaque
# "invalid literal for int()" raised by the integer cast.
missing = celltype_ids.isna()
if missing.any():
    unknown = sorted({str(label) for label in adata.obs['celltype'][missing]})
    raise ValueError(
        'celltype labels without an integer id: {}'.format(unknown)
    )

arr = celltype_ids.to_numpy().astype(np.int32)
adata.obs['celltype'] = arr

In [None]:
# Display the summary of `adata` after loading and label encoding.
adata

In [None]:
# Normalise total counts per cell, then log1p-transform.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Flag the 2500 most variable genes and keep only those columns.
sc.pp.highly_variable_genes(adata, n_top_genes=2500)
# .copy() turns the subset into a standalone AnnData instead of a view of the
# full object, so the in-place scaling below (and later writes to .obs/.obsm)
# does not depend on an implicit view-to-actual conversion.
adata = adata[:, adata.var.highly_variable].copy()

# Scale each gene; values above 10 after scaling are clipped.
sc.pp.scale(adata, max_value=10)

In [None]:
# Display `adata` again to inspect it after preprocessing and gene selection.
adata

In [None]:
# Run scDualGN with the 'leiden' clustering option and report the wall-clock time.
scdualgn_kwargs = dict(
    n_z=32,
    n_epoch_update_pq=5,
    n_epochs=30,
    # NOTE(review): keyword is spelled "daulvae" exactly as in the original
    # call; presumably this matches scDualGN's signature — verify before renaming.
    beta_daulvae=20,
    gamma_dualvae=2,
    GPU_id=1,
    cluster_alg='leiden',
    n_neighbors=20,
)

start = time.time()
z_final, y_pred_lab, model = scDualGN.run_scDualGN(adata, **scdualgn_kwargs)
end = time.time()

print('running time = {}'.format(end-start))

In [None]:
# Attach the scDualGN outputs to `adata`: the embedding goes to .obsm and the
# predicted cluster labels to .obs.
adata.obsm['X_scDualGN'] = z_final
adata.obs['celltype_scDualGN'] = y_pred_lab

# Neighbours/UMAP are not computed on X_scDualGN here; the following cell
# builds them on the Harmony-corrected embedding instead.

# Cast the label columns used for plotting to categorical. The integer
# `celltype` column is not converted.
for label_col in ('celltype_str', 'celltype_scDualGN'):
    adata.obs[label_col] = adata.obs[label_col].astype('category')

In [None]:
# Harmony integration of the scDualGN embedding using the 'batchlb' column;
# the adjusted coordinates are stored in obsm['X_harmony'].
sce.pp.harmony_integrate(
    adata,
    key='batchlb',
    basis='X_scDualGN',
    adjusted_basis='X_harmony',
)

# Neighbour graph and UMAP computed on the Harmony-adjusted embedding.
sc.pp.neighbors(adata, use_rep="X_harmony")
sc.tl.umap(adata)

# One UMAP per annotation column, each saved under its own file name.
# NOTE(review): the file names are spelled "datset5" (sic); kept unchanged so
# previously generated outputs keep their names.
for obs_key, pdf_name in (
    ('celltype_str', 'datset5_celltype_str_scDualGN.pdf'),
    ('batchlb', 'datset5_batch_scDualGN.pdf'),
):
    sc.pl.umap(adata, color=[obs_key], save=pdf_name)