In [9]:
import os
import torch
import pandas as pd
import scanpy as sc
from sklearn import metrics
import multiprocessing as mp
import matplotlib.pyplot as plt
import numpy as np
import gc
import time

In [10]:
from GraphST import GraphST

In [11]:
# Compute device. The package runs on 'cpu' by default; a GPU is recommended.
# Prefer the second GPU ('cuda:1') when the host has one, but fall back to
# 'cuda:0' on single-GPU hosts: there 'cuda:1' is an invalid device ordinal and
# would only fail later, when tensors are first moved to the device.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')

# Location of R, which is necessary for the mclust algorithm.
# Please replace it with the local R installation path.
os.environ['R_HOME'] = '/home/anaconda3/envs/GraphST/lib/R'

In [12]:
# Python <-> R bridge used for the mclust clustering step.
# NOTE: this cell is expected to run after os.environ['R_HOME'] has been set,
# since the R installation path is needed for mclust (presumably rpy2 reads it
# when it is first imported -- confirm against the rpy2 documentation).
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

# Load the R package 'mclust' through rpy2; this fails if the package is not
# installed in the R environment.
mclust = importr('mclust')

In [13]:
# Load the full dataset from a single .h5ad file.
# Earlier input location, kept for reference:
#   file_fold = '/home/yahui/Yahui/Projects/data/S1_A1_S3_A1/'
#   adata = sc.read_h5ad(file_fold + 'filtered_feature_bc_matrix.h5ad')
file_fold = '/home/workspace2/zhaofangyuan/data_h5ad/'  # replace with the local data directory
adata = sc.read_h5ad(os.path.join(file_fold, 'aging.h5ad'))

# Make variable (gene) names unique before any downstream processing.
adata.var_names_make_unique()

# Bare last expression: display the AnnData summary as the cell output.
adata

AnnData object with n_obs × n_vars = 378918 × 374
    obs: 'fov', 'center_x', 'center_y', 'min_x', 'max_x', 'min_y', 'max_y', 'age', 'clust_annot', 'slice', 'organism_ontology_term_id', 'sex_ontology_term_id', 'suspension_type', 'cell_type_ontology_term_id', 'assay_ontology_term_id', 'tissue_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'development_stage_ontology_term_id', 'donor_id', 'is_primary_data', 'cell_type_annot', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'slice_id', 'ct', 'ct_sub'
    var: 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype'
    uns: 'batch_condition', 'cell_type_colors', 'schema_version', 'title'
    obsm: 'X_pca', 'X_spatial', 'X_umap', 'spatial'

In [14]:
# Store the slice identifier and the 'ct' annotation as pandas categoricals so
# that their category lists can be inspected and used for selection below.
for _obs_col in ('slice_id', 'ct'):
    adata.obs[_obs_col] = adata.obs[_obs_col].astype('category')

In [15]:
# Display every slice identifier present in the dataset (the categories of the
# 'slice_id' column); the slice-selection cell picks its ids from this list.
adata.obs['slice_id'].cat.categories

Index(['MsBrainAgingSpatialDonor_10_0', 'MsBrainAgingSpatialDonor_10_1',
       'MsBrainAgingSpatialDonor_10_2', 'MsBrainAgingSpatialDonor_11_0',
       'MsBrainAgingSpatialDonor_11_1', 'MsBrainAgingSpatialDonor_11_2',
       'MsBrainAgingSpatialDonor_12_0', 'MsBrainAgingSpatialDonor_12_1',
       'MsBrainAgingSpatialDonor_1_0', 'MsBrainAgingSpatialDonor_2_0',
       'MsBrainAgingSpatialDonor_2_1', 'MsBrainAgingSpatialDonor_3_0',
       'MsBrainAgingSpatialDonor_3_1', 'MsBrainAgingSpatialDonor_4_0',
       'MsBrainAgingSpatialDonor_4_1', 'MsBrainAgingSpatialDonor_4_2',
       'MsBrainAgingSpatialDonor_5_0', 'MsBrainAgingSpatialDonor_5_1',
       'MsBrainAgingSpatialDonor_5_2', 'MsBrainAgingSpatialDonor_6_0',
       'MsBrainAgingSpatialDonor_6_1', 'MsBrainAgingSpatialDonor_6_2',
       'MsBrainAgingSpatialDonor_7_0', 'MsBrainAgingSpatialDonor_7_1',
       'MsBrainAgingSpatialDonor_7_2', 'MsBrainAgingSpatialDonor_8_0',
       'MsBrainAgingSpatialDonor_8_1', 'MsBrainAgingSpatialDonor_8_2'

In [16]:
# Select slice
import anndata as ad
# adata_raw0=adata[adata.obs['slice_id']=='MsBrainAgingSpatialDonor_11_0']#3
# adata_raw1=adata[adata.obs['slice_id']=='MsBrainAgingSpatialDonor_11_1']#4
# adata_raw2=adata[adata.obs['slice_id']=='MsBrainAgingSpatialDonor_11_2']#5
adata_raw3=adata[adata.obs['slice_id']=='MsBrainAgingSpatialDonor_10_0']#2
adata_raw4=adata[adata.obs['slice_id']=='MsBrainAgingSpatialDonor_10_1']#1
adatas=[adata_raw3,adata_raw4]
adatas = ad.concat(adatas)

adatas.obs['slice_id'] = adatas.obs['slice_id'].astype('category')
adatas.obs['ct'] = adatas.obs['ct'].astype('category')

In [17]:
# Keep an independent copy of the concatenated slices; later cells take their
# working copy from `adata_raw`, so it serves as the unmodified starting point.
adata_raw = adatas.copy()

In [18]:
# Target number of clusters = number of distinct values in obs['tissue'].
# NOTE(review): presumably each 'tissue' label is one annotated region of the
# selected slices -- confirm this is the intended ground truth for clustering.
n_clusters = len(np.unique(adata_raw.obs['tissue']))

In [19]:
# Working copy of the selected slices. This rebinds `adata`, which until now
# referred to the full dataset loaded from disk.
adata = adata_raw.copy()
# Ensure 'slice_id' is categorical on the working copy.
adata.obs['slice_id'] = adata.obs['slice_id'].astype('category')
# Identifiers of the slices present in the working copy.
slice_list = adata.obs['slice_id'].cat.categories

In [20]:
import tracemalloc

from GraphST.utils import clustering

# Clustering backend: 'mclust', 'leiden', or 'louvain'.
tool = 'mclust'

# Benchmark one full GraphST run (training + clustering) on the selected slices
# and store the wall time and peak traced memory alongside the result.
# NOTE: tracemalloc only traces memory allocated through Python's allocators,
# so GPU memory is not included in the reported peak.
tracemalloc.start()
time_st = time.perf_counter()  # monotonic clock, unaffected by system clock changes
try:
    # Start from a fresh copy so re-running this cell never trains on an
    # object that a previous run has already modified.
    adata = adata_raw.copy()
    adata.obs['slice_id'] = adata.obs['slice_id'].astype('category')
    slice_list = adata.obs['slice_id'].cat.categories

    # Define and train the model; train() returns the AnnData object that the
    # clustering and saving steps below operate on.
    model = GraphST.GraphST(adata, device=device)
    adata = model.train()

    # Clustering into `n_clusters` groups.
    if tool == 'mclust':
        clustering(adata, n_clusters, method=tool)
    elif tool in ('leiden', 'louvain'):
        clustering(adata, n_clusters, method=tool, start=0.1, end=2.0, increment=0.01)
    else:
        # Previously an unknown value silently skipped clustering.
        raise ValueError(f"Unsupported clustering tool: {tool!r}")

    time_ed = time.perf_counter()
    time_cost = time_ed - time_st
    peak = tracemalloc.get_traced_memory()[1]  # (current, peak) in bytes
finally:
    # Always stop tracing, even when training fails or is interrupted;
    # otherwise tracing stays enabled and slows down every later cell.
    tracemalloc.stop()

print(time_cost/60)  # elapsed minutes

memory = peak / 1024 / 1024  # bytes -> MiB

adata.uns['memory'] = memory    # peak traced memory, MiB
adata.uns['time'] = time_cost   # elapsed time, seconds

# NOTE(review): the file name says 'multi_5' and 'deepst', while this cell runs
# GraphST on the slices selected above -- confirm the intended output name.
adata.write_h5ad('./aging_multi_5_deepst.h5ad')

# Release the trained object before moving on.
del adata
gc.collect()


  dispersion = np.log(dispersion)


Begin to train ST data...


  7%|▋         | 40/600 [00:57<12:54,  1.38s/it]