In [1]:
import os
from deepst.DeepST import run
import matplotlib.pyplot as plt
from pathlib import Path
import scanpy as sc
import torch
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import accuracy_score
import numpy as np
import warnings
from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment
warnings.filterwarnings("ignore")
import pandas as pd
from sklearn import metrics
import matplotlib.lines as mlines

In [2]:
# ---------------------------------------------------------------------------
# Run configuration (edit these values for your own machine / sample).
# ---------------------------------------------------------------------------
# Folder that holds one sub-directory per sample.
data_path = r"C:\E\JSU\BIO\file\SpaGCN-master\SpaGCN-master\tutorial"
# Sample id: passed to DeepST as the project name and used as the slice folder name.
data_name = '151676'
slicename = data_name
# Directory handed to DeepST as its save path.
save_path = r"C:\E\JSU\BIO\file\THItoGene-main\mymodel\result\DLPFC\basicmodel\DeepST"
# Number of spatial domains to identify.
n_domains = 7

# DeepST driver object. The task can be "Identify_Domain" or "Integration".
deepen = run(
    save_path=save_path,
    task="Identify_Domain",
    pre_epochs=300,  # presumably the epochs of the initial (pre-training) model -- confirm in DeepST docs
    epochs=500,      # presumably the epochs of the final model -- confirm in DeepST docs
    use_gpu=True,
)

In [3]:
# Load the spot positions and the per-spot annotation of the slice, keep only the
# annotated spots, and encode the layer names as an integer tensor `labels`.

# Build the file locations from the config-cell variables instead of repeating the
# absolute path, so that changing `data_path` / `slicename` is enough.
slice_dir = os.path.join(data_path, slicename)

spatial_data = pd.read_csv(
    os.path.join(slice_dir, "spatial", "tissue_positions_list.csv"),
    sep=",",
    header=None,
)
spatial_data.columns = ['barcode', 'in_tissue', 'row', 'col', 'pxl_row_in_fullres', 'pxl_col_in_fullres']
spatial_data = spatial_data[spatial_data['in_tissue'] == 1]  # keep spots within tissue (denoted by 1)

# NOTE(review): index_col=False is kept from the original; it affects how the header
# is matched to the data columns of metadata.tsv -- confirm that 'expr_chrM' really
# holds the layer annotation for this file.
meta = pd.read_csv(os.path.join(slice_dir, "metadata.tsv"), sep="\t", index_col=False)
meta = meta.drop(columns=['row', 'col'])  # already provided by spatial_data

# Right join: one row per metadata entry, in metadata order.
data_ = spatial_data.merge(meta, on='barcode', how='right')
data_ = data_[['barcode', 'row', 'col', 'pxl_row_in_fullres', 'pxl_col_in_fullres', 'expr_chrM']]
data_ = data_.dropna(subset=['expr_chrM'])  # drop spots without annotation

# Layer name -> integer class id (DLPFC).
label_mapping = {'L1': 1, 'L2': 2, 'L3': 3, 'L4': 4, 'L5': 5, 'L6': 6, 'WM': 0}
label_codes = data_['expr_chrM'].map(label_mapping)

# `.map` silently yields NaN for values missing from the mapping; fail with a clear
# message instead of a cryptic tensor-conversion error.
unmapped_values = data_.loc[label_codes.isna(), 'expr_chrM'].unique()
if len(unmapped_values) > 0:
    raise ValueError(
        f"Annotation values without an entry in label_mapping: {sorted(map(str, unmapped_values))}"
    )

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Convert through an explicit NumPy array: building a tensor directly from a Series
# whose integer index has gaps (after dropna) depends on label-based item access.
labels = torch.as_tensor(label_codes.to_numpy(dtype=np.int64), dtype=torch.long).to(device)

In [4]:

# `data_path` and `data_name` come from the configuration cell; they are not
# re-assigned here so that the configuration cell stays the single source of truth.

# Read in the 10x Visium data (users can also load the data themselves).
adata = deepen._get_adata(platform="Visium", data_path=data_path, data_name=data_name)

# Segment the morphological image (tiling + image feature extraction, per the
# progress output of this step).
adata = deepen._get_image_crop(adata, data_name=data_name)

Tiling image: 100%|██████████ [ time left: 00:00 ]
Extract image feature: 100%|██████████ [ time left: 00:00 ]


In [5]:
# Step 1: augment the expression data. With use_morphological=True the step logs
# physical distance, gene correlation and morphological similarity computations.
adata = deepen._get_augment(
    adata,
    spatial_type="LinearRegress",
    use_morphological=True,
)

Physical distance calculting Done!
The number of nearest tie neighbors in physical distance is: 31.839306358381503
Gene correlation calculting Done!
Morphological similarity calculting Done!
The weight result of image feature is added to adata.obsm['weights_matrix_all'] !


Find adjacent spots of each spot: 100%|██████████ [ time left: 00:00 ]


Step 1: Augment molecule expression is Done!


In [6]:
# Copy the spot coordinates from tissue_positions_list.csv into adata.obs.
#
# pandas aligns a Series on its index when it is assigned as a DataFrame column.
# `spatial_data` carries a plain integer index while `adata.obs` is indexed by the
# observation names, so assigning the Series directly matches nothing and fills the
# columns with NaN. Re-key the coordinates by barcode and assign plain arrays.
# Assumes adata.obs_names are the spot barcodes -- TODO confirm for non-Visium input.
spot_coords = spatial_data.set_index('barcode').reindex(adata.obs_names)

n_unmatched = int(spot_coords['row'].isna().sum())
if n_unmatched:
    # Unmatched spots keep NaN coordinates; report it instead of staying silent.
    print(f"Warning: {n_unmatched} spots of adata have no entry in tissue_positions_list.csv")

adata.obs["imagerow"] = spot_coords['pxl_row_in_fullres'].to_numpy()
adata.obs["imagecol"] = spot_coords['pxl_col_in_fullres'].to_numpy()
adata.obs["array_row"] = spot_coords['row'].to_numpy()
adata.obs["array_col"] = spot_coords['col'].to_numpy()


In [7]:
# Keep only the annotated spots, in the same order as `data_` (and therefore as `labels`).
#
# Select by barcode rather than by the integer index of `data_`: that index is a row
# position in the merged metadata table, which is only a valid position in `adata`
# if both happen to be ordered identically. Selecting by name is also idempotent, so
# re-running this cell does not subset the data a second time.
# Assumes adata.obs_names are the spot barcodes -- TODO confirm for non-Visium input.
adata = adata[data_['barcode'].to_numpy(), :]

In [8]:
# Build the spot graph from the spatial coordinates.
# "distType" can be "KDTree", "BallTree", "kneighbors_graph", "Radius", etc. (see adj.py).
graph_dict = deepen._get_graph(
    adata.obsm["spatial"],
    distType="BallTree",
)

# Enhanced data preprocessing, requesting 200 PCA components.
data = deepen._data_process(
    adata,
    pca_n_comps=200,
)

12.0000 neighbors per cell on average.
Step 2: Graph computing is Done!


In [9]:
# Step 3: train DeepST on the preprocessed data and graph, then store the embedding.
deepst_embed = deepen._fit(data=data, graph_dict=graph_dict)
adata.obsm["DeepST_embed"] = deepst_embed

# Cluster the embedding into `n_domains` domains. With priori=True the step reports
# a "Best resolution" in its output.
adata = deepen._get_cluster_data(
    adata,
    n_domains=n_domains,
    priori=True,
)

Your task is in full swing, please wait


DeepST trains an initial model: 100%|██████████ [ time left: 00:00 ]
DeepST trains a final model: |           [ time left: 00:00 ]    


Step 3: DeepST training has been Done!
Current memory usage：4.2891 GB
Total time: 0.32 minutes
Your task has been completed, thank you
Of course, you can also perform downstream analysis on the processed data
Best resolution:  0.6799999999999997


In [10]:
# Store the integer ground-truth labels next to the predictions.
# `labels` is a torch tensor (possibly on the GPU); `labels.values` is a bound method
# on a tensor, not the data, so convert explicitly to a host NumPy array.
adata.obs['ground_truth'] = labels.detach().cpu().numpy()

In [11]:
# Domain assignments written by the clustering step, as an integer array.
refined_domains = adata.obs['DeepST_refine_domain']
pred = refined_domains.values.astype(int)

In [15]:
# Align the arbitrary cluster ids with the ground-truth ids (Hungarian matching on
# the confusion matrix), then report ARI and accuracy.

true_labels = labels.detach().cpu().numpy()
# Always start from the raw cluster ids so that re-running this cell does not remap
# predictions that were already remapped by a previous run.
raw_pred = adata.obs['DeepST_refine_domain'].values.astype(int)

# Cover every id that occurs in either vector instead of hard-coding 7 classes;
# this also guarantees that each predicted id receives a mapping below.
n_classes = int(max(n_domains, true_labels.max() + 1, raw_pred.max() + 1))
conf_mat = confusion_matrix(true_labels, raw_pred, labels=np.arange(n_classes))

# Rows are true labels, columns are predicted labels: pick the one-to-one assignment
# that maximises the number of agreeing spots.
row_ind, col_ind = linear_sum_assignment(conf_mat, maximize=True)
mapping = {pred_label: true_label for true_label, pred_label in zip(row_ind, col_ind)}

pred = np.array([mapping[p] for p in raw_pred])
adata.obs["pred"] = pred

# scikit-learn convention: (y_true, y_pred).
ARI_s = adjusted_rand_score(true_labels, pred)
acc_s = accuracy_score(true_labels, pred)
print("ARI_s:", ARI_s)
print("acc_s", acc_s)

ARI_s: 0.48072948136019245
acc_s 0.6006995045176333
