In [1]:
import os 
from deepst.DeepST import run
import matplotlib.pyplot as plt
from pathlib import Path
import scanpy as sc
import warnings
import torch
from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment
import numpy as np
from sklearn.decomposition import PCA  # sklearn PCA is used because PCA in scanpy is not stable.
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import accuracy_score
warnings.filterwarnings("ignore")
import pandas as pd
from anndata import AnnData
from sklearn import metrics
import matplotlib.lines as mlines

In [2]:
# Run configuration: which slice to process and how many domains to look for.
slicename = '29'
n_domains = 8

# Use the first CUDA device when one is available, otherwise stay on the CPU.
# NOTE(review): `device` is not referenced by the cells visible in this notebook.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

In [3]:
# Per-slice output folder for DeepST; created on demand (parents included) and
# reused if it already exists.
save_path = folder_path = fr'C:\E\JSU\BIO\file\STrafer\params\merfish\DeepST\{slicename}'
Path(folder_path).mkdir(parents=True, exist_ok=True)

In [4]:
# Options handed to the DeepST runner. `pre_epochs` / `epochs` presumably set the
# lengths of the initial and final training stages reported in the training log
# -- TODO confirm against the DeepST documentation.
run_options = dict(
    save_path=save_path,
    task="Identify_Domain",
    pre_epochs=300,
    epochs=500,
    use_gpu=True,
)
deepen = run(**run_options)

In [5]:
# ---- Load the slice -------------------------------------------------------
adata = sc.read_h5ad(fr"C:\E\JSU\BIO\file\STrafer\params\merfish\{slicename}.h5ad")

# Publish the two spatial coordinate columns under both the image* and array_*
# names (presumably the obs columns DeepST's helpers look up -- TODO confirm).
adata.obs["imagerow"] = adata.obsm['spatial'][:, 0]
adata.obs["imagecol"] = adata.obsm['spatial'][:, 1]
adata.obs["array_row"] = adata.obsm['spatial'][:, 0]
adata.obs["array_col"] = adata.obsm['spatial'][:, 1]

# ---- Ground-truth labels as integer ids -----------------------------------
label_mapping = {'MPA': 1, 'MPN': 2, 'BST': 3, 'fx': 4, "PVH": 5, "PVT": 6, "V3": 7, 'PV': 0}
labels = adata.obs['ground_truth'].map(label_mapping)

# Fail early with a readable message: values missing from `label_mapping` become
# NaN here and would otherwise only surface later as an opaque metrics error.
unmapped = adata.obs['ground_truth'][labels.isna()].unique()
if len(unmapped) > 0:
    raise ValueError(
        f"ground_truth values missing from label_mapping: {list(unmapped)}"
    )

# ---- Pre-process -----------------------------------------------------------
adata.var_names_make_unique()

# Keep an independent copy of the raw counts: the steps below modify X, and the
# 'seurat_v3' HVG flavor is pointed at this layer, so it must not alias X.
adata.layers['count'] = adata.X.copy()
sc.pp.filter_genes(adata, min_cells=50)
sc.pp.filter_genes(adata, min_counts=10)
sc.pp.normalize_total(adata, target_sum=1e6)
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", layer='count', n_top_genes=150)

# ---- PCA features ----------------------------------------------------------
# PCA is run on the normalized matrix of all genes that passed filtering.
# NOTE(review): `data` is computed here, i.e. before HVG selection, scaling and
# `_get_augment` below, and it is this array that the later `_fit` cell receives
# -- confirm this is intended.
adata_X = PCA(n_components=150, random_state=42).fit_transform(adata.X)
adata.obsm['X_pca'] = adata_X
data = adata.obsm['X_pca']

# The expression PCA is also stored under the "image_feat_pca" key, so the
# "morphological" input below is really expression-derived.
adata.obsm["image_feat_pca"] = data

# ---- HVG subset, scaling, augmentation and graph ---------------------------
# Materialize the subset: slicing an AnnData yields a view, and the in-place
# operations below (scale, obs assignment) should act on a real object.
adata = adata[:, adata.var['highly_variable']].copy()
sc.pp.scale(adata)
adata.obs['layer_guess'] = labels
adata = deepen._get_augment(adata, spatial_type="LinearRegress", use_morphological=True)
graph_dict = deepen._get_graph(adata.obsm["spatial"], distType="BallTree")

Physical distance calculting Done!
The number of nearest tie neighbors in physical distance is: 1.0032473389861085
Gene correlation calculting Done!
Morphological similarity calculting Done!
The weight result of image feature is added to adata.obsm['weights_matrix_all'] !


Find adjacent spots of each spot: 100%|██████████ [ time left: 00:00 ]


Step 1: Augment molecule expression is Done!
12.0000 neighbors per cell on average.
Step 2: Graph computing is Done!


In [6]:
# Train DeepST on the PCA features and the spatial graph, keep the returned
# embedding on the AnnData object, then derive the domain assignment from it.
deepst_embed = deepen._fit(data=data, graph_dict=graph_dict)
adata.obsm["DeepST_embed"] = deepst_embed

# The clustering helper returns the AnnData object; the following cells read
# its 'DeepST_refine_domain' obs column.
adata = deepen._get_cluster_data(adata, priori=True, n_domains=n_domains)

Your task is in full swing, please wait


DeepST trains an initial model: 100%|██████████ [ time left: 00:00 ]
DeepST trains a final model: |           [ time left: 00:00 ]    


Step 3: DeepST training has been Done!
Current memory usage：1.7210 GB
Total time: 0.68 minutes
Your task has been completed, thank you
Of course, you can also perform downstream analysis on the processed data
Best resolution:  0.15999999999999998


In [7]:
# Disabled alternative: score the raw DeepST domains directly against the
# ground truth and stash the ARI in adata.uns.
# ARI = metrics.adjusted_rand_score(adata.obs['DeepST_refine_domain'], labels)
# adata.uns['ARI'] = ARI
# Extract the 'DeepST_refine_domain' assignment as an integer array so it can be
# fed to the confusion-matrix based label alignment in the next cell.
pred = adata.obs['DeepST_refine_domain'].values.astype(int)

In [8]:
# Align DeepST's arbitrary cluster ids with the ground-truth label ids by
# solving the assignment problem on the confusion matrix (Hungarian matching).

# Always start from the raw DeepST output rather than from `pred`: this cell
# overwrites `pred`, so reading it back would remap already-remapped labels
# whenever the cell is executed a second time.
raw_pred = adata.obs['DeepST_refine_domain'].values.astype(int)

# Size the matrix from the configuration and the ids actually present instead
# of a hard-coded 8, so every predicted id gets an entry in `mapping` even when
# the clustering returns more (or higher-numbered) clusters than expected.
n_classes = max(
    n_domains,
    int(raw_pred.max()) + 1,
    int(np.asarray(labels).max()) + 1,
)
conf_mat = confusion_matrix(labels, raw_pred, labels=np.arange(n_classes))

# Negate the counts so that the minimum-cost assignment maximizes the overlap.
row_ind, col_ind = linear_sum_assignment(-conf_mat)
mapping = {pred_label: true_label for true_label, pred_label in zip(row_ind, col_ind)}

pred = np.array([mapping[p] for p in raw_pred])
adata.obs["pred"] = pred

In [9]:
# Score the aligned predictions against the ground-truth ids: adjusted Rand
# index and plain accuracy, printed one per line.
ARI_s, acc_s = adjusted_rand_score(pred, labels), accuracy_score(pred, labels)
for tag, score in (("ARI_s:", ARI_s), ("acc_s", acc_s)):
    print(tag, score)

ARI_s: 0.1388359009966879
acc_s 0.4171026519935053
