In [1]:
import os 
from deepst.DeepST import run
import matplotlib.pyplot as plt
from pathlib import Path
import scanpy as sc
import warnings
import torch
from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment
import numpy as np
from sklearn.decomposition import PCA  # sklearn PCA is used because PCA in scanpy is not stable.
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import accuracy_score
warnings.filterwarnings("ignore")
import pandas as pd
from anndata import AnnData
from sklearn import metrics
import matplotlib.lines as mlines

In [2]:
# Run configuration: which slice to process and how many spatial domains to
# ask DeepST for. These names are read by later cells.
slicename = 'BZ14'
n_domains = 4

# Prefer the first CUDA device when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [3]:
# Per-slice output directory handed to DeepST as `save_path`; created on
# demand so a fresh run does not fail on a missing folder.
# NOTE(review): absolute, machine-specific path — consider a configurable base dir.
save_path = folder_path = fr'C:\E\JSU\BIO\file\STrafer\params\starmap\DeepST\{slicename}'
os.makedirs(save_path, exist_ok=True)

In [4]:
# Build the DeepST runner for spatial-domain identification.
# `use_gpu` follows actual CUDA availability instead of being hard-coded to
# True, so the notebook also runs on CPU-only machines (same check as the
# `device` selection in the configuration cell).
deepen = run(
    save_path=save_path,
    task="Identify_Domain",
    pre_epochs=300,   # epochs for the initial (pre-training) model
    epochs=500,       # epochs for the final model
    use_gpu=torch.cuda.is_available(),
)

In [5]:
# ---- Load one STARmap slice and build a preprocessed AnnData ----
# NOTE(review): absolute, machine-specific paths — consider a configurable base dir.
expr_path = fr"C:\E\JSU\BIO\file\STrafer\params\starmap\starmap_expr_{slicename}.csv"
spatial_path = fr"C:\E\JSU\BIO\file\STrafer\params\starmap\starmap_spatial_{slicename}.csv"
h5ad_path = fr"C:\E\JSU\BIO\file\STrafer\params\starmap\starmap_{slicename}.h5ad"

# `meta` is used as the expression matrix (rows -> obs, columns -> var);
# `spatial_data` supplies the 'x'/'y' coordinates.
meta = pd.read_csv(expr_path, index_col=0)
spatial_data = pd.read_csv(spatial_path, index_col=0)

# Right join: one row per row of `meta`, in `meta` order.
data = spatial_data.merge(meta, left_index=True, right_index=True, how='right')
if not data.index.equals(meta.index):
    raise ValueError(
        "Merging spatial and expression tables changed the row set/order; "
        "check both CSVs for duplicated cell ids."
    )

# Reference labels come from column 'z'; value 4 is folded into 0.
# NOTE(review): the reason for the 4 -> 0 remap is not visible here — confirm.
labels = data['z'].replace(4, 0)

# Coordinates re-ordered to the expression rows. Previously obs_names and
# obsm['spatial'] were taken in `spatial_data` order while X was in `meta`
# order, which silently mismatches cells when the two CSVs are ordered
# differently.
coords = spatial_data[['x', 'y']].reindex(meta.index)
if coords.isna().any().any():
    raise ValueError("Some cells in the expression table have no spatial coordinates.")

adata = AnnData(X=meta.values)
adata.var_names = meta.columns
adata.obs_names = meta.index
adata.obsm['spatial'] = coords.values

# Round-trip through disk is kept from the original workflow (the saved file
# may be consumed elsewhere). Rows are already in `data.index` order, so no
# re-indexing is needed after reading back.
adata.write_h5ad(h5ad_path)
adata = sc.read_h5ad(h5ad_path)
adata.var_names_make_unique()

# ---- Pre-process ----
# Keep an independent copy of the unnormalised matrix for seurat_v3 HVG
# selection, so later operations on X can never touch it.
adata.layers['count'] = adata.X.copy()
sc.pp.filter_genes(adata, min_cells=50)
sc.pp.filter_genes(adata, min_counts=10)
sc.pp.normalize_total(adata, target_sum=1e6)
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", layer='count', n_top_genes=140)

# Materialise the HVG subset so scaling works on a real object, not a view.
adata = adata[:, adata.var['highly_variable']].copy()
sc.pp.scale(adata)

In [6]:
# Attach the reference labels to the cells by *name*. AnnData keeps obs_names
# as strings, so a label Series with a non-string index (e.g. integer cell ids
# read from CSV) would otherwise align to nothing and produce all-NaN.
labels_by_name = labels.copy()
labels_by_name.index = labels_by_name.index.astype(str)
adata.obs['layer_guess'] = labels_by_name.reindex(adata.obs_names).to_numpy()

# sklearn PCA (imported at the top of the notebook) is used because PCA in
# scanpy is not stable. The component count is capped by the matrix shape so
# the cell still runs when fewer than 140 genes/cells survive filtering.
n_pcs = min(140, *adata.X.shape)
adata_X = PCA(n_components=n_pcs, random_state=42).fit_transform(adata.X)
adata.obsm['X_pca'] = adata_X

In [7]:
# Expose the expression PCA under the "image_feat_pca" key and keep a handle
# to it in `data`, which is what the training cell passes to `deepen._fit`.
# NOTE(review): `data` previously held the merged DataFrame; it is rebound here.
# NOTE(review): presumably the PCA stands in for image features because this
# dataset has none — confirm.
data = adata.obsm["image_feat_pca"] = adata.obsm['X_pca']

In [8]:
# Copy the coordinates into the obs columns. They are taken from
# adata.obsm['spatial'] (column 0 = x, column 1 = y, as stored when the
# AnnData was built), which is row-aligned with `adata` by construction.
# Assigning the `spatial_data` Series directly aligns on index and silently
# yields NaN when the CSV index is not string-typed like adata.obs_names.
spatial_xy = adata.obsm["spatial"]
adata.obs["imagerow"] = spatial_xy[:, 0]
adata.obs["imagecol"] = spatial_xy[:, 1]
adata.obs["array_row"] = spatial_xy[:, 0]
adata.obs["array_col"] = spatial_xy[:, 1]

In [9]:
# Step 1: augment the expression data (the log below reports physical
# distance, gene correlation and morphological similarity being computed).
adata = deepen._get_augment(
    adata,
    spatial_type="LinearRegress",
    use_morphological=True,
)

# Step 2: build the spatial neighbour graph from the cell coordinates.
graph_dict = deepen._get_graph(
    adata.obsm["spatial"],
    distType="BallTree",
)

Physical distance calculting Done!
The number of nearest tie neighbors in physical distance is: 1.0
Gene correlation calculting Done!
Morphological similarity calculting Done!
The weight result of image feature is added to adata.obsm['weights_matrix_all'] !


Find adjacent spots of each spot: 100%|██████████ [ time left: 00:00 ]

Step 1: Augment molecule expression is Done!
12.0000 neighbors per cell on average.
Step 2: Graph computing is Done!





In [10]:
# Step 3: train DeepST on the feature matrix and the spatial graph, store the
# embedding, then cluster it into `n_domains` domains.
# NOTE(review): `data` here is the expression PCA assigned two cells earlier,
# not anything derived from the `_get_augment` output — confirm this is intended.
deepst_embed = deepen._fit(data=data, graph_dict=graph_dict)
adata.obsm["DeepST_embed"] = deepst_embed

adata = deepen._get_cluster_data(
    adata,
    n_domains=n_domains,
    priori=True,
)

Your task is in full swing, please wait


DeepST trains an initial model: 100%|██████████ [ time left: 00:00 ]
DeepST trains a final model: |           [ time left: 00:00 ]    


Step 3: DeepST training has been Done!
Current memory usage：1.3659 GB
Total time: 0.24 minutes
Your task has been completed, thank you
Of course, you can also perform downstream analysis on the processed data
Best resolution:  0.13


In [13]:
# Refined DeepST domain assignment per cell, as an integer array.
pred = (
    adata.obs['DeepST_refine_domain']
    .values
    .astype(int)
)

In [14]:
# Cluster ids are arbitrary, so relabel them to best match the reference
# labels: build the confusion matrix (rows = true, cols = predicted) and solve
# the assignment that maximises the matched counts (Hungarian algorithm on the
# negated matrix).
# The label range follows `n_domains` rather than a hard-coded 4.
# NOTE: this cell overwrites `pred`; re-running it remaps already-aligned labels.
conf_mat = confusion_matrix(labels, pred, labels=np.arange(n_domains))
row_ind, col_ind = linear_sum_assignment(-conf_mat)
mapping = {pred_label: true_label for true_label, pred_label in zip(row_ind, col_ind)}

# Fail with a clear message instead of a bare KeyError if clustering produced
# an id outside the expected range.
unmapped = sorted(set(np.unique(pred).tolist()) - set(mapping))
if unmapped:
    raise ValueError(
        f"Predicted cluster ids {unmapped} fall outside 0..{n_domains - 1}; "
        "cannot align them to the reference labels."
    )

pred = np.array([mapping[p] for p in pred])
adata.obs["pred"] = pred

In [15]:
# Agreement between the aligned predictions and the reference labels:
# adjusted Rand index and plain accuracy.
ARI_s, acc_s = adjusted_rand_score(pred, labels), accuracy_score(pred, labels)
for tag, score in (("ARI_s:", ARI_s), ("acc_s", acc_s)):
    print(tag, score)

ARI_s: 0.2948471553573723
acc_s 0.6268382352941176
