In [1]:
import os
import torch
import pandas as pd
import scanpy as sc
import numpy as np
import matplotlib.lines as mlines
from sklearn import metrics
import multiprocessing as mp
from sklearn.metrics import adjusted_rand_score
from GraphST import GraphST
from sklearn.metrics import accuracy_score
from anndata import AnnData
from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment
import matplotlib.pyplot as plt
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# the location of R, which is necessary for mclust algorithm. Please replace the path below with local R installation path
os.environ['R_HOME'] = r"C:\Program Files\R\R-4.4.2"

In [2]:
slicename = "151507"
n_clusters = 7

In [3]:
spatial_data = pd.read_csv(fr"C:\E\JSU\BIO\file\SpaGCN-master\SpaGCN-master\tutorial\{slicename}\spatial\tissue_positions_list.csv", sep=",", header=None)
spatial_data.columns = ['barcode', 'in_tissue', 'row', 'col', 'pxl_row_in_fullres', 'pxl_col_in_fullres']
spatial_data = spatial_data[spatial_data['in_tissue'] == 1]  # remain cells within tissue (denoted by 1)
meta = pd.read_csv(fr"C:\E\JSU\BIO\file\SpaGCN-master\SpaGCN-master\tutorial\{slicename}\metadata.tsv", sep="\t",index_col=False)
meta.drop(columns=['row', 'col'], inplace=True)  # delete repeated columns

data = spatial_data.merge(meta, on='barcode', how='right')
data = data[['barcode', 'row', 'col', 'pxl_row_in_fullres', 'pxl_col_in_fullres', 'expr_chrM']]
data = data.dropna(subset=['expr_chrM'])  # delete missing data
# labels
label_mapping = {'L1': 1, 'L2': 2, 'L3': 3, 'L4': 4, 'L5': 5, 'L6': 6, 'WM': 0} # DLPFC
labels = data['expr_chrM'].map(label_mapping)

In [4]:
# Annotated Data
file_path = fr"C:\E\JSU\BIO\file\SpaGCN-master\SpaGCN-master\tutorial\{slicename}\filtered_feature_bc_matrix.h5"
adata = sc.read_10x_h5(file_path)
adata = adata[data.index]
adata.obsm['spatial'] = data[['row','col']].values

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  adata.obsm['spatial'] = data[['row','col']].values
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


In [5]:
# define model
model = GraphST.GraphST(adata, device=device)
# train model
adata = model.train()

  utils.warn_names_duplicates("var")


Begin to train ST data...


100%|██████████| 600/600 [00:31<00:00, 19.12it/s]

Optimization finished for ST data!





In [6]:

radius = 50

tool = 'mclust'  # mclust, leiden, and louvain

# clustering
from GraphST.utils import clustering

if tool == 'mclust':
    clustering(adata, n_clusters, radius=radius, method=tool,
               refinement=True)  # For DLPFC dataset, we use optional refinement step.
elif tool in ['leiden', 'louvain']:
    clustering(adata, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)

R[write to console]:                    __           __ 
   ____ ___  _____/ /_  _______/ /_
  / __ `__ \/ ___/ / / / / ___/ __/
 / / / / / / /__/ / /_/ (__  ) /_  
/_/ /_/ /_/\___/_/\__,_/____/\__/   version 6.1.1
Type 'citation("mclust")' for citing this R package in publications.



fitting ...


In [7]:
y_pred = adata.obs['domain'].values.astype(int)-1

In [8]:
conf_mat = confusion_matrix(labels, y_pred, labels=np.arange(7))
row_ind, col_ind = linear_sum_assignment(-conf_mat)
mapping = {pred_label: true_label for true_label, pred_label in zip(row_ind, col_ind)}
y_pred = np.array([mapping[p] for p in y_pred]) 

In [9]:
ARI_s = adjusted_rand_score(y_pred, labels)
acc_s = accuracy_score(y_pred, labels)
print("ARI_s:", ARI_s)
print("acc_s", acc_s)

ARI_s: 0.42495748120250776
acc_s 0.5245202558635395
