In [1]:
import pandas as pd
import scanpy as sc
import random, torch
import warnings
warnings.filterwarnings("ignore")
import SpaGCN as spg
#Reading image data requires an extra package; we recommend "opencv".
#Install opencv for Python:
#!pip3 install opencv-python
import cv2
from anndata import AnnData
from sklearn.metrics import accuracy_score
from sklearn.metrics import adjusted_rand_score
import numpy as np
from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment

In [2]:
# Slice identifier; it is interpolated into every input/output path below.
slicename = "BZ14"
expr_path = fr"C:\E\JSU\BIO\file\STrafer\params\starmap\starmap_expr_{slicename}.csv"
spatial_path = fr"C:\E\JSU\BIO\file\STrafer\params\starmap\starmap_spatial_{slicename}.csv"

# Both tables use their first CSV column as the row index.
spatial_data = pd.read_csv(spatial_path, index_col=0)
meta = pd.read_csv(expr_path, index_col=0)

# Right join on the index: every row of `meta` is kept and the columns of
# `spatial_data` are attached to it.
data = pd.merge(spatial_data, meta, how='right', left_index=True, right_index=True)

# Reference labels are read from column 'z', with the value 4 recoded as 0.
labels = data['z'].replace({4: 0})

In [3]:
# Build the AnnData object. Expression values, observation names and spot
# coordinates are all taken in the row order of `meta`, so each expression row
# keeps its own spot id and position even if `spatial_data` is ordered differently.
# (.loc raises KeyError if a spot in `meta` has no coordinates.)
adata = AnnData(X=meta.values)
adata.obs_names = meta.index
adata.var_names = meta.columns
adata.obsm['spatial'] = spatial_data.loc[meta.index, ['x', 'y']].values

# Persist the raw object, then reload it from disk.
h5ad_path = fr"C:\E\JSU\BIO\file\STrafer\params\starmap\starmap_{slicename}.h5ad"
adata.write_h5ad(h5ad_path)
adata = sc.read_h5ad(h5ad_path)

# Put observations in the order of `data` (the order `labels` uses); .copy()
# turns the resulting view into a real object before it is modified below.
adata = adata[data.index].copy()
adata.var_names_make_unique()

# pre-process
# Keep an independent copy of the unnormalised matrix: highly_variable_genes
# below reads this layer, while normalize_total rewrites adata.X.
# (For a sparse X, the original notebook hinted at adata.X.toarray() instead.)
adata.layers['count'] = adata.X.copy()
sc.pp.filter_genes(adata, min_cells=50)
sc.pp.filter_genes(adata, min_counts=10)
sc.pp.normalize_total(adata, target_sum=1e6)
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", layer='count', n_top_genes=150)

# Keep only the selected genes; copy so that scaling acts on a real object
# rather than on a view.
adata = adata[:, adata.var['highly_variable']].copy()
sc.pp.scale(adata)

In [4]:
# Spot coordinates looked up by spot id in the row order of `data`, which is the
# order `adata` and `labels` use. Identical to the plain columns when
# `spatial_data` already has that order; raises KeyError if a spot is missing.
x_array = spatial_data.loc[data.index, 'x'].values
y_array = spatial_data.loc[data.index, 'y'].values

In [5]:
# Coordinates used to build the adjacency matrix. They are looked up by spot id
# in the row order of `data` (the order `adata` was indexed with), so row i of
# the adjacency matrix refers to the same spot as row i of `adata`.
# Raises KeyError if a spot in `data` has no entry in `spatial_data`.
x_pixel = spatial_data.loc[data.index, 'x'].values
y_pixel = spatial_data.loc[data.index, 'y'].values

In [6]:
# Values forwarded to calculate_adj_matrix as alpha (s) and beta (b).
s = 1
b = 49
adj = spg.calculate_adj_matrix(
    x=x_pixel,
    y=y_pixel,
    x_pixel=x_pixel,
    y_pixel=y_pixel,
    beta=b,
    alpha=s,
    histology=False,
)
# If a histology image is not available, SpaGCN can calculate the adjacency
# matrix using the function below:
# adj=calculate_adj_matrix(x=x_pixel,y=y_pixel, histology=False)

# Write the matrix to CSV and read it back from the same file.
adj_csv = fr'C:\E\JSU\BIO\file\STrafer\params\starmap\SPAGCN\{slicename}\adj.csv'
np.savetxt(adj_csv, adj, delimiter=',')
adj = np.loadtxt(adj_csv, delimiter=',')

# Find the l value given p
p = 0.5
l = spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)

Calculateing adj matrix using xy only...
Run 1: l [0.01, 1000], p [0.0, 75.30559154992433]
Run 2: l [0.01, 500.005], p [0.0, 20.47630442078767]
Run 3: l [0.01, 250.0075], p [0.0, 4.99878217995026]
Run 4: l [0.01, 125.00874999999999], p [0.0, 0.9033971482752359]
Run 5: l [62.509375, 125.00874999999999], p [0.055714749148429554, 0.9033971482752359]
Run 6: l [93.7590625, 125.00874999999999], p [0.3502807332205198, 0.9033971482752359]
Run 7: l [93.7590625, 109.38390625], p [0.3502807332205198, 0.597852175251619]
Run 8: l [101.571484375, 109.38390625], p [0.4663908553024856, 0.597852175251619]
Run 9: l [101.571484375, 105.4776953125], p [0.4663908553024856, 0.530259670206541]
recommended l =  103.52458984375


In [7]:
# Target number of clusters for the resolution search.
n_clusters = 4
# One seed value shared by Python's random, torch and numpy.
r_seed = t_seed = n_seed = 100

# Search for a resolution that yields n_clusters clusters.
res = spg.search_res(
    adata,
    adj,
    l,
    n_clusters,
    start=0.7,
    step=0.1,
    tol=5e-3,
    lr=0.05,
    max_epochs=20,
    r_seed=r_seed,
    t_seed=t_seed,
    n_seed=n_seed,
)

clf = spg.SpaGCN()
clf.set_l(l)

# Seed all three random number generators before the final training run.
np.random.seed(n_seed)
torch.manual_seed(t_seed)
random.seed(r_seed)

# Train with the selected resolution, then read out the predictions.
clf.train(
    adata,
    adj,
    init_spa=True,
    init="louvain",
    res=res,
    tol=5e-3,
    lr=0.05,
    max_epochs=200,
)
y_pred, prob = clf.predict()

Start at res =  0.7 step =  0.1
Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Res =  0.7 Num of clusters =  7
Initializing cluster centers with louvain, resolution =  0.6
Epoch  0
Epoch  10
Res =  0.6 Num of clusters =  7
Res changed to 0.6
Initializing cluster centers with louvain, resolution =  0.5
Epoch  0
Epoch  10
Res =  0.5 Num of clusters =  5
Res changed to 0.5
Initializing cluster centers with louvain, resolution =  0.4
Epoch  0
Epoch  10
Res =  0.4 Num of clusters =  3
Step changed to 0.05
Initializing cluster centers with louvain, resolution =  0.45
Epoch  0
Epoch  10
Res =  0.45 Num of clusters =  4
recommended res =  0.45
Initializing cluster centers with louvain, resolution =  0.45
Epoch  0
Epoch  10
Epoch  20
Epoch  30
Epoch  40
Epoch  50
Epoch  60
Epoch  70
Epoch  80
Epoch  90
Epoch  100
Epoch  110
Epoch  120
Epoch  130
Epoch  140
Epoch  150
Epoch  160
Epoch  170
Epoch  180
Epoch  190


In [8]:
# Relabel predicted clusters so that they line up with the reference labels.
# The class range covers every id present in either vector (and at least
# n_clusters), so a predicted id outside 0..n_clusters-1 is still given a
# target instead of raising KeyError in the remapping below.
# Assumes `labels` and `y_pred` hold non-negative integer ids.
n_classes = max(n_clusters, int(np.max(labels)) + 1, int(np.max(y_pred)) + 1)
conf_mat = confusion_matrix(labels, y_pred, labels=np.arange(n_classes))
# linear_sum_assignment minimises total cost, so the counts are negated to
# obtain the pairing with maximum overlap.
row_ind, col_ind = linear_sum_assignment(-conf_mat)
# Rows of conf_mat are reference labels and columns are predicted labels.
mapping = {pred_label: true_label for true_label, pred_label in zip(row_ind, col_ind)}
y_pred = np.array([mapping[p] for p in y_pred])


In [9]:
# Table of predictions: 'spot' is a 1-based running number, 'pred' the relabelled cluster.
n_spots = len(y_pred)
pred_labels_list = pd.DataFrame({'spot': np.arange(1, n_spots + 1), 'pred': y_pred})

# Written without the DataFrame index, so the file holds exactly the two columns.
pred_csv = fr"C:\E\JSU\BIO\file\STrafer\params\starmap\SPAGCN\{slicename}\pred_labels.csv"
pred_labels_list.to_csv(pred_csv, index=False)

In [10]:
# Agreement between the relabelled predictions and the reference labels:
# adjusted Rand index and plain accuracy.
acc_s = accuracy_score(y_pred, labels)
ARI_s = adjusted_rand_score(y_pred, labels)
for tag, score in (("ARI_s:", ARI_s), ("acc_s", acc_s)):
    print(tag, score)

ARI_s: 0.1941315609956136
acc_s 0.5018382352941176
