In [2]:
import os, csv, re
import pandas as pd
import numpy as np
import scanpy as sc
import math
import SpaGCN as spg
from scipy.sparse import issparse
import random, torch
import warnings

warnings.filterwarnings("ignore")
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import SpaGCN as spg
import cv2
from anndata import AnnData
from sklearn.metrics import accuracy_score
from sklearn.metrics import adjusted_rand_score
import numpy as np
from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment

In [3]:
base_dir = r"C:\E\JSU\BIO\file\SpaGCN-master\SpaGCN-master\tutorial"
slicenames =['151507']
slicenames = ["151507", "151508", "151509",'151510','151669','151670','151671','151672','151673','151674','151675','151676']

for slicename in slicenames:
    spatial_path = os.path.join(base_dir, slicename, "spatial", "tissue_positions_list.csv")
    spatial_data = pd.read_csv(spatial_path, sep=",", header=None)

    spatial_data.columns = ['barcode', 'in_tissue', 'row', 'col', 'pxl_row_in_fullres', 'pxl_col_in_fullres']
    spatial_data = spatial_data[spatial_data['in_tissue'] == 1]  # remain cells within tissue (denoted by 1)
    meta_path = os.path.join(base_dir,slicename,'metadata.tsv')
    meta = pd.read_csv(meta_path, sep="\t",index_col=False)
    meta.drop(columns=['row', 'col'], inplace=True)  # delete repeated columns

    data = spatial_data.merge(meta, on='barcode', how='right')
    data = data[['barcode', 'row', 'col', 'pxl_row_in_fullres', 'pxl_col_in_fullres', 'expr_chrM']]
    data = data.dropna(subset=['expr_chrM'])  # delete missing data
    # labels
    label_mapping = {'L1': 1, 'L2': 2, 'L3': 3, 'L4': 4, 'L5': 5, 'L6': 6, 'WM': 0} # DLPFC
    labels = data['expr_chrM'].map(label_mapping)


    # Annotated Data
    # file_path = r"C:\E\JSU\BIO\file\SpaGCN-master\SpaGCN-master\tutorial\151673\filtered_feature_bc_matrix.h5"
    file_path = os.path.join(base_dir,slicename,"filtered_feature_bc_matrix.h5")

    adata = sc.read_10x_h5(file_path)
    adata = adata[data.index]
    im_path = os.path.join(base_dir,slicename,"spatial",'full_image.tif')
    img=cv2.imread(im_path)

    adata.var_names_make_unique()
    spg.prefilter_genes(adata,min_cells=3) # avoiding all genes are zeros
    spg.prefilter_specialgenes(adata)
    #Normalize and take log for UMI
    sc.pp.normalize_per_cell(adata)
    sc.pp.log1p(adata)

    #Read in hitology image
    spatial_data = data.iloc[:, :5]
    x_array = spatial_data['row'].tolist()
    y_array = spatial_data['col'].tolist()
    x_pixel = spatial_data['pxl_row_in_fullres'].tolist()
    y_pixel = spatial_data['pxl_col_in_fullres'].tolist()

In [4]:
    #Calculate adjacent matrix
    s=1
    b=49
    adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, x_pixel=x_pixel, y_pixel=y_pixel, image=img, beta=b, alpha=s, histology=True)

    folder_path = fr'C:\E\JSU\BIO\file\STrafer\params\DLPFC\SPAGCN\{slicename}'
    os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, 'adj.csv')
    np.savetxt(file_path, adj, delimiter=',')
    adj = np.loadtxt(file_path, delimiter=',')

    p=0.5
    #Find the l value given p
    l=spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)

    #For this toy data, we set the number of clusters=7 since this tissue has 7 layers
    n_clusters=7
    #Set seed
    r_seed=t_seed=n_seed=100
    #Search for suitable resolution
    res=spg.search_res(adata, adj, l, n_clusters, start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20, r_seed=r_seed, t_seed=t_seed, n_seed=n_seed)

    clf=spg.SpaGCN()
    clf.set_l(l)
    #Set seed
    random.seed(r_seed)
    torch.manual_seed(t_seed)
    np.random.seed(n_seed)
    #Run
    clf.train(adata,adj,init_spa=True,init="louvain",res=res, tol=5e-3, lr=0.05, max_epochs=200)
    y_pred, prob=clf.predict()
    adata.obs["pred"]= y_pred
    adata.obs["pred"]=adata.obs["pred"].astype('category')
    #Do cluster refinement(optional)
    #shape="hexagon" for Visium data, "square" for ST data.
    adj_2d=spg.calculate_adj_matrix(x=x_array,y=y_array, histology=False)
    refined_pred=spg.refine(sample_id=adata.obs.index.tolist(), pred=adata.obs["pred"].tolist(), dis=adj_2d, shape="hexagon")
    adata.obs["refined_pred"]=refined_pred
    adata.obs["refined_pred"]=adata.obs["refined_pred"].astype('category')

Calculateing adj matrix using histology image...
Var of c0,c1,c2 =  29.477509465304824 174.8015556779522 60.95093839727311
Var of x,y,z =  5080995.703721643 4365111.426668278 5080995.703721643
Run 1: l [0.01, 1000], p [0.0, 161.7471317017666]
Run 2: l [0.01, 500.005], p [0.0, 30.984823179124525]
Run 3: l [0.01, 250.0075], p [0.0, 4.849617891160691]
Run 4: l [0.01, 125.00874999999999], p [0.0, 0.6140842850865946]
Run 5: l [62.509375, 125.00874999999999], p [0.03502943121229607, 0.6140842850865946]
Run 6: l [93.7590625, 125.00874999999999], p [0.22746464452421056, 0.6140842850865946]
Run 7: l [109.38390625, 125.00874999999999], p [0.39411875367635885, 0.6140842850865946]
recommended l =  117.196328125
Start at res =  0.7 step =  0.1
Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Res =  0.7 Num of clusters =  6
Initializing cluster centers with louvain, resolution =  0.7999999999999999
Epoch  0
Epoch  10
Res =  0.7999999999999999 Num of clusters =  7
recom

In [5]:
    conf_mat = confusion_matrix(labels, y_pred, labels=np.arange(8))
    row_ind, col_ind = linear_sum_assignment(-conf_mat)
    mapping = {pred_label: true_label for true_label, pred_label in zip(row_ind, col_ind)}
    y_pred = np.array([mapping[p] for p in y_pred])
    pred_labels_list = pd.DataFrame({
        'spot': list(range(1, len(y_pred) + 1)),
        'pred': y_pred
    })
    file_path_pred = os.path.join(folder_path, 'pred_labels.csv')
    pred_labels_list.to_csv(file_path_pred, index=False)
    ARI_s = adjusted_rand_score(y_pred, labels)
    acc_s = accuracy_score(y_pred, labels)
    print("ARI_s:", ARI_s)
    print("acc_s", acc_s)

ARI_s: 0.3253886141084341
acc_s 0.47770329350043716
