In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import anndata as ad
import scanpy as sc
import pandas as pd
import numpy as np
import scipy.sparse as sp
import scipy.linalg
import os

import torch
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    used_device = torch.device('cuda:0')
else:
    used_device = torch.device('cpu')

In [3]:
from Graspot.ST_utils import Cal_Spatial_Net, mapping_accuracy_ot, mapping_accuracy_in, batch_entropy_mixing_score, silhouette, avg_silhouette_width_batch
from Graspot.OT_utils import distance_matrix, unbalanced_ot
from Graspot.train import norm_and_center_coordinates, train_Graspot, train_Graspot_Sub

In [4]:
import scipy.sparse as sp

# Sections processed by this cell (alternative pair: ['151673', '151674']).
section_ids = ['151669','151670']
Batch_list, adj_list = [], []
print(section_ids)

for section_id in section_ids:
    print(section_id)
    input_dir = os.path.join('data/', section_id)
    count_file = section_id + '_filtered_feature_bc_matrix.h5'
    truth_file = os.path.join(input_dir, section_id + '_truth.txt')

    adata = sc.read_visium(path=input_dir, count_file=count_file, load_images=True)
    adata.var_names_make_unique(join="++")

    # Attach the per-spot annotation; missing annotations are labelled "unknown".
    Ann_df = pd.read_csv(truth_file, sep='\t', header=None, index_col=0)
    Ann_df.columns = ['Ground Truth']
    Ann_df = Ann_df.fillna("unknown")
    truth = Ann_df.loc[adata.obs_names, 'Ground Truth']
    adata.obs['Ground Truth'] = truth.astype('category')

    # Suffix every spot name with the section id so names stay unique across sections.
    adata.obs_names = [barcode + '_' + section_id for barcode in adata.obs_names]

    # Build the spatial network; it is read back from adata.uns['adj'] below.
    Cal_Spatial_Net(adata, rad_cutoff=150)

    # Flag highly variable genes before normalisation, then normalise,
    # log-transform and keep only the flagged genes.
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=5000)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    hvg_mask = adata.var['highly_variable']
    adata = adata[:, hvg_mask]

    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)

['151669', '151670']
151669
------Calculating spatial graph...
The graph contains 21194 edges, 3661 cells.
5.7891 neighbors per cell on average.
151670
------Calculating spatial graph...
The graph contains 20370 edges, 3498 cells.
5.8233 neighbors per cell on average.


In [5]:
import anndata as ad

# Stack the per-section objects; "slice_name" records which section each spot came from.
adata_concat = ad.concat(Batch_list, label="slice_name", keys=section_ids)

# Store the labels and the slice identifier ("batch_name") as categorical columns.
for target_col, source_col in (('Ground Truth', 'Ground Truth'), ("batch_name", "slice_name")):
    adata_concat.obs[target_col] = adata_concat.obs[source_col].astype('category')

In [6]:
# Pairs of Batch_list positions to align with each other.
iter_comb = [(0, 1)]

# Training returns the updated AnnData and one matrix per pair in iter_comb.
adata_concat, tran_list = train_Graspot(
    adata_concat,
    verbose=True,
    knn_neigh=100,
    n_epochs=200,
    iter_comb=iter_comb,
    Batch_list=Batch_list,
    initial=True,
    Couple=None,
    device=used_device,
)

STAligner(
  (conv1): GATConv(2156, 512, heads=1)
  (conv2): GATConv(512, 30, heads=1)
  (conv3): GATConv(30, 512, heads=1)
  (conv4): GATConv(512, 2156, heads=1)
)
Pretrain with STAGATE...


100%|███████████████| 200/200 [00:09<00:00, 20.79it/s]


Train with STAligner...


100%|███████████████| 200/200 [00:51<00:00,  3.89it/s]


In [8]:
# Batch-entropy mixing score of the 'Graspot' embedding against the slice labels
# (recorded run: ~0.69).
batch_entropy_mixing_score(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['batch_name']),
)

0.6905757011319644

In [9]:
# Silhouette of the 'Graspot' embedding against the ground-truth labels
# (recorded run: ~0.58).
silhouette(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['Ground Truth']),
)

0.5828511938452721

In [10]:
# Batch-wise average silhouette width from the embedding, the slice labels and
# the ground-truth labels (recorded run: ~0.95).
avg_silhouette_width_batch(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['batch_name']),
    np.asarray(adata_concat.obs['Ground Truth']),
)

0.9522981643676758

In [4]:
import scipy.sparse as sp

# Sections processed by this cell (alternative pair: ['151673', '151674']).
section_ids = ['151670','151671']
Batch_list, adj_list = [], []
print(section_ids)

for section_id in section_ids:
    print(section_id)
    input_dir = os.path.join('data/', section_id)
    count_file = section_id + '_filtered_feature_bc_matrix.h5'
    truth_file = os.path.join(input_dir, section_id + '_truth.txt')

    adata = sc.read_visium(path=input_dir, count_file=count_file, load_images=True)
    adata.var_names_make_unique(join="++")

    # Attach the per-spot annotation; missing annotations are labelled "unknown".
    Ann_df = pd.read_csv(truth_file, sep='\t', header=None, index_col=0)
    Ann_df.columns = ['Ground Truth']
    Ann_df = Ann_df.fillna("unknown")
    truth = Ann_df.loc[adata.obs_names, 'Ground Truth']
    adata.obs['Ground Truth'] = truth.astype('category')

    # Suffix every spot name with the section id so names stay unique across sections.
    adata.obs_names = [barcode + '_' + section_id for barcode in adata.obs_names]

    # Build the spatial network; it is read back from adata.uns['adj'] below.
    Cal_Spatial_Net(adata, rad_cutoff=150)

    # Flag highly variable genes before normalisation, then normalise,
    # log-transform and keep only the flagged genes.
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=5000)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    hvg_mask = adata.var['highly_variable']
    adata = adata[:, hvg_mask]

    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)

['151670', '151671']
151670
------Calculating spatial graph...
The graph contains 20370 edges, 3498 cells.
5.8233 neighbors per cell on average.
151671
------Calculating spatial graph...
The graph contains 24052 edges, 4110 cells.
5.8521 neighbors per cell on average.


In [12]:
import anndata as ad

# Stack the per-section objects; "slice_name" records which section each spot came from.
adata_concat = ad.concat(Batch_list, label="slice_name", keys=section_ids)

# Store the labels and the slice identifier ("batch_name") as categorical columns.
for target_col, source_col in (('Ground Truth', 'Ground Truth'), ("batch_name", "slice_name")):
    adata_concat.obs[target_col] = adata_concat.obs[source_col].astype('category')

In [13]:
# Pairs of Batch_list positions to align with each other.
iter_comb = [(0, 1)]

# Training returns the updated AnnData and one matrix per pair in iter_comb.
adata_concat, tran_list = train_Graspot(
    adata_concat,
    verbose=True,
    knn_neigh=100,
    n_epochs=200,
    iter_comb=iter_comb,
    Batch_list=Batch_list,
    initial=True,
    Couple=None,
    device=used_device,
)

STAligner(
  (conv1): GATConv(2144, 512, heads=1)
  (conv2): GATConv(512, 30, heads=1)
  (conv3): GATConv(30, 512, heads=1)
  (conv4): GATConv(512, 2144, heads=1)
)
Train with STAligner...


100%|███████████████| 200/200 [00:57<00:00,  3.50it/s]


In [14]:
# Batch-entropy mixing score of the 'Graspot' embedding against the slice labels
# (recorded run: ~0.69).
batch_entropy_mixing_score(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['batch_name']),
)

0.6875445680151334

In [15]:
# Silhouette of the 'Graspot' embedding against the ground-truth labels
# (recorded run: ~0.52).
silhouette(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['Ground Truth']),
)

0.5228031203150749

In [16]:
# Batch-wise average silhouette width from the embedding, the slice labels and
# the ground-truth labels (recorded run: ~0.90).
avg_silhouette_width_batch(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['batch_name']),
    np.asarray(adata_concat.obs['Ground Truth']),
)

0.901580810546875

In [17]:
import scipy.sparse as sp

# Sections processed by this cell (alternative pair: ['151673', '151674']).
section_ids = ['151671','151672']
Batch_list, adj_list = [], []
print(section_ids)

for section_id in section_ids:
    print(section_id)
    input_dir = os.path.join('data/', section_id)
    count_file = section_id + '_filtered_feature_bc_matrix.h5'
    truth_file = os.path.join(input_dir, section_id + '_truth.txt')

    adata = sc.read_visium(path=input_dir, count_file=count_file, load_images=True)
    adata.var_names_make_unique(join="++")

    # Attach the per-spot annotation; missing annotations are labelled "unknown".
    Ann_df = pd.read_csv(truth_file, sep='\t', header=None, index_col=0)
    Ann_df.columns = ['Ground Truth']
    Ann_df = Ann_df.fillna("unknown")
    truth = Ann_df.loc[adata.obs_names, 'Ground Truth']
    adata.obs['Ground Truth'] = truth.astype('category')

    # Suffix every spot name with the section id so names stay unique across sections.
    adata.obs_names = [barcode + '_' + section_id for barcode in adata.obs_names]

    # Build the spatial network; it is read back from adata.uns['adj'] below.
    Cal_Spatial_Net(adata, rad_cutoff=150)

    # Flag highly variable genes before normalisation, then normalise,
    # log-transform and keep only the flagged genes.
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=5000)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    hvg_mask = adata.var['highly_variable']
    adata = adata[:, hvg_mask]

    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)

['151671', '151672']
151671
------Calculating spatial graph...
The graph contains 24052 edges, 4110 cells.
5.8521 neighbors per cell on average.
151672
------Calculating spatial graph...
The graph contains 23382 edges, 4015 cells.
5.8237 neighbors per cell on average.


In [18]:
import anndata as ad

# Stack the per-section objects; "slice_name" records which section each spot came from.
adata_concat = ad.concat(Batch_list, label="slice_name", keys=section_ids)

# Store the labels and the slice identifier ("batch_name") as categorical columns.
for target_col, source_col in (('Ground Truth', 'Ground Truth'), ("batch_name", "slice_name")):
    adata_concat.obs[target_col] = adata_concat.obs[source_col].astype('category')

In [19]:
# Pairs of Batch_list positions to align with each other.
iter_comb = [(0, 1)]

# Training returns the updated AnnData and one matrix per pair in iter_comb.
adata_concat, tran_list = train_Graspot(
    adata_concat,
    verbose=True,
    knn_neigh=100,
    n_epochs=200,
    iter_comb=iter_comb,
    Batch_list=Batch_list,
    initial=True,
    Couple=None,
    device=used_device,
)

STAligner(
  (conv1): GATConv(2171, 512, heads=1)
  (conv2): GATConv(512, 30, heads=1)
  (conv3): GATConv(30, 512, heads=1)
  (conv4): GATConv(512, 2171, heads=1)
)
Train with STAligner...


100%|███████████████| 200/200 [01:05<00:00,  3.05it/s]


In [20]:
# Batch-entropy mixing score of the 'Graspot' embedding against the slice labels
# (recorded run: ~0.69).
batch_entropy_mixing_score(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['batch_name']),
)

0.6907469475993032

In [21]:
# Silhouette of the 'Graspot' embedding against the ground-truth labels
# (recorded run: ~0.57).
silhouette(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['Ground Truth']),
)

0.5739313811063766

In [22]:
# Batch-wise average silhouette width from the embedding, the slice labels and
# the ground-truth labels (recorded run: ~0.91).
avg_silhouette_width_batch(
    adata_concat.obsm['Graspot'],
    np.asarray(adata_concat.obs['batch_name']),
    np.asarray(adata_concat.obs['Ground Truth']),
)

0.907818078994751

In [4]:
import scipy.sparse as sp

# Sections processed by this cell (alternative pair: ['151673', '151674']).
section_ids = ['151669','151670','151671','151672']
Batch_list, adj_list = [], []
print(section_ids)

for section_id in section_ids:
    print(section_id)
    input_dir = os.path.join('data/', section_id)
    count_file = section_id + '_filtered_feature_bc_matrix.h5'
    truth_file = os.path.join(input_dir, section_id + '_truth.txt')

    adata = sc.read_visium(path=input_dir, count_file=count_file, load_images=True)
    adata.var_names_make_unique(join="++")

    # Attach the per-spot annotation; missing annotations are labelled "unknown".
    Ann_df = pd.read_csv(truth_file, sep='\t', header=None, index_col=0)
    Ann_df.columns = ['Ground Truth']
    Ann_df = Ann_df.fillna("unknown")
    truth = Ann_df.loc[adata.obs_names, 'Ground Truth']
    adata.obs['Ground Truth'] = truth.astype('category')

    # Suffix every spot name with the section id so names stay unique across sections.
    adata.obs_names = [barcode + '_' + section_id for barcode in adata.obs_names]

    # Build the spatial network; it is read back from adata.uns['adj'] below.
    Cal_Spatial_Net(adata, rad_cutoff=150)

    # Flag highly variable genes before normalisation, then normalise,
    # log-transform and keep only the flagged genes.
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=5000)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    hvg_mask = adata.var['highly_variable']
    adata = adata[:, hvg_mask]

    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)

['151669', '151670', '151671', '151672']
151669
------Calculating spatial graph...
The graph contains 21194 edges, 3661 cells.
5.7891 neighbors per cell on average.
151670
------Calculating spatial graph...
The graph contains 20370 edges, 3498 cells.
5.8233 neighbors per cell on average.
151671
------Calculating spatial graph...
The graph contains 24052 edges, 4110 cells.
5.8521 neighbors per cell on average.
151672
------Calculating spatial graph...
The graph contains 23382 edges, 4015 cells.
5.8237 neighbors per cell on average.


In [5]:
import anndata as ad

# Stack the per-section objects; "slice_name" records which section each spot came from.
adata_concat = ad.concat(Batch_list, label="slice_name", keys=section_ids)

# Store the labels and the slice identifier ("batch_name") as categorical columns.
for target_col, source_col in (('Ground Truth', 'Ground Truth'), ("batch_name", "slice_name")):
    adata_concat.obs[target_col] = adata_concat.obs[source_col].astype('category')

In [6]:
# Pairs of Batch_list positions to align with each other.
iter_comb = [(0, 1), (2, 3)]

# Training returns the updated AnnData and one matrix per pair in iter_comb.
# (train_Graspot_Sub accepts the same arguments and was tried here as an alternative.)
adata_concat, tran_list = train_Graspot(
    adata_concat,
    verbose=True,
    knn_neigh=100,
    n_epochs=200,
    iter_comb=iter_comb,
    Batch_list=Batch_list,
    initial=True,
    device=used_device,
)

STAligner(
  (conv1): GATConv(946, 512, heads=1)
  (conv2): GATConv(512, 30, heads=1)
  (conv3): GATConv(30, 512, heads=1)
  (conv4): GATConv(512, 946, heads=1)
)
Pretrain with STAGATE...


100%|███████████████| 200/200 [00:13<00:00, 15.06it/s]


Train with STAligner...


100%|███████████████| 200/200 [00:49<00:00,  4.07it/s]
100%|███████████████| 200/200 [01:02<00:00,  3.18it/s]


In [7]:
# Score the matrix of every aligned pair with mapping_accuracy_ot, using the
# ground-truth labels of the two slices in the pair. The score is computed once
# per pair and both stored and printed.
accuracy_ot = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    plan = tran_list[iters].cpu().detach().numpy()
    score = mapping_accuracy_ot(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], plan)
    accuracy_ot.append(score)
    print(score)

0.8997983
0.84878707


In [8]:
# Turn every pair's matrix into a hard one-to-one assignment and score it.
#   matching_plt[k] : 2 x n array, row 0 = spot index on the smaller side,
#                     row 1 = index of its arg-max partner on the other side
#   getMax_plt[k]   : 0/1 matrix with the same shape as the pair's matrix
accuracy_in = []
matching_plt = []
getMax_plt = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    plan = tran_list[iters].cpu().detach().numpy()
    n_rows, n_cols = plan.shape
    getMax = np.zeros_like(plan)
    # `<=` (not `<`) so that a square matrix is handled as well; with the strict
    # comparison neither branch ran and getMax was undefined or stale.
    if n_rows <= n_cols:
        # Each row picks its best column.
        matching_index = np.argmax(plan, axis=1)
        matching = np.array([np.arange(n_rows), matching_index])
        getMax[matching[0], matching[1]] = 1
    else:
        # Each column picks its best row.
        matching_index = np.argmax(plan, axis=0)
        matching = np.array([np.arange(n_cols), matching_index])
        getMax[matching[1], matching[0]] = 1
    matching_plt.append(matching)
    getMax_plt.append(getMax)

    score = mapping_accuracy_in(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], getMax)
    accuracy_in.append(score)
    print(score)

0.9033733562035449
0.8483188044831881


In [10]:
from sklearn.metrics.cluster import adjusted_rand_score

# Adjusted Rand index between the labels of the smaller slice and the labels of
# the spots it was matched to in the larger slice.
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    # `<=` mirrors the matching cell, so equal-sized slices are scored too.
    if tran_list[iters].shape[0] <= tran_list[iters].shape[1]:
        # First slice is the smaller one, second the larger one.
        reference = Batch_list[i].obs['Ground Truth']
        partner = Batch_list[j].obs['Ground Truth']
    else:
        reference = Batch_list[j].obs['Ground Truth']
        partner = Batch_list[i].obs['Ground Truth']
    # matching_plt holds integer positions, so select with .iloc; plain [] on a
    # string-indexed Series relies on a deprecated positional fallback.
    print(adjusted_rand_score(reference, partner.iloc[matching_plt[iters][1]]))
        

0.8617829268370529
0.7319159822640117


In [11]:
import scipy.sparse as sp

# Sections processed by this cell (alternative pair: ['151673', '151674']).
section_ids = ['151670','151671']
Batch_list, adj_list = [], []
print(section_ids)

for section_id in section_ids:
    print(section_id)
    input_dir = os.path.join('data/', section_id)
    count_file = section_id + '_filtered_feature_bc_matrix.h5'
    truth_file = os.path.join(input_dir, section_id + '_truth.txt')

    adata = sc.read_visium(path=input_dir, count_file=count_file, load_images=True)
    adata.var_names_make_unique(join="++")

    # Attach the per-spot annotation; missing annotations are labelled "unknown".
    Ann_df = pd.read_csv(truth_file, sep='\t', header=None, index_col=0)
    Ann_df.columns = ['Ground Truth']
    Ann_df = Ann_df.fillna("unknown")
    truth = Ann_df.loc[adata.obs_names, 'Ground Truth']
    adata.obs['Ground Truth'] = truth.astype('category')

    # Suffix every spot name with the section id so names stay unique across sections.
    adata.obs_names = [barcode + '_' + section_id for barcode in adata.obs_names]

    # Build the spatial network; it is read back from adata.uns['adj'] below.
    Cal_Spatial_Net(adata, rad_cutoff=150)

    # Flag highly variable genes before normalisation, then normalise,
    # log-transform and keep only the flagged genes.
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=5000)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    hvg_mask = adata.var['highly_variable']
    adata = adata[:, hvg_mask]

    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)

['151670', '151671']
151670
------Calculating spatial graph...
The graph contains 20370 edges, 3498 cells.
5.8233 neighbors per cell on average.
151671
------Calculating spatial graph...
The graph contains 24052 edges, 4110 cells.
5.8521 neighbors per cell on average.


In [12]:
import anndata as ad

# Stack the per-section objects; "slice_name" records which section each spot came from.
adata_concat = ad.concat(Batch_list, label="slice_name", keys=section_ids)

# Store the labels and the slice identifier ("batch_name") as categorical columns.
for target_col, source_col in (('Ground Truth', 'Ground Truth'), ("batch_name", "slice_name")):
    adata_concat.obs[target_col] = adata_concat.obs[source_col].astype('category')

In [13]:
# Encode the layer annotation of both slices as integers; labels missing from
# mapping_dict (e.g. "unknown") map to NaN.
mapping_dict = {'Layer_1':1, 'Layer_2':2, 'Layer_3':3, 'Layer_4':4, 'Layer_5':5, 'Layer_6':6, 'WM':7}
data1 = np.array(Batch_list[0].obs['Ground Truth'].map(mapping_dict))
data2 = np.array(Batch_list[1].obs['Ground Truth'].map(mapping_dict))

# Value written into DM for spot pairs that share the same label.
gamma = 0.5

# DM[i, j] is gamma when spot i of slice 0 and spot j of slice 1 carry the same
# label and 1 otherwise. The broadcast comparison replaces a Python double loop
# over every spot pair; NaN never equals NaN, so unlabelled spots keep the value 1.
codes1 = np.asarray(data1, dtype=float)
codes2 = np.asarray(data2, dtype=float)
DM = np.where(codes1[:, None] == codes2[None, :], gamma, 1.0)

In [7]:
# Pairs of Batch_list positions to align with each other.
iter_comb = [(0, 1)]

# Training without a label-based coupling matrix (Couple=None).
adata_concat, tran_list = train_Graspot_Sub(
    adata_concat,
    verbose=True,
    knn_neigh=100,
    n_epochs=200,
    iter_comb=iter_comb,
    Batch_list=Batch_list,
    Couple=None,
    device=used_device,
)

STAligner(
  (conv1): GATConv(2144, 512, heads=1)
  (conv2): GATConv(512, 30, heads=1)
  (conv3): GATConv(30, 512, heads=1)
  (conv4): GATConv(512, 2144, heads=1)
)
Pretrain with STAGATE...


100%|█████████████████████████████████████████| 200/200 [00:09<00:00, 20.24it/s]


Train with STAligner...


100%|█████████████████████████████████████████| 200/200 [00:56<00:00,  3.53it/s]


In [8]:
# Score the matrix of every aligned pair with mapping_accuracy_ot, using the
# ground-truth labels of the two slices in the pair. The score is computed once
# per pair and both stored and printed.
accuracy_ot = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    plan = tran_list[iters].cpu().detach().numpy()
    score = mapping_accuracy_ot(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], plan)
    accuracy_ot.append(score)
    print(score)

0.69886893


In [9]:
# Turn every pair's matrix into a hard one-to-one assignment and score it.
#   matching_plt[k] : 2 x n array, row 0 = spot index on the smaller side,
#                     row 1 = index of its arg-max partner on the other side
#   getMax_plt[k]   : 0/1 matrix with the same shape as the pair's matrix
accuracy_in = []
matching_plt = []
getMax_plt = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    # Convert the tensor once and reuse it.
    plan = tran_list[iters].cpu().detach().numpy()
    n_rows, n_cols = plan.shape
    getMax = np.zeros_like(plan)
    if n_rows <= n_cols:
        # Each row picks its best column.
        matching_index = np.argmax(plan, axis=1)
        matching = np.array([np.arange(n_rows), matching_index])
        getMax[matching[0], matching[1]] = 1
    else:
        # Each column picks its best row.
        matching_index = np.argmax(plan, axis=0)
        matching = np.array([np.arange(n_cols), matching_index])
        getMax[matching[1], matching[0]] = 1
    matching_plt.append(matching)
    getMax_plt.append(getMax)

    score = mapping_accuracy_in(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], getMax)
    accuracy_in.append(score)
    print(score)

0.7012578616352201


In [10]:
from sklearn.metrics.cluster import adjusted_rand_score

# Adjusted Rand index between the labels of the smaller slice and the labels of
# the spots it was matched to in the larger slice.
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    if tran_list[iters].shape[0] <= tran_list[iters].shape[1]:
        # First slice is the smaller one, second the larger one.
        reference = Batch_list[i].obs['Ground Truth']
        partner = Batch_list[j].obs['Ground Truth']
    else:
        reference = Batch_list[j].obs['Ground Truth']
        partner = Batch_list[i].obs['Ground Truth']
    # matching_plt holds integer positions, so select with .iloc; plain [] on a
    # string-indexed Series relies on a deprecated positional fallback.
    print(adjusted_rand_score(reference, partner.iloc[matching_plt[iters][1]]))

0.4662222720933047


In [14]:
# Pairs of Batch_list positions to align with each other.
iter_comb = [(0, 1)]

# Same training call, this time passing the label-based matrix DM as Couple.
adata_concat, tran_list = train_Graspot_Sub(
    adata_concat,
    verbose=True,
    knn_neigh=100,
    n_epochs=200,
    iter_comb=iter_comb,
    Batch_list=Batch_list,
    Couple=DM,
    device=used_device,
)

STAligner(
  (conv1): GATConv(2144, 512, heads=1)
  (conv2): GATConv(512, 30, heads=1)
  (conv3): GATConv(30, 512, heads=1)
  (conv4): GATConv(512, 2144, heads=1)
)
Pretrain with STAGATE...


100%|█████████████████████████████████████████| 200/200 [00:09<00:00, 20.49it/s]


Train with STAligner...


100%|█████████████████████████████████████████| 200/200 [01:00<00:00,  3.31it/s]


In [15]:
# Score the matrix of every aligned pair with mapping_accuracy_ot, using the
# ground-truth labels of the two slices in the pair. The score is computed once
# per pair and both stored and printed.
accuracy_ot = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    plan = tran_list[iters].cpu().detach().numpy()
    score = mapping_accuracy_ot(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], plan)
    accuracy_ot.append(score)
    print(score)

0.81332463


In [16]:
# Turn every pair's matrix into a hard one-to-one assignment and score it.
#   matching_plt[k] : 2 x n array, row 0 = spot index on the smaller side,
#                     row 1 = index of its arg-max partner on the other side
#   getMax_plt[k]   : 0/1 matrix with the same shape as the pair's matrix
accuracy_in = []
matching_plt = []
getMax_plt = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    # Convert the tensor once and reuse it.
    plan = tran_list[iters].cpu().detach().numpy()
    n_rows, n_cols = plan.shape
    getMax = np.zeros_like(plan)
    if n_rows <= n_cols:
        # Each row picks its best column.
        matching_index = np.argmax(plan, axis=1)
        matching = np.array([np.arange(n_rows), matching_index])
        getMax[matching[0], matching[1]] = 1
    else:
        # Each column picks its best row.
        matching_index = np.argmax(plan, axis=0)
        matching = np.array([np.arange(n_cols), matching_index])
        getMax[matching[1], matching[0]] = 1
    matching_plt.append(matching)
    getMax_plt.append(getMax)

    score = mapping_accuracy_in(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], getMax)
    accuracy_in.append(score)
    print(score)

0.8053173241852487


In [17]:
from sklearn.metrics.cluster import adjusted_rand_score

# Adjusted Rand index between the labels of the smaller slice and the labels of
# the spots it was matched to in the larger slice.
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    if tran_list[iters].shape[0] <= tran_list[iters].shape[1]:
        # First slice is the smaller one, second the larger one.
        reference = Batch_list[i].obs['Ground Truth']
        partner = Batch_list[j].obs['Ground Truth']
    else:
        reference = Batch_list[j].obs['Ground Truth']
        partner = Batch_list[i].obs['Ground Truth']
    # matching_plt holds integer positions, so select with .iloc; plain [] on a
    # string-indexed Series relies on a deprecated positional fallback.
    print(adjusted_rand_score(reference, partner.iloc[matching_plt[iters][1]]))

0.5751643695040616


In [11]:
import scipy.sparse as sp

# Sections processed by this cell (alternative pair: ['151673', '151674']).
section_ids = ['151669','151670']
Batch_list, adj_list = [], []
print(section_ids)

for section_id in section_ids:
    print(section_id)
    input_dir = os.path.join('data/', section_id)
    count_file = section_id + '_filtered_feature_bc_matrix.h5'
    truth_file = os.path.join(input_dir, section_id + '_truth.txt')

    adata = sc.read_visium(path=input_dir, count_file=count_file, load_images=True)
    adata.var_names_make_unique(join="++")

    # Attach the per-spot annotation; missing annotations are labelled "unknown".
    Ann_df = pd.read_csv(truth_file, sep='\t', header=None, index_col=0)
    Ann_df.columns = ['Ground Truth']
    Ann_df = Ann_df.fillna("unknown")
    truth = Ann_df.loc[adata.obs_names, 'Ground Truth']
    adata.obs['Ground Truth'] = truth.astype('category')

    # Suffix every spot name with the section id so names stay unique across sections.
    adata.obs_names = [barcode + '_' + section_id for barcode in adata.obs_names]

    # Build the spatial network; it is read back from adata.uns['adj'] below.
    Cal_Spatial_Net(adata, rad_cutoff=150)

    # Flag highly variable genes before normalisation, then normalise,
    # log-transform and keep only the flagged genes.
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=5000)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    hvg_mask = adata.var['highly_variable']
    adata = adata[:, hvg_mask]

    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)

['151669', '151670']
151669
------Calculating spatial graph...
The graph contains 21194 edges, 3661 cells.
5.7891 neighbors per cell on average.
151670
------Calculating spatial graph...
The graph contains 20370 edges, 3498 cells.
5.8233 neighbors per cell on average.


In [12]:
import anndata as ad

# Stack the per-section objects; "slice_name" records which section each spot came from.
adata_concat = ad.concat(Batch_list, label="slice_name", keys=section_ids)

# Store the labels and the slice identifier ("batch_name") as categorical columns.
for target_col, source_col in (('Ground Truth', 'Ground Truth'), ("batch_name", "slice_name")):
    adata_concat.obs[target_col] = adata_concat.obs[source_col].astype('category')

In [13]:
# Encode the layer annotation of both slices as integers; labels missing from
# mapping_dict (e.g. "unknown") map to NaN.
mapping_dict = {'Layer_1':1, 'Layer_2':2, 'Layer_3':3, 'Layer_4':4, 'Layer_5':5, 'Layer_6':6, 'WM':7}
data1 = np.array(Batch_list[0].obs['Ground Truth'].map(mapping_dict))
data2 = np.array(Batch_list[1].obs['Ground Truth'].map(mapping_dict))

# Value written into DM for spot pairs that share the same label.
gamma = 0.5

# DM[i, j] is gamma when spot i of slice 0 and spot j of slice 1 carry the same
# label and 1 otherwise. The broadcast comparison replaces a Python double loop
# over every spot pair; NaN never equals NaN, so unlabelled spots keep the value 1.
codes1 = np.asarray(data1, dtype=float)
codes2 = np.asarray(data2, dtype=float)
DM = np.where(codes1[:, None] == codes2[None, :], gamma, 1.0)

In [14]:
# Pairs of Batch_list positions to align with each other.
iter_comb = [(0, 1)]

# Training with the label-based matrix DM passed as Couple.
adata_concat, tran_list = train_Graspot(
    adata_concat,
    verbose=True,
    knn_neigh=100,
    n_epochs=200,
    iter_comb=iter_comb,
    Batch_list=Batch_list,
    initial=True,
    Couple=DM,
    device=used_device,
)

STAligner(
  (conv1): GATConv(2156, 512, heads=1)
  (conv2): GATConv(512, 30, heads=1)
  (conv3): GATConv(30, 512, heads=1)
  (conv4): GATConv(512, 2156, heads=1)
)
Train with STAligner...


100%|███████████████| 200/200 [00:54<00:00,  3.68it/s]


In [17]:
# Score the matrix of every aligned pair with mapping_accuracy_ot, using the
# ground-truth labels of the two slices in the pair. The score is computed once
# per pair and both stored and printed.
accuracy_ot = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    plan = tran_list[iters].cpu().detach().numpy()
    score = mapping_accuracy_ot(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], plan)
    accuracy_ot.append(score)
    print(score)

0.9147077


In [15]:
# Turn every pair's matrix into a hard one-to-one assignment and score it.
#   matching_plt[k] : 2 x n array, row 0 = spot index on the smaller side,
#                     row 1 = index of its arg-max partner on the other side
#   getMax_plt[k]   : 0/1 matrix with the same shape as the pair's matrix
accuracy_in = []
matching_plt = []
getMax_plt = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    # Convert the tensor once and reuse it.
    plan = tran_list[iters].cpu().detach().numpy()
    n_rows, n_cols = plan.shape
    getMax = np.zeros_like(plan)
    if n_rows <= n_cols:
        # Each row picks its best column.
        matching_index = np.argmax(plan, axis=1)
        matching = np.array([np.arange(n_rows), matching_index])
        getMax[matching[0], matching[1]] = 1
    else:
        # Each column picks its best row.
        matching_index = np.argmax(plan, axis=0)
        matching = np.array([np.arange(n_cols), matching_index])
        getMax[matching[1], matching[0]] = 1
    matching_plt.append(matching)
    getMax_plt.append(getMax)

    score = mapping_accuracy_in(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], getMax)
    accuracy_in.append(score)
    print(score)

0.9139508290451687


In [18]:
from sklearn.metrics.cluster import adjusted_rand_score

# Adjusted Rand index between the labels of the smaller slice and the labels of
# the spots it was matched to in the larger slice.
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    if tran_list[iters].shape[0] <= tran_list[iters].shape[1]:
        # First slice is the smaller one, second the larger one.
        reference = Batch_list[i].obs['Ground Truth']
        partner = Batch_list[j].obs['Ground Truth']
    else:
        reference = Batch_list[j].obs['Ground Truth']
        partner = Batch_list[i].obs['Ground Truth']
    # matching_plt holds integer positions, so select with .iloc; plain [] on a
    # string-indexed Series relies on a deprecated positional fallback.
    print(adjusted_rand_score(reference, partner.iloc[matching_plt[iters][1]]))

0.8725726914245223


In [4]:
import scipy.sparse as sp

# Sections processed by this cell (alternative pair: ['151673', '151674']).
section_ids = ['151671','151672']
Batch_list, adj_list = [], []
print(section_ids)

for section_id in section_ids:
    print(section_id)
    input_dir = os.path.join('data/', section_id)
    count_file = section_id + '_filtered_feature_bc_matrix.h5'
    truth_file = os.path.join(input_dir, section_id + '_truth.txt')

    adata = sc.read_visium(path=input_dir, count_file=count_file, load_images=True)
    adata.var_names_make_unique(join="++")

    # Attach the per-spot annotation; missing annotations are labelled "unknown".
    Ann_df = pd.read_csv(truth_file, sep='\t', header=None, index_col=0)
    Ann_df.columns = ['Ground Truth']
    Ann_df = Ann_df.fillna("unknown")
    truth = Ann_df.loc[adata.obs_names, 'Ground Truth']
    adata.obs['Ground Truth'] = truth.astype('category')

    # Suffix every spot name with the section id so names stay unique across sections.
    adata.obs_names = [barcode + '_' + section_id for barcode in adata.obs_names]

    # Build the spatial network; it is read back from adata.uns['adj'] below.
    Cal_Spatial_Net(adata, rad_cutoff=150)

    # Flag highly variable genes before normalisation, then normalise,
    # log-transform and keep only the flagged genes.
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=5000)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    hvg_mask = adata.var['highly_variable']
    adata = adata[:, hvg_mask]

    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)

['151671', '151672']
151671
------Calculating spatial graph...
The graph contains 24052 edges, 4110 cells.
5.8521 neighbors per cell on average.
151672
------Calculating spatial graph...
The graph contains 23382 edges, 4015 cells.
5.8237 neighbors per cell on average.


In [5]:
import anndata as ad

# Stack the per-section objects; "slice_name" records which section each spot came from.
adata_concat = ad.concat(Batch_list, label="slice_name", keys=section_ids)

# Store the labels and the slice identifier ("batch_name") as categorical columns.
for target_col, source_col in (('Ground Truth', 'Ground Truth'), ("batch_name", "slice_name")):
    adata_concat.obs[target_col] = adata_concat.obs[source_col].astype('category')

In [6]:
# Encode the layer annotation of both slices as integers; labels missing from
# mapping_dict (e.g. "unknown") map to NaN.
mapping_dict = {'Layer_1':1, 'Layer_2':2, 'Layer_3':3, 'Layer_4':4, 'Layer_5':5, 'Layer_6':6, 'WM':7}
data1 = np.array(Batch_list[0].obs['Ground Truth'].map(mapping_dict))
data2 = np.array(Batch_list[1].obs['Ground Truth'].map(mapping_dict))

# Value written into DM for spot pairs that share the same label.
gamma = 0.5

# DM[i, j] is gamma when spot i of slice 0 and spot j of slice 1 carry the same
# label and 1 otherwise. The broadcast comparison replaces a Python double loop
# over every spot pair; NaN never equals NaN, so unlabelled spots keep the value 1.
codes1 = np.asarray(data1, dtype=float)
codes2 = np.asarray(data2, dtype=float)
DM = np.where(codes1[:, None] == codes2[None, :], gamma, 1.0)

In [7]:
# Pairs of Batch_list positions to align with each other.
iter_comb = [(0, 1)]

# Training with the label-based matrix DM passed as Couple.
adata_concat, tran_list = train_Graspot(
    adata_concat,
    verbose=True,
    knn_neigh=100,
    n_epochs=200,
    iter_comb=iter_comb,
    Batch_list=Batch_list,
    initial=True,
    Couple=DM,
    device=used_device,
)

STAligner(
  (conv1): GATConv(2171, 512, heads=1)
  (conv2): GATConv(512, 30, heads=1)
  (conv3): GATConv(30, 512, heads=1)
  (conv4): GATConv(512, 2171, heads=1)
)
Train with STAligner...


100%|███████████████| 200/200 [01:09<00:00,  2.87it/s]


In [8]:
# Score the matrix of every aligned pair with mapping_accuracy_ot, using the
# ground-truth labels of the two slices in the pair. The score is computed once
# per pair and both stored and printed.
accuracy_ot = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    plan = tran_list[iters].cpu().detach().numpy()
    score = mapping_accuracy_ot(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], plan)
    accuracy_ot.append(score)
    print(score)

0.8520578


In [9]:
# Turn every pair's matrix into a hard one-to-one assignment and score it.
#   matching_plt[k] : 2 x n array, row 0 = spot index on the smaller side,
#                     row 1 = index of its arg-max partner on the other side
#   getMax_plt[k]   : 0/1 matrix with the same shape as the pair's matrix
accuracy_in = []
matching_plt = []
getMax_plt = []
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    # Convert the tensor once and reuse it.
    plan = tran_list[iters].cpu().detach().numpy()
    n_rows, n_cols = plan.shape
    getMax = np.zeros_like(plan)
    if n_rows <= n_cols:
        # Each row picks its best column.
        matching_index = np.argmax(plan, axis=1)
        matching = np.array([np.arange(n_rows), matching_index])
        getMax[matching[0], matching[1]] = 1
    else:
        # Each column picks its best row.
        matching_index = np.argmax(plan, axis=0)
        matching = np.array([np.arange(n_cols), matching_index])
        getMax[matching[1], matching[0]] = 1
    matching_plt.append(matching)
    getMax_plt.append(getMax)

    score = mapping_accuracy_in(Batch_list[i].obs['Ground Truth'], Batch_list[j].obs['Ground Truth'], getMax)
    accuracy_in.append(score)
    print(score)

0.851307596513076


In [10]:
from sklearn.metrics.cluster import adjusted_rand_score

# Adjusted Rand index between the labels of the smaller slice and the labels of
# the spots it was matched to in the larger slice.
for iters, comb in enumerate(iter_comb):
    i, j = comb[0], comb[1]
    if tran_list[iters].shape[0] <= tran_list[iters].shape[1]:
        # First slice is the smaller one, second the larger one.
        reference = Batch_list[i].obs['Ground Truth']
        partner = Batch_list[j].obs['Ground Truth']
    else:
        reference = Batch_list[j].obs['Ground Truth']
        partner = Batch_list[i].obs['Ground Truth']
    # matching_plt holds integer positions, so select with .iloc; plain [] on a
    # string-indexed Series relies on a deprecated positional fallback.
    print(adjusted_rand_score(reference, partner.iloc[matching_plt[iters][1]]))

0.7340198291841303
