In [1]:
import scanpy as sc
import pandas as pd
import numpy as np

In [2]:
def write2csv(method, adata, sample='embryo1'):
    """Collect the metrics stored in ``adata.uns`` into a 4-row summary table.

    The table has one row per clustering ('ground.truth', 'leiden', 'louvain',
    'mclust') and one column per metric. A ``uns`` entry is assigned to a metric
    column when its key contains the column's lookup token, and to a row
    according to the clustering name found in the key (row 0, 'ground.truth',
    when the key names none of leiden/louvain/mclust).

    Parameters
    ----------
    method : str
        Value written into the 'method' column of every row.
    adata : object
        Anything exposing a dict-like ``.uns`` with string keys
        (presumably the AnnData returned by ``sc.read_h5ad`` -- see callers).
    sample : str, optional
        Value written into the 'Sample' column. Defaults to 'embryo1', the
        label that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        4 rows x 17 columns; metrics with no matching key stay NaN.

    Notes
    -----
    Matching is by substring, so when several keys match the same
    (row, column) cell the last key iterated wins.
    """
    id_columns = ['Sample', 'method', 'cluster_method']
    metric_columns = ['spatial_coherence_score',
                      'ARI', 'NMI', 'cASW', 'cLISI', 'IsolatedLabelsAsw', 'IsolatedLabelsF1',
                      'iASW', 'iLISI', 'graph_connectivity', 'kBET', 'PCR', "Moran's I", "Geary's C"]
    columns = id_columns + metric_columns
    cluster_methods = ['ground.truth', 'leiden', 'louvain', 'mclust']
    # Columns whose uns keys use a short token instead of the column label.
    key_tokens = {'spatial_coherence_score': 'scs', "Moran's I": 'moran', "Geary's C": 'geary'}

    df = pd.DataFrame(data = None, columns = columns)
    df['Sample'] = np.repeat(sample, 4)
    df['method'] = np.repeat(method, 4)
    df['cluster_method'] = cluster_methods

    # Only metric columns are filled from uns: scanning the identifier columns
    # as well let any key containing e.g. "method" overwrite the labels above.
    for column in metric_columns:
        token = key_tokens.get(column, column)
        for key in adata.uns.keys():
            if token not in key:
                continue
            if 'leiden' in key:
                row = 1
            elif 'louvain' in key:
                row = 2
            elif 'mclust' in key:
                row = 3
            else:
                row = 0
            df.loc[ [row], [column] ] = adata.uns[key]

    return df

# import os
# if os.path.exists(output_dir + "embedding_index.csv"):
#     df_other = pd.read_csv(output_dir + "embedding_index.csv", index_col = 0)
#     df = pd.concat([df_other,df],axis=0)
# df.to_csv(output_dir + "embedding_index.csv")

In [3]:
def write_scs(method, adata, samples):
    """Build a per-sample table of the 'scs' entries stored in ``adata.uns``.

    Every ``uns`` key containing 'scs' is placed in the row of the first
    clustering name it contains ('ground.truth', 'leiden', 'louvain',
    'mclust'; row 0 when it contains none) and in the column of the sample
    name it contains.

    Parameters
    ----------
    method : str
        Value written into the 'method' column of every row.
    adata : object
        Anything exposing a dict-like ``.uns`` with string keys.
    samples : list of str
        Sample names; each becomes a column and is matched as a substring of
        the ``uns`` keys. If several samples match one key, the last one in
        ``samples`` is used.

    Returns
    -------
    pandas.DataFrame
        4 rows; columns 'method', 'cluster_method' and one per sample.
        Cells with no matching key stay NaN.
    """
    cluster_method = ['ground.truth', 'leiden', 'louvain', 'mclust']
    columns = ['method', 'cluster_method'] + list(samples)
    df = pd.DataFrame(data = None, columns = columns)
    df['method'] = np.repeat(method, 4)
    df['cluster_method'] = cluster_method

    for key in adata.uns.keys():
        if 'scs' not in key:
            continue

        # Row of the first clustering named in the key; 0 ('ground.truth') otherwise.
        row = next((i for i, m in enumerate(cluster_method) if m in key), 0)

        # Reset per key: previously `col` leaked from the preceding key (or was
        # unbound on the first one) when the key named none of the samples.
        col = None
        for sample in samples:
            if sample in key:
                col = sample
        if col is None:
            # 'scs' entry that belongs to no requested sample: nothing to record.
            continue

        df.loc[ [row], [col] ] = adata.uns[key]
    return df

# method = 'SPIRAL'
# experiment = 'donor3'
# input_dir = "G:/dataset/1_DLPFC/output/"+experiment+"/embedding/"+method+"/"
# adata = sc.read_h5ad(input_dir + method + '_K7_result.h5ad')
# write_scs(method, adata, [str(i) for i in range(151673,151677)]).to_csv("G:/dataset/1_DLPFC/output/donor1/test.csv")

In [4]:
from sklearn.metrics import adjusted_rand_score
def write2ARI(adatas, method, annotation):
    """Adjusted Rand index of each clustering against ``annotation``.

    Scores are computed on the full object (column 'integrated') and on each
    subset obtained by grouping ``adatas.obs`` on its "batch" column.

    Parameters
    ----------
    adatas : object
        Must expose a DataFrame ``.obs`` holding "batch", ``annotation`` and
        the columns 'leiden_<method>', 'louvain_<method>', 'mclust_<method>',
        and support positional row selection via ``adatas[indices]``.
    method : str
        Suffix of the clustering columns; also written to the 'method' column.
    annotation : str
        Name of the ``obs`` column holding the reference labels.

    Returns
    -------
    pandas.DataFrame
        3 rows (leiden, louvain, mclust); columns 'integrated', one per batch,
        then 'method' and 'cluster_method'.
    """
    clusterings = ['leiden', 'louvain', 'mclust']
    batch_indices = adatas.obs.groupby("batch").indices

    # Whole object first, then one subset per batch, in groupby order.
    subsets = [adatas] + [adatas[idx] for idx in batch_indices.values()]
    batchs = ['integrated'] + list(batch_indices.keys())

    df = pd.DataFrame(data = None, columns = batchs)
    df['method'] = np.repeat(method, 3)
    df['cluster_method'] = clusterings

    for batch_name, subset in zip(batchs, subsets):
        for row, clustering in enumerate(clusterings):
            df.loc[ [row], [batch_name] ] = adjusted_rand_score(
                subset.obs[clustering + '_' + method], subset.obs[annotation])

    return df

# input_dir = "G:/dataset/04-PDAC/output/result/embedding/"
# output_dir = "G:/dataset/04-PDAC/output/result/embedding/"
# methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL']
# annotation = 'Classification'
# df_res = pd.DataFrame(data = None)
# for method in methods:
#     adata = sc.read_h5ad(input_dir + method + '_K5_result.h5ad')
#     df = write2ARI(adata, method, annotation)
#     df_res = pd.concat([df_res, df],axis=0)

# df_res

In [None]:
# Load the harmony K7 result for DLPFC donor1 and display the loaded object.
# This rebinds the notebook-level `method` and `adata`, which cells further
# down (the "test" cells) read without assigning them.
method = 'harmony'
experiment = 'donor1'
input_dir = "G:/dataset/1_DLPFC/output/"+experiment+"/embedding/"+method+"/"
adata = sc.read_h5ad(input_dir + method + '_K7_result.h5ad')
adata

In [None]:
# Load the seurat K12 result from the "all" sagittal folder and display it.
# Alternative (single-sample) locations, kept for reference:
# input_dir = "G:/dataset/2_saggital/output/result/embedding/sample1/"
# output_dir = "G:/dataset/2_saggital/output/result/embedding/"
input_dir = "G:/dataset/2_saggital/output/result/embedding/all/"
output_dir = "G:/dataset/2_saggital/output/result/embedding/all/"
# `methods` is assigned but not used in this cell; only `method` is loaded.
methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL', 'SPIRAL_pyg']
method = 'seurat'
adata = sc.read_h5ad(input_dir + method + '_K12_result.h5ad')
adata

In [None]:
# BRCA (K0 results): stack the write2csv and write_scs tables of every method
# into df_index_all / df_index_scs. Nothing is written to disk in this cell.
input_dir = "G:/dataset/3_BRCA/output/result/embedding/"
output_dir = "G:/dataset/3_BRCA/output/result/embedding/"
methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL']
df_index_all = pd.DataFrame(data = None)
df_index_scs = pd.DataFrame(data = None)
for method in methods:
    # Each method's result lives in its own '<method>_h5ad/' sub-folder.
    adata = sc.read_h5ad(input_dir + method + '_h5ad/' + method + '_K0_result.h5ad')
    df1 = write2csv(method, adata)
    df2 = write_scs(method, adata, ['1142243F', '1160920F'])
    df_index_all = pd.concat([df_index_all, df1],axis=0)
    df_index_scs = pd.concat([df_index_scs, df2],axis=0)

In [None]:
# PDAC (K5 results): build the per-sample SCS table for every method and display it.
input_dir = "G:/dataset/04-PDAC/output/result/embedding/"
output_dir = "G:/dataset/04-PDAC/output/result/embedding/"
methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL']
df_index_all = pd.DataFrame(data = None)
df_index_scs = pd.DataFrame(data = None)
for method in methods:
    print(method)
    adata = sc.read_h5ad(input_dir + method + '_K5_result.h5ad')
    print(adata)
    # The write2csv summary is disabled for this dataset, so its accumulation is
    # disabled with it: concatenating `df1` here raised NameError on a fresh
    # kernel, or re-appended a stale `df1` left over from another dataset's cell.
#     df1 = write2csv(method, adata)
#     df_index_all = pd.concat([df_index_all, df1],axis=0)
    df2 = write_scs(method, adata, ['A1', 'B1'])
    df_index_scs = pd.concat([df_index_scs, df2],axis=0)

df_index_scs

In [None]:
# CoronalMouseBrain (K6 results): build the metric, SCS and ARI tables and
# write each one to CSV in `output_dir`.
input_dir = "G:/dataset/06-CoronalMouseBrain/output/result/embedding/"
output_dir = "G:/dataset/06-CoronalMouseBrain/output/result/embedding/"
# Full method list, currently disabled:
# methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL', 'SPIRAL_pyg']
# Only GraphST is processed; the output file names below carry the same suffix.
methods  = ['GraphST']
annotation = 'region'
df_index_all = pd.DataFrame(data = None)
df_index_scs = pd.DataFrame(data = None)
df_index_ARI = pd.DataFrame(data = None)
for method in methods:
    adata = sc.read_h5ad(input_dir + method + '_h5ad/' + method + '_K6_result.h5ad')
    df1 = write2csv(method, adata)
    df2 = write_scs(method, adata, ['DAPI', 'FFPE', 'Normal'])
    df3 = write2ARI(adata, method, annotation)
    # Append this method's rows below those of the methods processed so far.
    df_index_all = pd.concat([df_index_all, df1],axis=0)
    df_index_scs = pd.concat([df_index_scs, df2],axis=0)
    df_index_ARI = pd.concat([df_index_ARI, df3],axis=0)

# df_index_ARI
df_index_all.to_csv(output_dir+'index_embedding_GraphST.csv')
df_index_scs.to_csv(output_dir+'index_scs_GraphST.csv')
df_index_ARI.to_csv(output_dir+'index_ARI_GraphST.csv')

In [None]:
# STARmap (K4 results): stack the write2csv and write_scs tables of every method.
# The ARI step is commented out, so df_index_ARI stays an empty frame here.
input_dir = "G:/dataset/08-STARmap/output/result/embedding/"
output_dir = "G:/dataset/08-STARmap/output/result/embedding/"
methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL']
annotation = 'z'
df_index_all = pd.DataFrame(data = None)
df_index_scs = pd.DataFrame(data = None)
df_index_ARI = pd.DataFrame(data = None)
for method in methods:
    adata = sc.read_h5ad(input_dir + method + '/' + method + '_K4_result.h5ad')
    df1 = write2csv(method, adata)
    df2 = write_scs(method, adata, ['BZ5', 'BZ9', 'BZ14'])
#     df3 = write2ARI(adata, method, annotation)
    df_index_all = pd.concat([df_index_all, df1],axis=0)
    df_index_scs = pd.concat([df_index_scs, df2],axis=0)
    # NOTE(review): if the ARI step is re-enabled, the line below should
    # concatenate `df3` (the write2ARI result), not `df`.
#     df_index_ARI = pd.concat([df_index_ARI, df],axis=0)

In [None]:
# MOSTA (K4 results): stack the metric, SCS and ARI tables of every method.
input_dir = "G:/dataset/11-MOSTA/output/MOSTA_h5ad/"
output_dir = "G:/dataset/11-MOSTA/output/MOSTA_h5ad/result/"
methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL_pyg']
annotation = 'annotation'
df_index_all = pd.DataFrame(data = None)
df_index_scs = pd.DataFrame(data = None)
df_index_ARI = pd.DataFrame(data = None)
for method in methods:
    adata = sc.read_h5ad(input_dir + method + '/' + method + '_K4_result.h5ad')
    df1 = write2csv(method, adata)
    df2 = write_scs(method, adata, ['E9.5_E1S1', 'E10.5_E2S1', 'E11.5_E1S1'])
    df3 = write2ARI(adata, method, annotation)
    df_index_all = pd.concat([df_index_all, df1],axis=0)
    df_index_scs = pd.concat([df_index_scs, df2],axis=0)
    # Append this method's ARI table (`df3`). The previous code appended `df`,
    # a name this cell never assigns, so it failed or stacked an unrelated frame.
    df_index_ARI = pd.concat([df_index_ARI, df3],axis=0)

In [7]:
# Simulation (2 batches, 6 cell types; K6 results): stack the metric, SCS and
# ARI tables of every method and print them. Nothing is written to disk here.
input_dir = "G:/dataset/12-sim/2batch_6celltype/sim_h5ad/"
output_dir = "G:/dataset/12-sim/2batch_6celltype/sim_h5ad/"
methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL']
annotation = 'celltype'
df_index_all = pd.DataFrame(data = None)
df_index_scs = pd.DataFrame(data = None)
df_index_ARI = pd.DataFrame(data = None)
for method in methods:
    adata = sc.read_h5ad(input_dir + method + '_K6_result.h5ad')
    df1 = write2csv(method, adata)
    df2 = write_scs(method, adata, ['batch1', 'batch2'])
    df3 = write2ARI(adata, method, annotation)
    # Append this method's rows below those of the methods processed so far.
    df_index_all = pd.concat([df_index_all, df1],axis=0)
    df_index_scs = pd.concat([df_index_scs, df2],axis=0)
    df_index_ARI = pd.concat([df_index_ARI, df3],axis=0)

print(df_index_all)
print(df_index_scs)
print(df_index_ARI)
# NOTE: `k` is not defined anywhere in this cell; define it before re-enabling.
# df_index_ARI.to_csv(output_dir + 'index_k'+str(k)+'_ARI.csv')

    Sample     method cluster_method spatial_coherence_score       ARI  \
0  embryo1     seurat   ground.truth              -74.477891       1.0   
1  embryo1     seurat         leiden               -7.756493  0.424027   
2  embryo1     seurat        louvain              -13.477764  0.524732   
3  embryo1     seurat         mclust               -0.378494   0.00005   
0  embryo1    harmony   ground.truth              -74.290115       1.0   
1  embryo1    harmony         leiden               -1.579991   0.10546   
2  embryo1    harmony        louvain               -2.105877  0.110681   
3  embryo1    harmony         mclust               -0.550635   0.00005   
0  embryo1      liger   ground.truth              -81.013075       1.0   
1  embryo1      liger         leiden                1.045984  0.099244   
2  embryo1      liger        louvain                2.184568  0.086509   
3  embryo1      liger         mclust                0.246087  0.002035   
0  embryo1    PRECAST   ground.truth  

In [8]:
# Write the tables currently held in df_index_all / df_index_scs / df_index_ARI
# to CSV under the current `output_dir` (all set by whichever cell ran last).
# The bare `df_index_*` lines are not the final expression of the cell, so they
# display nothing.
df_index_all
df_index_all.to_csv(output_dir + 'index_embedding.csv')

df_index_scs
df_index_scs.to_csv(output_dir + 'index_scs.csv')

df_index_ARI
df_index_ARI.to_csv(output_dir + 'index_ARI.csv')

In [None]:
# Displays `adatas`. The name is first assigned in the next cell, so on a fresh
# top-to-bottom run this cell raises NameError.
adatas

In [None]:
# LICA, SPIRAL only (K4 result): write the metric, SCS and ARI tables to the
# 'k4/' output folder.
output_dir = 'G:/dataset/05-LICA/output/result/embedding/k4/'
adatas = sc.read_h5ad(r'G:\dataset\05-LICA\output\result\embedding\SPIRAL_K4_result.h5ad')
method = 'SPIRAL'
write2csv(method, adatas).to_csv(output_dir+'SPIRAL_embedding_index.csv')
write_scs(method, adatas, ['HCC-5A', 'HCC-5B', 'HCC-5C','HCC-5D']).to_csv(output_dir+'SPIRAL_scs_index.csv')
write2ARI(adatas, method, 'region').to_csv(output_dir+'SPIRAL_ARI_index.csv')

In [None]:
# LICA (K3 results): stack the write2ARI table of every method into df_res and
# write it to 'ARI_index.csv'.
input_dir = 'G:/dataset/05-LICA/output/result/embedding/'
output_dir = "G:/dataset/05-LICA/output/result/embedding/"
methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL']
annotation = 'region'
df_res = pd.DataFrame(data = None)
for method in methods:
    adata = sc.read_h5ad(input_dir + method + '_K3_result.h5ad')
    df = write2ARI(adata, method, annotation)
    df_res = pd.concat([df_res, df],axis=0)

# Not the final expression of the cell, so this line displays nothing.
df_res
df_res.to_csv(output_dir + 'ARI_index.csv')

In [None]:
# Writes whatever `df_res` currently holds to 'index_ARI_K5.csv'.
# NOTE(review): the preceding cell builds df_res from '*_K3_result.h5ad' files,
# while this file name says K5 -- confirm which K is intended.
df_res.to_csv(output_dir + 'index_ARI_K5.csv')

# spatial index

In [None]:
# Read the header-less test.csv, group rows by its first three columns
# (positional labels 0, 1, 2), average the remaining columns per group and
# write the result to moran_geary.csv.
df = pd.read_csv(r'G:\dataset\07-seqFish-SpatialMouseAtlas\output\result\test.csv',header = None)
df.groupby([0, 1, 2]).aggregate("mean").to_csv(r'G:\dataset\07-seqFish-SpatialMouseAtlas\output\result\moran_geary.csv')
# df

##  test

In [None]:
# Scratch cell: builds the same empty 4-row metric table that write2csv builds,
# with 'LICA' as the sample label.
# `method` is not assigned here; it is whatever an earlier cell left in the kernel.
columns = ['Sample', 'method', 'cluster_method', 'spatial_coherence_score', 
              'ARI', 'NMI', 'cASW', 'cLISI', 'IsolatedLabelsAsw', 'IsolatedLabelsF1', 
              'iASW', 'iLISI', 'graph_connectivity', 'kBET', 'PCR', "Moran's I", "Geary's C"]
df = pd.DataFrame(data = None, columns = columns)
df['Sample'] = np.repeat('LICA', 4)
df['method'] = np.repeat(method, 4)
df['cluster_method'] = ['ground.truth', 'leiden', 'louvain', 'mclust']
df

In [None]:
# Scratch cell: fills `df` from `adata.uns` with the same loop as write2csv.
# `columns`, `df` and `adata` are not assigned here; they come from earlier cells.
for column in columns:
    # Three columns are looked up in the uns keys by a short token rather than
    # by the column label itself.
    if column == 'spatial_coherence_score':
        temp_column = 'scs'
    elif column == "Moran's I":
        temp_column = 'moran'
    elif column == "Geary's C":
        temp_column = 'geary'
    else:
        temp_column = column
    for key in adata.uns.keys():
        if key.find(temp_column) != -1:
            # Row is chosen by the clustering named in the key; keys naming none
            # of leiden/louvain/mclust go to row 0 ('ground.truth').
            if key.find('leiden') != -1:
                df.loc[ [1], [column] ] = adata.uns[key]
            elif key.find('louvain') != -1:
                df.loc[ [2], [column] ] = adata.uns[key]
            elif key.find('mclust') != -1:
                df.loc[ [3], [column] ] = adata.uns[key]
            else:
                df.loc[ [0], [column] ] = adata.uns[key]

df

In [None]:
from sklearn.metrics import adjusted_rand_score

# BRCA (K11 results): ARI of every clustering against 'Classification', per
# method, stacked into df_res and written to 'index_ARI_K11.csv'.
input_dir = "G:/dataset/3_BRCA/output/result/embedding/"
output_dir = "G:/dataset/3_BRCA/output/result/embedding/"
methods  = ['seurat', 'harmony', 'liger', 'PRECAST', 'STAGATE', 'STAligner', 'DeepST', 'GraphST', 'SPIRAL']
annotation = 'Classification'

df_res = pd.DataFrame(data = None)
for method in methods:
    adatas = sc.read_h5ad(input_dir + method + '_h5ad/' + method + '_K11_result.h5ad')
    print(adatas)

    # Use the notebook's write2ARI helper instead of a pasted copy of its body,
    # so the per-batch ARI logic lives in one place.
    df = write2ARI(adatas, method, annotation)
    # write2ARI puts 'integrated' and the batch columns first and appends
    # 'method' and 'cluster_method' last, so this prints the batch labels.
    print(list(df.columns[:-2]))

    df_res = pd.concat([df_res,df],axis=0)

df_res.to_csv(output_dir + 'index_ARI_K11.csv')

In [None]:
# Same call as the last statement of the previous cell: rewrites
# 'index_ARI_K11.csv' from the current `df_res` and `output_dir`.
df_res.to_csv(output_dir + 'index_ARI_K11.csv')