In [None]:
'''
计算各方法的得分（综合相关指标），对应论文中的图3
'''

In [23]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from functools import reduce

### 局部得分

In [32]:
'''
所有真实数据集：局部得分
'''
fragments = ['scDesign3','TI','SIMLR','VASC','benchmarker']
all_dfs = []
for fragment in fragments:
    if fragment == 'scDesign3':
        datasets = ['ACINAR','ATAC','BATCH','CITE','EMBYRO','MARROW','MOBSC','OVARIAN','PANCREAS','SCGEMMETH','SCGEMRNA','SCIATAC', 'VISIUM', 'IFNB','SLIDE']
    elif fragment == 'TI':
        datasets = ['DA1_horns','DC3_VA1d_horns','horns','blastocyst','epiblast','ICM','trophectoderm','combination-1','combination-2','combination-3','epidermis','muscle','neuron','pair-1','pair-2','pair-3','pair-4','parenchyme','phagocyte','pharynx']
    elif fragment == 'SIMLR':
        datasets = ['Pollen','Usoskin']
    elif fragment == 'VASC':
        datasets = ['Baron','Biase','Goolam','Xin','yan','Zeisel']
    elif fragment == 'benchmarker':
        datasets = ['Baron_mouse','Darmanis','Pancreatic','Silver','Zhengmix4eq']

    for dataset in datasets:
        datapath1 = f'/home/henu/work/result/metric/{fragment}/{dataset}/dr1.csv'
        datapath2 = f'/home/henu/work/result/metric/{fragment}/{dataset}/dr2.csv'
        df1 = pd.read_csv(datapath1)
        df2 = pd.read_csv(datapath2)
        df_merge = pd.merge(df1, df2, on="Method",how="outer")

        df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
        all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())


metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'random_triplet','spearman','k-nearest','centroid_distance','AUC','Qlocal','Qglobal','kmax']]

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/local.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  ACINAR   0.136   0.128   0.137  0.145   0.003   0.007   0.010   
1   DREAM  ACINAR   0.127   0.135   0.128  0.136   0.003   0.007   0.010   
2    EDGE  ACINAR   0.467   0.470   0.464  0.432   0.045   0.072   0.094   
3  GLMPCA  ACINAR   0.577   0.590   0.594  0.588   0.064   0.094   0.117   
4     PCA  ACINAR   0.475   0.497   0.490  0.499   0.069   0.105   0.131   

   aji_10  ...    AUC  Qlocal  Qglobal   T_10   T_20   T_30   C_10   C_20  \
0   0.002  ...  0.502   0.326    0.827  0.504  0.503  0.504  0.502  0.502   
1   0.002  ...  0.503   0.275    0.776  0.499  0.502  0.503  0.496  0.498   
2   0.022  ...  0.556   0.233    0.607  0.797  0.792  0.789  0.682  0.676   
3   0.032  ...  0.665   0.301    0.732  0.745  0.747  0.748  0.918  0.909   
4   0.035  ...  0.658   0.317    0.716  0.754  0.754  0.754  0.912  0.904   

    C_30  kmax  
0  0.503  1974  
1  0.500  1659  
2  0.671   410  
3  0

### 全局得分

In [33]:
'''
所有真实数据集：全局得分
'''
fragments = ['scDesign3','TI','SIMLR','VASC','benchmarker']
all_dfs = []
for fragment in fragments:
    if fragment == 'scDesign3':
        datasets = ['ACINAR','ATAC','BATCH','CITE','EMBYRO','MARROW','MOBSC','OVARIAN','PANCREAS','SCGEMMETH','SCGEMRNA','SCIATAC', 'VISIUM', 'IFNB','SLIDE']
    elif fragment == 'TI':
        datasets = ['DA1_horns','DC3_VA1d_horns','horns','blastocyst','epiblast','ICM','trophectoderm','combination-1','combination-2','combination-3','epidermis','muscle','neuron','pair-1','pair-2','pair-3','pair-4','parenchyme','phagocyte','pharynx']
    elif fragment == 'SIMLR':
        datasets = ['Pollen','Usoskin']
    elif fragment == 'VASC':
        datasets = ['Baron','Biase','Goolam','Xin','yan','Zeisel']
    elif fragment == 'benchmarker':
        datasets = ['Baron_mouse','Darmanis','Pancreatic','Silver','Zhengmix4eq']

    for dataset in datasets:
        datapath1 = f'/home/henu/work/result/metric/{fragment}/{dataset}/dr1.csv'
        datapath2 = f'/home/henu/work/result/metric/{fragment}/{dataset}/dr2.csv'
        df1 = pd.read_csv(datapath1)
        df2 = pd.read_csv(datapath2)
        df_merge = pd.merge(df1, df2, on="Method",how="outer")

        df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
        all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'knn_10','knn_20','knn_30','svm','nkr_10','nkr_20','nkr_30','aji_10','aji_20','aji_30','AUC','Qlocal','Qglobal','kmax','T_10','T_20','T_30','C_10','C_20','C_30']]

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/global.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")


合并后的数据：
   Method Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  ACINAR   0.136   0.128   0.137  0.145   0.003   0.007   0.010   
1   DREAM  ACINAR   0.127   0.135   0.128  0.136   0.003   0.007   0.010   
2    EDGE  ACINAR   0.467   0.470   0.464  0.432   0.045   0.072   0.094   
3  GLMPCA  ACINAR   0.577   0.590   0.594  0.588   0.064   0.094   0.117   
4     PCA  ACINAR   0.475   0.497   0.490  0.499   0.069   0.105   0.131   

   aji_10  ...    AUC  Qlocal  Qglobal   T_10   T_20   T_30   C_10   C_20  \
0   0.002  ...  0.502   0.326    0.827  0.504  0.503  0.504  0.502  0.502   
1   0.002  ...  0.503   0.275    0.776  0.499  0.502  0.503  0.496  0.498   
2   0.022  ...  0.556   0.233    0.607  0.797  0.792  0.789  0.682  0.676   
3   0.032  ...  0.665   0.301    0.732  0.745  0.747  0.748  0.918  0.909   
4   0.035  ...  0.658   0.317    0.716  0.754  0.754  0.754  0.912  0.904   

    C_30  kmax  
0  0.503  1974  
1  0.500  1659  
2  0.671   410  
3  0

### 聚类得分

In [34]:
'''
所有真实数据集：聚类得分
kmeans
'''
fragments = ['scDesign3','TI','SIMLR','VASC','benchmarker']
all_dfs = []
cluster_method = 'kmeans'
for fragment in fragments:
    if fragment == 'scDesign3':
        datasets = ['ACINAR','ATAC','BATCH','CITE','EMBYRO','MARROW','MOBSC','OVARIAN','PANCREAS','SCGEMMETH','SCGEMRNA','SCIATAC', 'VISIUM', 'IFNB','SLIDE']
    elif fragment == 'TI':
        datasets = ['DA1_horns','DC3_VA1d_horns','horns','blastocyst','epiblast','ICM','trophectoderm','combination-1','combination-2','combination-3','epidermis','muscle','neuron','pair-1','pair-2','pair-3','pair-4','parenchyme','phagocyte','pharynx']
    elif fragment == 'SIMLR':
        datasets = ['Pollen','Usoskin']
    elif fragment == 'VASC':
        datasets = ['Baron','Biase','Goolam','Xin','yan','Zeisel']
    elif fragment == 'benchmarker':
        datasets = ['Baron_mouse','Darmanis','Pancreatic','Silver','Zhengmix4eq']

    for dataset in datasets:
        datapath1 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_ARI.csv'
        datapath2 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_NMI.csv'
        datapath3 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_SIL.csv'
        datapath4 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_COMP.csv'
        datapath5 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_HOMO.csv'
        df1 = pd.read_csv(datapath1)
        df2 = pd.read_csv(datapath2)
        df3 = pd.read_csv(datapath3)
        df4 = pd.read_csv(datapath4)
        df5 = pd.read_csv(datapath5)
        df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5])

        df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
        all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset"]]

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/kmeans.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")


合并后的数据：
   Method Dataset   ARI   NMI   SIL  COMP  HOMO
0     DRA  ACINAR  0.00  0.01  0.35  0.01  0.01
1   DREAM  ACINAR  0.00  0.01  0.32  0.01  0.01
2    EDGE  ACINAR  0.14  0.23  0.56  0.23  0.23
3  GLMPCA  ACINAR  0.29  0.45  0.36  0.45  0.44
4     PCA  ACINAR  0.15  0.31  0.35  0.32  0.30

最终 Accuracy 分数：
               Method     score
0                 DRA  0.162800
1               DREAM  0.462572
2                EDGE  0.486485
3              GLMPCA  0.608917
4                 PCA  0.549690
5               PHATE  0.602700
6              PaCMAP  0.306871
7   ParametricUMAP200  0.590043
8    ParametricUMAP50  0.589401
9              SAUCIE  0.356084
10             SCDRHA  0.518872
11              SIMLR  0.611821
12               SPDR  0.605149
13          SQuaD_MDS  0.453330
14   SQuaD_MDS_hybrid  0.534743
15             SSNMDI  0.266731
16               TSNE  0.606920
17             TriMap  0.613567
18               UMAP  0.578703
19                VAE  0.460403
20             

In [35]:
'''
所有真实数据集：聚类得分
louvain
'''
fragments = ['scDesign3','TI','SIMLR','VASC','benchmarker']
all_dfs = []
cluster_method = 'louvain'
for fragment in fragments:
    if fragment == 'scDesign3':
        datasets = ['ACINAR','ATAC','BATCH','CITE','EMBYRO','MARROW','MOBSC','OVARIAN','PANCREAS','SCGEMMETH','SCGEMRNA','SCIATAC', 'VISIUM', 'IFNB','SLIDE']
    elif fragment == 'TI':
        datasets = ['DA1_horns','DC3_VA1d_horns','horns','blastocyst','epiblast','ICM','trophectoderm','combination-1','combination-2','combination-3','epidermis','muscle','neuron','pair-1','pair-2','pair-3','pair-4','parenchyme','phagocyte','pharynx']
    elif fragment == 'SIMLR':
        datasets = ['Pollen','Usoskin']
    elif fragment == 'VASC':
        datasets = ['Baron','Biase','Goolam','Xin','yan','Zeisel']
    elif fragment == 'benchmarker':
        datasets = ['Baron_mouse','Darmanis','Pancreatic','Silver','Zhengmix4eq']

    for dataset in datasets:
        datapath1 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_ARI.csv'
        datapath2 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_NMI.csv'
        datapath3 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_SIL.csv'
        datapath4 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_COMP.csv'
        datapath5 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_HOMO.csv'
        df1 = pd.read_csv(datapath1)
        df2 = pd.read_csv(datapath2)
        df3 = pd.read_csv(datapath3)
        df4 = pd.read_csv(datapath4)
        df5 = pd.read_csv(datapath5)
        df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5])

        df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
        all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset"]]

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/louvain.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")


合并后的数据：
   Method Dataset   ARI   NMI   SIL  COMP  HOMO
0     DRA  ACINAR  0.00  0.02  0.24  0.02  0.03
1   DREAM  ACINAR  0.00  0.02  0.19  0.02  0.03
2    EDGE  ACINAR  0.07  0.25  0.45  0.20  0.33
3  GLMPCA  ACINAR  0.14  0.39  0.24  0.32  0.51
4     PCA  ACINAR  0.10  0.33  0.26  0.26  0.43

最终 Accuracy 分数：
               Method     score
0                 DRA  0.148193
1               DREAM  0.409723
2                EDGE  0.455139
3              GLMPCA  0.495079
4                 PCA  0.462134
5               PHATE  0.492252
6              PaCMAP  0.286589
7   ParametricUMAP200  0.501074
8    ParametricUMAP50  0.495374
9              SAUCIE  0.333851
10             SCDRHA  0.458110
11              SIMLR  0.534594
12               SPDR  0.509282
13          SQuaD_MDS  0.424490
14   SQuaD_MDS_hybrid  0.482759
15             SSNMDI  0.271789
16               TSNE  0.539034
17             TriMap  0.520652
18               UMAP  0.499921
19                VAE  0.415271
20             

In [36]:
'''
所有真实数据集：聚类得分
spectral
'''
fragments = ['scDesign3','TI','SIMLR','VASC','benchmarker']
all_dfs = []
cluster_method = 'spectral'
for fragment in fragments:
    if fragment == 'scDesign3':
        datasets = ['ACINAR','ATAC','BATCH','CITE','EMBYRO','MARROW','MOBSC','OVARIAN','PANCREAS','SCGEMMETH','SCGEMRNA','SCIATAC', 'VISIUM', 'IFNB','SLIDE']
    elif fragment == 'TI':
        datasets = ['DA1_horns','DC3_VA1d_horns','horns','blastocyst','epiblast','ICM','trophectoderm','combination-1','combination-2','combination-3','epidermis','muscle','neuron','pair-1','pair-2','pair-3','pair-4','parenchyme','phagocyte','pharynx']
    elif fragment == 'SIMLR':
        datasets = ['Pollen','Usoskin']
    elif fragment == 'VASC':
        datasets = ['Baron','Biase','Goolam','Xin','yan','Zeisel']
    elif fragment == 'benchmarker':
        datasets = ['Baron_mouse','Darmanis','Pancreatic','Silver','Zhengmix4eq']

    for dataset in datasets:
        datapath1 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_ARI.csv'
        datapath2 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_NMI.csv'
        datapath3 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_SIL.csv'
        datapath4 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_COMP.csv'
        datapath5 = f'/home/henu/work/result/cluster/{fragment}/{dataset}/indicators/{cluster_method}_HOMO.csv'
        df1 = pd.read_csv(datapath1)
        df2 = pd.read_csv(datapath2)
        df3 = pd.read_csv(datapath3)
        df4 = pd.read_csv(datapath4)
        df5 = pd.read_csv(datapath5)
        df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5])

        df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
        all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset"]]

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/spectral.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")


合并后的数据：
   Method Dataset   ARI   NMI   SIL  COMP  HOMO
0     DRA  ACINAR  0.00  0.01  0.31  0.01  0.01
1   DREAM  ACINAR  0.00  0.01  0.26  0.01  0.01
2    EDGE  ACINAR  0.09  0.19  0.08  0.19  0.19
3  GLMPCA  ACINAR  0.30  0.46  0.31  0.46  0.47
4     PCA  ACINAR  0.17  0.32  0.30  0.32  0.33

最终 Accuracy 分数：
               Method     score
0                 DRA  0.163091
1               DREAM  0.468047
2                EDGE  0.482877
3              GLMPCA  0.596221
4                 PCA  0.523026
5               PHATE  0.577397
6              PaCMAP  0.298294
7   ParametricUMAP200  0.584175
8    ParametricUMAP50  0.583729
9              SAUCIE  0.365688
10             SCDRHA  0.492569
11              SIMLR  0.576569
12               SPDR  0.590062
13          SQuaD_MDS  0.464530
14   SQuaD_MDS_hybrid  0.546764
15             SSNMDI  0.242744
16               TSNE  0.627409
17             TriMap  0.607036
18               UMAP  0.582215
19                VAE  0.473684
20             

### 时间与内存得分

In [None]:
'''
时间与内存得分的计算是按数据集进行归一化的，不是整合所有数据集之后，整体按列进行归一化的；
单个数据集归一化，计算方法在这个数据集上的得分，取所有数据集上得分的均值作为该方法最终得分；1-x，得分符合折线图；
'''

In [60]:
'''
时间
'''
# 所有数据集的列表
datasets = ['cell_100','cell_500','cell_1000','cell_2000','cell_5000','cell_10000','cell_20000','cell_30000','cell_50000','cell_73233']
all_dfs = []

# 读取所有数据集
for dataset in datasets:
    datapath = f'/home/henu/work/result/efficiency/{dataset}.csv'
    df = pd.read_csv(datapath)

    df.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset", 'PeakMemory(gb)']]

# -------- Step 1: 按每个数据集标准化（Min-Max 归一化） --------
scaler = MinMaxScaler()

# 用于存储标准化后的数据
df_all_normalized = df_all.copy()

for dataset in datasets:
    # 按每个数据集进行标准化
    df_dataset = df_all[df_all["Dataset"] == dataset]

    # 对指标列进行标准化
    df_dataset[metric_cols] = scaler.fit_transform(df_dataset[metric_cols])

    # 将标准化后的数据合并回原始数据
    df_all_normalized.update(df_dataset)

# -------- Step 2: 每个 (方法×数据集) 的指标平均 --------
df_all_normalized["Accuracy_subscore"] = df_all_normalized[metric_cols].mean(axis=1)

# -------- Step 3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all_normalized.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

# 计算得分时取反（高分表示低成本）
accuracy_scores['score'] = 1 - accuracy_scores['score']

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/time.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
      Method   Dataset  PeakMemory(gb)  Time(s)
0     SAUCIE  cell_100           0.289   1.2138
1  SQuaD-MDS  cell_100           0.260   2.6305
2       UMAP  cell_100           0.722   7.3263
3     SCDRHA  cell_100           0.626  13.4948
4      scGAE  cell_100           0.514   3.3701

最终 Accuracy 分数：
       Method     score
0         DRA  0.826749
1       DREAM  0.893422
2        EDGE  0.864895
3      GLMPCA  0.984293
4         PCA  0.999858
5       PHATE  0.993161
6      PaCMAP  0.996160
7      SAUCIE  0.999238
8      SCDRHA  0.904575
9       SIMLR  0.662293
10       SPDR  0.810968
11  SQuaD-MDS  0.980002
12     SSNMDI  0.721061
13     TriMap  0.998207
14       UMAP  0.993611
15        VAE  0.947520
16       VASC  0.406081
17       ZIFA  0.765998
18       ivis  0.993506
19       pCMF  0.502604
20      scGAE  0.917079
21      scGBM  0.988004
22    scScope  0.952114
23      scvis  0.361316
24      t-SNE  0.992975
25     tGPLVM  0.910891
结果已保存到文件：/home/henu/work/result/score/t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dataset[metric_cols] = scaler.fit_transform(df_dataset[metric_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dataset[metric_cols] = scaler.fit_transform(df_dataset[metric_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dataset[metric_cols] = scaler.fit_transform(df_dataset[met

In [58]:
'''
内存
'''
# 所有数据集的列表
datasets = ['cell_100','cell_500','cell_1000','cell_2000','cell_5000','cell_10000','cell_20000','cell_30000','cell_50000','cell_73233']
all_dfs = []

# 读取所有数据集
for dataset in datasets:
    datapath = f'/home/henu/work/result/efficiency/{dataset}.csv'
    df = pd.read_csv(datapath)

    df.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset", 'Time(s)']]

# -------- Step 1: 按每个数据集标准化（Min-Max 归一化） --------
scaler = MinMaxScaler()

# 用于存储标准化后的数据
df_all_normalized = df_all.copy()

for dataset in datasets:
    # 按每个数据集进行标准化
    df_dataset = df_all[df_all["Dataset"] == dataset]

    # 对指标列进行标准化
    df_dataset[metric_cols] = scaler.fit_transform(df_dataset[metric_cols])

    # 将标准化后的数据合并回原始数据
    df_all_normalized.update(df_dataset)

# -------- Step 2: 每个 (方法×数据集) 的指标平均 --------
df_all_normalized["Accuracy_subscore"] = df_all_normalized[metric_cols].mean(axis=1)

# -------- Step 3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all_normalized.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

# 计算得分时取反（高分表示低成本）
accuracy_scores['score'] = 1 - accuracy_scores['score']

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/memory.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
      Method   Dataset  PeakMemory(gb)  Time(s)
0     SAUCIE  cell_100           0.289   1.2138
1  SQuaD-MDS  cell_100           0.260   2.6305
2       UMAP  cell_100           0.722   7.3263
3     SCDRHA  cell_100           0.626  13.4948
4      scGAE  cell_100           0.514   3.3701

最终 Accuracy 分数：
       Method     score
0         DRA  0.959883
1       DREAM  0.794299
2        EDGE  0.398328
3      GLMPCA  0.974607
4         PCA  0.991381
5       PHATE  0.980804
6      PaCMAP  0.977187
7      SAUCIE  0.975789
8      SCDRHA  0.506289
9       SIMLR  0.367526
10       SPDR  0.938520
11  SQuaD-MDS  0.976456
12     SSNMDI  0.740276
13     TriMap  0.976543
14       UMAP  0.906186
15        VAE  0.927733
16       VASC  0.955671
17       ZIFA  0.933574
18       ivis  0.929538
19       pCMF  0.973060
20      scGAE  0.666807
21      scGBM  0.954582
22    scScope  0.913354
23      scvis  0.973818
24      t-SNE  0.991186
25     tGPLVM  0.936219
结果已保存到文件：/home/henu/work/result/score/m

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dataset[metric_cols] = scaler.fit_transform(df_dataset[metric_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dataset[metric_cols] = scaler.fit_transform(df_dataset[metric_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dataset[metric_cols] = scaler.fit_transform(df_dataset[met

### 稳定性得分

In [39]:
'''
细胞数量稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['cell_100','cell_500','cell_1k','cell_5k','cell_1w','cell_2w','cell_3w']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/cell_number.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method   Dataset  knn_10  knn_20  knn_30   svm  nkr_10  nkr_20  nkr_30  \
0     DRA  cell_100    0.19    0.28    0.26  0.24   0.082   0.187   0.279   
1   DREAM  cell_100    0.39    0.39    0.43  0.36   0.138   0.297   0.435   
2    EDGE  cell_100    0.42    0.46    0.47  0.46   0.128   0.242   0.342   
3  GLMPCA  cell_100    0.72    0.74    0.73  0.74   0.148   0.328   0.447   
4     PCA  cell_100    0.83    0.89    0.87  0.88   0.150   0.312   0.403   

   aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  COMP  HOMO  
0   0.045  ...  0.532  0.523  0.506  0.504     2  0.01  0.06  0.38  0.06  0.05  
1   0.083  ...  0.709  0.721  0.731  0.750    39  0.06  0.14  0.38  0.16  0.12  
2   0.070  ...  0.606  0.575  0.594  0.601    53  0.05  0.14  0.32  0.14  0.14  
3   0.085  ...  0.705  0.722  0.748  0.762    33  0.45  0.55  0.38  0.56  0.55  
4   0.089  ...  0.662  0.690  0.717  0.734    44  0.81  0.89  0.61  0.91  0.87  

[5 rows x 31 columns]
['knn_10', 'knn_20',

In [40]:
'''
基因数量稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['gene_5k','gene_2w','gene_3w','gene_4w','gene_5w']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/gene_number.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method  Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  gene_5k   0.230   0.248   0.270  0.301   0.004   0.010   0.015   
1   DREAM  gene_5k   0.266   0.276   0.281  0.308   0.005   0.011   0.016   
2    EDGE  gene_5k   0.932   0.934   0.934  0.932   0.016   0.031   0.045   
3  GLMPCA  gene_5k   0.974   0.973   0.973  0.974   0.016   0.031   0.045   
4     PCA  gene_5k   0.927   0.928   0.928  0.935   0.016   0.029   0.042   

   aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  COMP  HOMO  
0   0.002  ...  0.504  0.501  0.510  0.514   360 -0.00  0.00  0.32  0.00  0.00  
1   0.003  ...  0.506  0.566  0.555  0.549  1276  0.00  0.01  0.32  0.01  0.01  
2   0.009  ...  0.618  0.766  0.746  0.730   385  0.78  0.75  0.41  0.74  0.76  
3   0.008  ...  0.639  0.784  0.766  0.754   398  0.95  0.92  0.57  0.91  0.92  
4   0.008  ...  0.627  0.774  0.755  0.742   394  0.93  0.91  0.71  0.91  0.91  

[5 rows x 31 columns]
['knn_10', 'knn_20',

In [41]:
'''
类型数量稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['celltype_7','celltype_9','celltype_11','celltype_13','celltype_15']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/celltype_number.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method     Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  celltype_7   0.158   0.159   0.150  0.128   0.005   0.011   0.016   
1   DREAM  celltype_7   0.136   0.143   0.150  0.176   0.005   0.010   0.015   
2    EDGE  celltype_7   0.731   0.731   0.730  0.736   0.016   0.029   0.041   
3  GLMPCA  celltype_7   0.713   0.727   0.728  0.742   0.017   0.030   0.041   
4     PCA  celltype_7   0.828   0.834   0.837  0.841   0.018   0.033   0.045   

   aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  COMP  HOMO  
0   0.003  ...  0.504  0.537  0.530  0.527   422 -0.00  0.00  0.33  0.00  0.00  
1   0.002  ...  0.505  0.478  0.489  0.496   659  0.00  0.01  0.32  0.01  0.01  
2   0.009  ...  0.593  0.706  0.679  0.664   285  0.53  0.53  0.39  0.53  0.53  
3   0.009  ...  0.606  0.740  0.716  0.702   316  0.55  0.65  0.39  0.65  0.65  
4   0.009  ...  0.620  0.701  0.677  0.665   285  0.73  0.78  0.55  0.78  0.78  

[5 rows x 31 columns]
['

In [42]:
'''
批次数量稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['batch_2','batch_4','batch_6','batch_8','batch_10']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/batch_number.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method  Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  batch_2   0.256   0.252   0.256  0.290   0.005   0.010   0.015   
1   DREAM  batch_2   0.459   0.490   0.485  0.508   0.008   0.015   0.023   
2    EDGE  batch_2   0.965   0.967   0.967  0.964   0.025   0.042   0.060   
3  GLMPCA  batch_2   0.739   0.756   0.760  0.765   0.020   0.036   0.050   
4     PCA  batch_2   0.760   0.776   0.773  0.769   0.020   0.035   0.049   

   aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  COMP  HOMO  
0   0.003  ...  0.505  0.516  0.510  0.508  1502 -0.00  0.00  0.33  0.00  0.00  
1   0.004  ...  0.539  0.567  0.572  0.574   524  0.11  0.15  0.31  0.15  0.15  
2   0.013  ...  0.677  0.783  0.763  0.751   385  0.85  0.81  0.49  0.80  0.82  
3   0.010  ...  0.653  0.763  0.754  0.749   399  0.48  0.58  0.62  0.58  0.57  
4   0.010  ...  0.655  0.769  0.759  0.753   386  0.51  0.63  0.77  0.63  0.63  

[5 rows x 31 columns]
['knn_10', 'knn_20',

In [43]:
'''
批次强度稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['batch_0.2','batch_0.4','batch_0.6','batch_0.8','batch_1.0']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/batch_strength.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method    Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  batch_0.2   0.244   0.270   0.282  0.298   0.005   0.010   0.015   
1   DREAM  batch_0.2   0.306   0.348   0.369  0.376   0.011   0.022   0.033   
2    EDGE  batch_0.2   0.901   0.901   0.900  0.847   0.033   0.058   0.080   
3  GLMPCA  batch_0.2   0.735   0.742   0.746  0.751   0.024   0.047   0.068   
4     PCA  batch_0.2   0.751   0.755   0.764  0.773   0.026   0.048   0.068   

   aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  COMP  HOMO  
0   0.003  ...  0.505  0.501  0.505  0.506  1669  0.00  0.00  0.32  0.00  0.00  
1   0.005  ...  0.738  0.758  0.757  0.757   870  0.02  0.03  0.35  0.03  0.03  
2   0.017  ...  0.809  0.875  0.861  0.854   951  0.29  0.39  0.45  0.38  0.39  
3   0.013  ...  0.800  0.875  0.867  0.861   920  0.33  0.51  0.64  0.51  0.51  
4   0.013  ...  0.799  0.877  0.868  0.862   934  0.34  0.55  0.73  0.55  0.55  

[5 rows x 31 columns]
['knn_10

In [44]:
'''
dropout稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['dropout_-1','dropout_0','dropout_1','dropout_2','dropout_3']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/dropout.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method     Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  dropout_-1   0.232   0.256   0.256  0.290   0.005   0.010   0.015   
1   DREAM  dropout_-1   0.332   0.370   0.376  0.398   0.006   0.012   0.018   
2    EDGE  dropout_-1   0.956   0.959   0.960  0.960   0.020   0.036   0.051   
3  GLMPCA  dropout_-1   0.917   0.924   0.929  0.925   0.016   0.031   0.045   
4     PCA  dropout_-1   0.957   0.958   0.959  0.961   0.018   0.033   0.048   

   aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  COMP  HOMO  
0   0.003  ...  0.505  0.529  0.521  0.518  1626  0.00  0.00  0.33  0.00  0.00  
1   0.003  ...  0.513  0.534  0.539  0.542   768  0.03  0.04  0.31  0.04  0.04  
2   0.010  ...  0.643  0.785  0.767  0.754   349  0.87  0.83  0.46  0.82  0.84  
3   0.008  ...  0.638  0.806  0.788  0.777   415  0.76  0.83  0.47  0.82  0.84  
4   0.010  ...  0.643  0.790  0.771  0.758   398  0.95  0.93  0.67  0.93  0.93  

[5 rows x 31 columns]
['

In [45]:
'''
DE比例稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['de_prob_0.05','de_prob_0.15','de_prob_0.2','de_prob_0.25','de_prob_0.3']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/de_prob.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method       Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  \
0     DRA  de_prob_0.05   0.241   0.260   0.264  0.288   0.005   0.010   
1   DREAM  de_prob_0.05   0.256   0.285   0.289  0.286   0.006   0.011   
2    EDGE  de_prob_0.05   0.489   0.508   0.517  0.524   0.008   0.014   
3  GLMPCA  de_prob_0.05   0.787   0.799   0.812  0.812   0.009   0.018   
4     PCA  de_prob_0.05   0.950   0.952   0.951  0.947   0.010   0.021   

   nkr_30  aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  \
0   0.016   0.002  ...  0.504  0.522  0.512  0.510   541 -0.00  0.00  0.32   
1   0.015   0.003  ...  0.506  0.519  0.519  0.518   980  0.00  0.00  0.30   
2   0.021   0.004  ...  0.524  0.594  0.581  0.572   466  0.12  0.13  0.34   
3   0.026   0.005  ...  0.563  0.665  0.665  0.660   580  0.70  0.69  0.42   
4   0.030   0.005  ...  0.577  0.680  0.664  0.650   412  0.94  0.91  0.64   

   COMP  HOMO  
0  0.00  0.00  
1  0.00  0.00  
2  0.13  0.14  
3  0.68  0.70 

In [46]:
'''
DE强度稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['de_0.2','de_0.4','de_0.6','de_0.8','de_1.0']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/de_strength.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  de_0.2   0.260   0.264   0.275  0.289   0.006   0.010   0.015   
1   DREAM  de_0.2   0.241   0.274   0.285  0.304   0.005   0.010   0.015   
2    EDGE  de_0.2   0.341   0.361   0.376  0.386   0.006   0.012   0.017   
3  GLMPCA  de_0.2   0.823   0.816   0.826  0.835   0.009   0.017   0.024   
4     PCA  de_0.2   0.873   0.874   0.870  0.876   0.008   0.017   0.024   

   aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  COMP  HOMO  
0   0.003  ...  0.504  0.525  0.508  0.503   531  0.00  0.00  0.33  0.00  0.00  
1   0.003  ...  0.505  0.507  0.507  0.507   838  0.00  0.00  0.31  0.00  0.00  
2   0.003  ...  0.514  0.608  0.584  0.571   730  0.04  0.04  0.32  0.04  0.04  
3   0.005  ...  0.559  0.680  0.661  0.650   471  0.61  0.65  0.38  0.64  0.66  
4   0.004  ...  0.558  0.631  0.616  0.607   446  0.65  0.76  0.49  0.75  0.77  

[5 rows x 31 columns]
['knn_10', 'knn_20', 'knn_

In [47]:
'''
异常值概率稳定性得分：相关合成数据集的局部/全局/聚类指标的综合
'''

all_dfs = []
datasets = ['out_0.1','out_0.2','out_0.3','out_0.4','out_0.5']
for dataset in datasets:
    # 局部与全局
    datapath1 = f'/home/henu/work/result/metric/simulate/{dataset}/dr1.csv'
    datapath2 = f'/home/henu/work/result/metric/simulate/{dataset}/dr2.csv'
    # 聚类
    datapath3 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_ARI.csv'
    datapath4 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_NMI.csv'
    datapath5 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_SIL.csv'
    datapath6 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_COMP.csv'
    datapath7 = f'/home/henu/work/result/cluster/simulate/{dataset}/indicators/kmeans_HOMO.csv'

    df1 = pd.read_csv(datapath1)
    df2 = pd.read_csv(datapath2)
    df3 = pd.read_csv(datapath3)
    df4 = pd.read_csv(datapath4)
    df5 = pd.read_csv(datapath5)
    df6 = pd.read_csv(datapath6)
    df7 = pd.read_csv(datapath7)
    df_merge = reduce(lambda left, right: pd.merge(left, right, on="Method", how="outer"), [df1, df2, df3, df4, df5, df6, df7])

    df_merge.insert(1, "Dataset", dataset)  # 加一列标记数据集名
    all_dfs.append(df_merge)

# 合并所有数据集
df_all = pd.concat(all_dfs, ignore_index=True)

print("合并后的数据：")
print(df_all.head())

# 提取指标列（除了 Method 和 Dataset）
metric_cols = [c for c in df_all.columns if c not in ["Method", "Dataset",'AUC','Qlocal','Qglobal','kmax']]
print(metric_cols)

# -------- Step1: 指标标准化（所有方法+数据集一起 min-max）--------
scaler = MinMaxScaler()
df_all[metric_cols] = scaler.fit_transform(df_all[metric_cols])
# print(df_all[['Method']+metric_cols])

# -------- Step2: 每个 (方法×数据集) 的指标平均 --------
df_all["Accuracy_subscore"] = df_all[metric_cols].mean(axis=1)
# print(df_all[['Method','Accuracy_subscore']])

# -------- Step3: 跨所有数据集取平均，得到每个方法的最终 Accuracy --------
accuracy_scores = df_all.groupby("Method")["Accuracy_subscore"].mean()

# 重命名列名为 'score'
accuracy_scores = accuracy_scores.rename_axis('Method').reset_index()
accuracy_scores.rename(columns={"Accuracy_subscore": "score"}, inplace=True)

print("\n最终 Accuracy 分数：")
print(accuracy_scores)

# 保存结果
output_file = "/home/henu/work/result/score/out.csv"
accuracy_scores.to_csv(output_file, index=False)
print(f"结果已保存到文件：{output_file}")

合并后的数据：
   Method  Dataset  knn_10  knn_20  knn_30    svm  nkr_10  nkr_20  nkr_30  \
0     DRA  out_0.1   0.263   0.256   0.259  0.294   0.006   0.011   0.016   
1   DREAM  out_0.1   0.280   0.326   0.342  0.364   0.006   0.011   0.016   
2    EDGE  out_0.1   0.976   0.973   0.977  0.978   0.021   0.037   0.052   
3  GLMPCA  out_0.1   0.925   0.934   0.932  0.938   0.018   0.033   0.048   
4     PCA  out_0.1   0.892   0.899   0.900  0.903   0.018   0.033   0.048   

   aji_10  ...   T_30   C_10   C_20   C_30  kmax   ARI   NMI   SIL  COMP  HOMO  
0   0.003  ...  0.505  0.494  0.496  0.500   250 -0.00  0.00  0.33  0.00  0.00  
1   0.003  ...  0.511  0.526  0.526  0.526   938  0.02  0.02  0.31  0.02  0.02  
2   0.011  ...  0.656  0.795  0.770  0.756   397  0.94  0.90  0.51  0.89  0.90  
3   0.009  ...  0.651  0.829  0.805  0.788   397  0.90  0.87  0.55  0.86  0.87  
4   0.009  ...  0.645  0.832  0.806  0.787   396  0.82  0.89  0.71  0.88  0.89  

[5 rows x 31 columns]
['knn_10', 'knn_20',