In [None]:
'''
Metric computation with the zadu library: MRRE, NH (neighborhood hit), Pearson
'''

In [1]:
from zadu import zadu
import pandas as pd
import numpy as np
import os

In [7]:
# Simulated datasets to evaluate. Each name is a sub-directory under the data,
# embedding and result roots used below.
datasets = [
    'gene_5k', 'gene_2w', 'gene_3w', 'gene_4w', 'gene_5w',
    'dropout_-1', 'dropout_0', 'dropout_1', 'dropout_2', 'dropout_3',
    'batch_2', 'batch_4', 'batch_6', 'batch_8', 'batch_10',
    'batch_0.2', 'batch_0.4', 'batch_0.6', 'batch_0.8', 'batch_1.0',
    'de_prob_0.05', 'de_prob_0.15', 'de_prob_0.2', 'de_prob_0.25', 'de_prob_0.3',
    'de_0.2', 'de_0.4', 'de_0.6', 'de_0.8', 'de_1.0',
    'out_0.1', 'out_0.2', 'out_0.3', 'out_0.4', 'out_0.5',
]

# Every dimensionality-reduction (DR) method evaluated by default, in the order
# their rows are appended to the result CSV. VASC and VAE are excluded for now.
ALL_DR_METHODS = [
    'ZIFA', 'DRA', 'DREAM', 'scvis', 'UMAP', 'EDGE', 'GLMPCA', 'ivis', 'PaCMAP',
    'pCMF', 'PHATE', 'SAUCIE', 'SCDRHA', 'scGAE', 'scGBM', 'scScope', 'tGPLVM',
    'TriMap', 'PCA', 'TSNE', 'SPDR', 'SIMLR', 'SSNMDI',
    'ParametricUMAP50', 'ParametricUMAP200', 'SQuaD_MDS', 'SQuaD_MDS_hybrid',
]

# Methods skipped for specific datasets; any dataset not listed here uses the
# full list above. Filtering ALL_DR_METHODS keeps the original method order.
EXCLUDED_DR_METHODS = {
    # SIMLR removed as well (26 methods remain).
    'cell_1w': {'SIMLR'},
    'cell_2w': {'SIMLR'},
    'cell_3w': {'SIMLR'},
    # SIMLR, SCDRHA, scGAE, DREAM and both ParametricUMAP variants removed
    # (21 methods remain).
    'cell_5w': {'SIMLR', 'SCDRHA', 'scGAE', 'DREAM',
                'ParametricUMAP50', 'ParametricUMAP200'},
    # SIMLR, SCDRHA, scGAE and DREAM removed (23 methods remain).
    'cell_4w': {'SIMLR', 'SCDRHA', 'scGAE', 'DREAM'},
    # Only the methods from pCMF onwards are evaluated for this dataset
    # (the reason is not recorded in the original script).
    'celltype_7': {'ZIFA', 'DRA', 'DREAM', 'scvis', 'UMAP', 'EDGE', 'GLMPCA',
                   'ivis', 'PaCMAP'},
    # SCDRHA removed (26 methods remain).
    'cell_100': {'SCDRHA'},
}

# Dimensionality of the stored embeddings (part of the embedding file name).
dim = 2

# Column of cell_metadata.csv holding the ground-truth labels.
label_column = 'Group'

# Header of the per-dataset result CSV; also used to order every appended row.
columns = ['Method',
           'Mrre_false_10', 'Mrre_missing_10',
           'Mrre_false_20', 'Mrre_missing_20',
           'Mrre_false_30', 'Mrre_missing_30',
           'Pearson',
           'nh_10', 'nh_20', 'nh_30']

# Metrics requested from zadu. The position of each entry is the index used
# to read its result from `scores` below, so keep the two in sync.
spec = [
    {"id": "mrre", "params": {"k": 10}},
    {"id": "mrre", "params": {"k": 20}},
    {"id": "mrre", "params": {"k": 30}},
    {"id": "pr"},
    {"id": "nh", "params": {"k": 10}},
    {"id": "nh", "params": {"k": 20}},
    {"id": "nh", "params": {"k": 30}},
]

for dataset in datasets:
    excluded = EXCLUDED_DR_METHODS.get(dataset, ())
    DR_methods = [method for method in ALL_DR_METHODS if method not in excluded]

    # Create the result directory if needed.
    result_dir = f'/home/henu/work/result/metric/simulate/{dataset}/'
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
        print(f"目录 '{result_dir}' 创建成功")
    else:
        print(f"目录 '{result_dir}' 已存在")

    # Output file: write the header on the first run only; rows are always
    # appended afterwards, so re-running adds rows to an existing file.
    out_csv = os.path.join(result_dir, 'dr3.csv')
    if not os.path.isfile(out_csv):
        print(f"文件 {out_csv} 不存在")
        pd.DataFrame(columns=columns).to_csv(out_csv, index=False)
    else:
        print(f"文件 {out_csv} 存在")

    # The original (high-dimensional) data and the labels depend only on the
    # dataset, so they are loaded once here instead of once per DR method.
    high_path = f'/home/henu/work/data/simulate/datasets/{dataset}/counts_matrix.csv'
    labels_path = f'/home/henu/work/data/simulate/datasets/{dataset}/cell_metadata.csv'

    high_df = pd.read_csv(high_path, index_col=0)
    # Used as stored, without transposing; rows are expected to line up with
    # the embedding rows, e.g. (2000, 3000) vs (2000, 2).
    X_high = high_df.values

    label_df = pd.read_csv(labels_path, index_col=0)
    labels = label_df[label_column].values

    # The ZADU object is built from `spec` and `X_high` only, neither of which
    # changes between methods, so one instance is reused for every embedding.
    evaluator = zadu.ZADU(spec, X_high)

    for DR_method in DR_methods:
        embedding_path = f'/home/henu/work/result/DR/simulate/datasets/{dataset}/{DR_method}_{dim}.csv'

        # Low-dimensional embedding produced by this DR method.
        low_df = pd.read_csv(embedding_path, index_col=0)
        X_low = low_df.values

        # Same three shape lines per method as before, for log compatibility.
        print(X_high.shape)
        print(X_low.shape)
        print(labels.shape)

        scores = evaluator.measure(X_low, label=labels)

        # One result row; indices into `scores` follow the order of `spec`.
        row = {
            'Method': f'{DR_method}',
            'Mrre_false_10': scores[0]['mrre_false'],
            'Mrre_missing_10': scores[0]['mrre_missing'],
            'Mrre_false_20': scores[1]['mrre_false'],
            'Mrre_missing_20': scores[1]['mrre_missing'],
            'Mrre_false_30': scores[2]['mrre_false'],
            'Mrre_missing_30': scores[2]['mrre_missing'],
            'Pearson': scores[3]['pearson_r'],
            'nh_10': scores[4]['neighborhood_hit'],
            'nh_20': scores[5]['neighborhood_hit'],
            'nh_30': scores[6]['neighborhood_hit'],
        }

        # Append immediately so finished methods survive a later failure.
        # `columns=columns` pins the field order to the header written above.
        pd.DataFrame([row], columns=columns).to_csv(
            out_csv, mode='a', header=False, index=False, float_format='%.3f')

目录 '/home/henu/work/result/metric/simulate/gene_5k/' 已存在
文件 /home/henu/work/result/metric/simulate/gene_5k/dr3.csv 不存在
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
(2000, 3000)
(2000, 2)
(2000,)
目录 '/home/henu/work/result/metric/simulate/g