In [1]:
import pandas as pd
from sklearn.metrics import roc_auc_score
import torch
import torch.nn as nn
import torch.optim as optim 
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef
import numpy as np
import os
from tqdm import tqdm

file = 'NC'    #  You can modify the training data set here, for example: BBB, NC, NA, NT

file_prefixes = ["smiles","DMPNN", "ecfp", "mfbert", "padel"]
file_extension = ".csv"

file_prefix = f"{file}_Feature_fusion/data_train"
file_extension = ".csv"

val_file_prefix = f"{file}_Feature_fusion/data_valid"
val_file_extension = ".csv"

start_index = 0
end_index = 4

In [2]:
auc_scores = []
acc_scores = []
f1_scores = []
recall_scores = []
mcc_scores = []


for i in tqdm(range(start_index, end_index + 1), desc="Processing files"):  # 使用 tqdm 添加进度条  
    combined_data_train = None
    combined_data_valid = None
    combined_data_test = None
    for file_feature in file_prefixes:
        #从randomi/中获取train，valid，test    
        
        file_path_train = file + "/" +'random' + str(i) + "/" + file_feature + "/" + "train" + file_extension
        data_train = pd.read_csv(file_path_train) 
                
        file_path_valid = file + "/" +'random' + str(i) + "/" + file_feature + "/" + "valid" + file_extension
        data_valid = pd.read_csv(file_path_valid) 

        file_path_test = file + "/" +'random' + str(i) + "/" + file_feature + "/" + "test" + file_extension
        data_test = pd.read_csv(file_path_test)   
        
        data_train = pd.read_csv(file_path_train)     
        data_valid = pd.read_csv(file_path_valid)   
        data_test  = pd.read_csv(file_path_test) 
        
        if combined_data_train is None:
            combined_data_train = data_train
        if combined_data_valid is None:
            combined_data_valid = data_valid
        if combined_data_test is None:   
            combined_data_test  = data_test
        else:
            combined_data_train = pd.concat([combined_data_train, data_train], axis=1)     
            combined_data_valid = pd.concat([combined_data_valid, data_valid], axis=1) 
            combined_data_test = pd.concat([combined_data_test, data_test], axis=1) 
    
    folder_path = f'{file}_Feature_fusion'
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    combined_data_train.to_csv(f'{file}_Feature_fusion/data_train{i}.csv', index=False)
    combined_data_valid.to_csv(f'{file}_Feature_fusion/data_valid{i}.csv', index=False)
    combined_data_test.to_csv(f'{file}_Feature_fusion/data_test{i}.csv', index=False)


Processing files: 100%|██████████| 5/5 [00:10<00:00,  2.10s/it]


In [6]:
# 读取CSV文件
for i in range(start_index, end_index+1):
    
    # 读取数据
    file_path = file_prefix + str(i) + file_extension
    data = pd.read_csv(file_path)
    
    first_row = data.iloc[0]
    print(first_row)
    
    # 提取特征和标签
    x_input = data.drop(['smiles'], axis=1).values
    y_output = data['active'].values
    
        # 遍历数据，将 inf 赋值为 0
    x_input = np.nan_to_num(x_input, posinf=0, neginf=0)
    
#        # 检查并输出非 float64 数据的坐标
#    for row_index, row in enumerate(x_input):
#        for col_index, value in enumerate(row):
#            if x_input.dtype!= np.float64:
#                print(f"非 float64 数据位于 ({row_index}, {col_index})，值为: {value}")
#    
    #print('x_input',x_input)
    #print('y_output',y_output)
    
    
    val_x_input = data.drop(['smiles'], axis=1).values
    val_y_output = data['active'].values
    
    val_x_input = np.nan_to_num(x_input, posinf=0, neginf=0)
    
#    # 查找 x_input 中的无穷值位置
#    infinity_positions_x = np.argwhere(np.isinf(x_input))
#    if len(infinity_positions_x) > 0:
#        column_names = data.drop(['SMILES'], axis=1).columns
#        infinity_column_names_x = [column_names[pos[1]] for pos in infinity_positions_x]
#        print("在 x_input 中的无穷值所在列名：", infinity_column_names_x, '         位置：',infinity_positions_x)
#    else:
#        print("在 x_input 中没有无穷值")
#
#    # 查找 val_x_input 中的无穷值位置
#    infinity_positions_val_x = np.argwhere(np.isinf(val_x_input))
#    if len(infinity_positions_val_x) > 0:
#        column_names = data.drop(['SMILES'], axis=1).columns
#        infinity_column_names_val_x = [column_names[pos[1]] for pos in infinity_positions_val_x]
#        print("在 val_x_input 中的无穷值所在列名：", infinity_column_names_val_x, '        位置：',infinity_positions_x)
#    else:
#        print("在 val_x_input 中没有无穷值")
    
        # 标准化特征（假设特征是数值型的）
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    x_input = scaler.fit_transform(x_input)
    val_x_input = scaler.fit_transform(val_x_input)

    # 数据放缩处理（例如，将特征值缩放到 0 到 1 之间）
    from sklearn.preprocessing import MinMaxScaler
    min_max_scaler = MinMaxScaler(feature_range=(0, 1))
    x_input = min_max_scaler.fit_transform(x_input)
    val_x_input = min_max_scaler.fit_transform(val_x_input)
    
        # 转换为Tensor
    x = torch.Tensor(x_input)
    y_true = torch.Tensor(y_output).view(-1, 1)    
    
    val_x = torch.Tensor(val_x_input)
    val_y_true = torch.Tensor(val_y_output).view(-1, 1)
    
    
    # 数据维度
    input_dim = x.shape[1]
    output_dim = 1
    hidden_dim1 = 256
    hidden_dim2 = 128
    hidden_dim3 = 64
    learning_rate = 0.001
    num_epochs = 100
    weight_decay = 0.001
    # MLP模型定义
    mlp = nn.Sequential(  
        nn.Linear(input_dim, hidden_dim1),  # 第一层隐藏层  
        nn.ReLU(),  
        nn.Linear(hidden_dim1, hidden_dim2),  # 第二层隐藏层  
        nn.ReLU(),  
        nn.Linear(hidden_dim2, hidden_dim3),  # 第三层隐藏层  
        nn.ReLU(),  
        nn.Linear(hidden_dim3, output_dim),  # 输出层  
        nn.Sigmoid()  
    )  
        
    # 优化器和损失函数
    optimizer = torch.optim.Adam(mlp.parameters(), lr=learning_rate,weight_decay=weight_decay)
    #loss_func =  nn.BCEWithLogitsLoss()
    loss_func =  nn.MSELoss()
    best_val_loss = float('inf')
    num_iterations_without_improvement = 0  # Initialize counter for iterations without improvement. 
    
    for epoch in range(num_epochs):
        
        print(f"开始训练第{epoch}组epochs")
        prediction = mlp(x)
        #print('prediction',prediction)
        
        #prediction_np = prediction.detach().numpy()
        #df = pd.DataFrame(prediction_np)
        #df.to_csv('prediction.csv', index=False)
        
        loss = loss_func(prediction, y_true)
        print('loss',loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #print(optimizer)
        
        with torch.no_grad():
            val_prediction=mlp(val_x)
            val_loss = loss_func(val_prediction, val_y_true)
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {loss.item()}, Val Loss: {val_loss.item()}")
        
        #print("检查数据和模型输出:")
        #print("是否存在 NaN 或无穷大值在训练预测结果中:", torch.isnan(prediction).any() or torch.isinf(prediction).any())
        #print("是否存在 NaN 或无穷大值在验证预测结果中:", torch.isnan(val_prediction).any() or torch.isinf(val_prediction).any())
        #print("是否存在 NaN 或无穷大值在训练真实值中:", torch.isnan(y_true).any() or torch.isinf(y_true).any())
        #print("是否存在 NaN 或无穷大值在验证真实值中:", torch.isnan(val_y_true).any() or torch.isinf(val_y_true).any())
        #
          
        if val_loss < best_val_loss:
            best_val_loss = val_loss  
            num_iterations_without_improvement = 0
            torch.save(mlp.state_dict(), f"models/models_{file}/model_{i+1}.pt")
            #torch.save(mlp, "model_saved/mlp1.pt")
            print(f'model_{i}训练结果更新为第{epoch}个模型')
            best_epoch = epoch
        else :
            num_iterations_without_improvement += 1  
            if num_iterations_without_improvement == 100:
                print('100次迭代没有更新，结束迭代')
                print(f'最棒的模型是第{best_epoch}个epoch')
                break
        print('         ')
        

    prediction_np = prediction.data.numpy()
        # 将 prediction 添加到 DataFrame
    #data['prediction'] = prediction_np
        # 将 DataFrame 保存到 CSV 文件
    #data.to_csv('output/output_train.csv', index=True)
    
    # 预测并计算AUC
    prediction_np = prediction.detach().numpy().flatten()
    auc = roc_auc_score(y_true, prediction_np)
    
    threshold = 0.5
    binary_prediction = np.where(prediction_np > threshold, 1, 0)
    
    acc = accuracy_score(y_true, binary_prediction)
    f1 = f1_score(y_true, binary_prediction)
    recall = recall_score(y_true, binary_prediction)
    mcc = matthews_corrcoef(y_true, binary_prediction)
    output_file = "train_scores.csv"
    
    
    auc_scores.append(auc)
    acc_scores.append(acc)
    f1_scores.append(f1)
    recall_scores.append(recall)
    mcc_scores.append(mcc)
    
    print('第',i,'组模型')
    print("AUC:", auc)
    print("ACC:", acc)
    print("F1:", f1)
    print("Recall:", recall)
    print("MCC:", mcc)
    print('    ')
      
    

smiles    Cc1cc(c(cc1)C)[N+](=O)[O-]
active                             0
0                           0.098735
1                                0.0
2                                0.0
                     ...            
676                         0.179503
677                         0.078543
678                              0.0
679                              0.0
680                         0.384523
Name: 0, Length: 2775, dtype: object
开始训练第0组epochs
loss tensor(0.2502, grad_fn=<MseLossBackward>)
Epoch 1/100, Train Loss: 0.2502429485321045, Val Loss: 0.2433009296655655
model_0训练结果更新为第0个模型
         
开始训练第1组epochs
loss tensor(0.2433, grad_fn=<MseLossBackward>)
Epoch 2/100, Train Loss: 0.2433009296655655, Val Loss: 0.23311583697795868
model_0训练结果更新为第1个模型
         
开始训练第2组epochs
loss tensor(0.2331, grad_fn=<MseLossBackward>)
Epoch 3/100, Train Loss: 0.23311583697795868, Val Loss: 0.22039970755577087
model_0训练结果更新为第2个模型
         
开始训练第3组epochs
loss tensor(0.2204, grad_fn=<MseLossBackward

Epoch 56/100, Train Loss: 0.007340666372328997, Val Loss: 0.0069429706782102585
model_0训练结果更新为第55个模型
         
开始训练第56组epochs
loss tensor(0.0069, grad_fn=<MseLossBackward>)
Epoch 57/100, Train Loss: 0.0069429706782102585, Val Loss: 0.0069868601858615875
         
开始训练第57组epochs
loss tensor(0.0070, grad_fn=<MseLossBackward>)
Epoch 58/100, Train Loss: 0.0069868601858615875, Val Loss: 0.00695425970479846
         
开始训练第58组epochs
loss tensor(0.0070, grad_fn=<MseLossBackward>)
Epoch 59/100, Train Loss: 0.00695425970479846, Val Loss: 0.006609535776078701
model_0训练结果更新为第58个模型
         
开始训练第59组epochs
loss tensor(0.0066, grad_fn=<MseLossBackward>)
Epoch 60/100, Train Loss: 0.006609535776078701, Val Loss: 0.00619152095168829
model_0训练结果更新为第59个模型
         
开始训练第60组epochs
loss tensor(0.0062, grad_fn=<MseLossBackward>)
Epoch 61/100, Train Loss: 0.00619152095168829, Val Loss: 0.005973190534859896
model_0训练结果更新为第60个模型
         
开始训练第61组epochs
loss tensor(0.0060, grad_fn=<MseLossBackward>)
Epoch 62/1

Epoch 3/100, Train Loss: 0.23795613646507263, Val Loss: 0.22753529250621796
model_1训练结果更新为第2个模型
         
开始训练第3组epochs
loss tensor(0.2275, grad_fn=<MseLossBackward>)
Epoch 4/100, Train Loss: 0.22753529250621796, Val Loss: 0.21497437357902527
model_1训练结果更新为第3个模型
         
开始训练第4组epochs
loss tensor(0.2150, grad_fn=<MseLossBackward>)
Epoch 5/100, Train Loss: 0.21497437357902527, Val Loss: 0.1996915191411972
model_1训练结果更新为第4个模型
         
开始训练第5组epochs
loss tensor(0.1997, grad_fn=<MseLossBackward>)
Epoch 6/100, Train Loss: 0.1996915191411972, Val Loss: 0.18260842561721802
model_1训练结果更新为第5个模型
         
开始训练第6组epochs
loss tensor(0.1826, grad_fn=<MseLossBackward>)
Epoch 7/100, Train Loss: 0.18260842561721802, Val Loss: 0.16323383152484894
model_1训练结果更新为第6个模型
         
开始训练第7组epochs
loss tensor(0.1632, grad_fn=<MseLossBackward>)
Epoch 8/100, Train Loss: 0.16323383152484894, Val Loss: 0.14287272095680237
model_1训练结果更新为第7个模型
         
开始训练第8组epochs
loss tensor(0.1429, grad_fn=<MseLossBackward>)


model_1训练结果更新为第52个模型
         
开始训练第53组epochs
loss tensor(0.0090, grad_fn=<MseLossBackward>)
Epoch 54/100, Train Loss: 0.009040206670761108, Val Loss: 0.008672325871884823
model_1训练结果更新为第53个模型
         
开始训练第54组epochs
loss tensor(0.0087, grad_fn=<MseLossBackward>)
Epoch 55/100, Train Loss: 0.008672325871884823, Val Loss: 0.008533898741006851
model_1训练结果更新为第54个模型
         
开始训练第55组epochs
loss tensor(0.0085, grad_fn=<MseLossBackward>)
Epoch 56/100, Train Loss: 0.008533898741006851, Val Loss: 0.008569006808102131
         
开始训练第56组epochs
loss tensor(0.0086, grad_fn=<MseLossBackward>)
Epoch 57/100, Train Loss: 0.008569006808102131, Val Loss: 0.008746468462049961
         
开始训练第57组epochs
loss tensor(0.0087, grad_fn=<MseLossBackward>)
Epoch 58/100, Train Loss: 0.008746468462049961, Val Loss: 0.009028183296322823
         
开始训练第58组epochs
loss tensor(0.0090, grad_fn=<MseLossBackward>)
Epoch 59/100, Train Loss: 0.009028183296322823, Val Loss: 0.009609385393559933
         
开始训练第59组epochs
loss t

loss tensor(0.2402, grad_fn=<MseLossBackward>)
Epoch 2/100, Train Loss: 0.2401982545852661, Val Loss: 0.22721688449382782
model_2训练结果更新为第1个模型
         
开始训练第2组epochs
loss tensor(0.2272, grad_fn=<MseLossBackward>)
Epoch 3/100, Train Loss: 0.22721688449382782, Val Loss: 0.20706596970558167
model_2训练结果更新为第2个模型
         
开始训练第3组epochs
loss tensor(0.2071, grad_fn=<MseLossBackward>)
Epoch 4/100, Train Loss: 0.20706596970558167, Val Loss: 0.18412943184375763
model_2训练结果更新为第3个模型
         
开始训练第4组epochs
loss tensor(0.1841, grad_fn=<MseLossBackward>)
Epoch 5/100, Train Loss: 0.18412943184375763, Val Loss: 0.15809135138988495
model_2训练结果更新为第4个模型
         
开始训练第5组epochs
loss tensor(0.1581, grad_fn=<MseLossBackward>)
Epoch 6/100, Train Loss: 0.15809135138988495, Val Loss: 0.131581112742424
model_2训练结果更新为第5个模型
         
开始训练第6组epochs
loss tensor(0.1316, grad_fn=<MseLossBackward>)
Epoch 7/100, Train Loss: 0.131581112742424, Val Loss: 0.10682635009288788
model_2训练结果更新为第6个模型
         
开始训练第7组epochs
los

loss tensor(0.0092, grad_fn=<MseLossBackward>)
Epoch 52/100, Train Loss: 0.009153478778898716, Val Loss: 0.009009166620671749
         
开始训练第52组epochs
loss tensor(0.0090, grad_fn=<MseLossBackward>)
Epoch 53/100, Train Loss: 0.009009166620671749, Val Loss: 0.007255670614540577
model_2训练结果更新为第52个模型
         
开始训练第53组epochs
loss tensor(0.0073, grad_fn=<MseLossBackward>)
Epoch 54/100, Train Loss: 0.007255670614540577, Val Loss: 0.008702977560460567
         
开始训练第54组epochs
loss tensor(0.0087, grad_fn=<MseLossBackward>)
Epoch 55/100, Train Loss: 0.008702977560460567, Val Loss: 0.006859674118459225
model_2训练结果更新为第54个模型
         
开始训练第55组epochs
loss tensor(0.0069, grad_fn=<MseLossBackward>)
Epoch 56/100, Train Loss: 0.006859674118459225, Val Loss: 0.00815302599221468
         
开始训练第56组epochs
loss tensor(0.0082, grad_fn=<MseLossBackward>)
Epoch 57/100, Train Loss: 0.00815302599221468, Val Loss: 0.0073141190223395824
         
开始训练第57组epochs
loss tensor(0.0073, grad_fn=<MseLossBackward>)
Epoch 

loss tensor(0.2449, grad_fn=<MseLossBackward>)
Epoch 2/100, Train Loss: 0.24491538107395172, Val Loss: 0.2276630401611328
model_3训练结果更新为第1个模型
         
开始训练第2组epochs
loss tensor(0.2277, grad_fn=<MseLossBackward>)
Epoch 3/100, Train Loss: 0.2276630401611328, Val Loss: 0.20765146613121033
model_3训练结果更新为第2个模型
         
开始训练第3组epochs
loss tensor(0.2077, grad_fn=<MseLossBackward>)
Epoch 4/100, Train Loss: 0.20765146613121033, Val Loss: 0.17970037460327148
model_3训练结果更新为第3个模型
         
开始训练第4组epochs
loss tensor(0.1797, grad_fn=<MseLossBackward>)
Epoch 5/100, Train Loss: 0.17970037460327148, Val Loss: 0.15035919845104218
model_3训练结果更新为第4个模型
         
开始训练第5组epochs
loss tensor(0.1504, grad_fn=<MseLossBackward>)
Epoch 6/100, Train Loss: 0.15035919845104218, Val Loss: 0.12066157162189484
model_3训练结果更新为第5个模型
         
开始训练第6组epochs
loss tensor(0.1207, grad_fn=<MseLossBackward>)
Epoch 7/100, Train Loss: 0.12066157162189484, Val Loss: 0.09416734427213669
model_3训练结果更新为第6个模型
         
开始训练第7组epochs


Epoch 54/100, Train Loss: 0.007246138993650675, Val Loss: 0.007470833603292704
         
开始训练第54组epochs
loss tensor(0.0075, grad_fn=<MseLossBackward>)
Epoch 55/100, Train Loss: 0.007470833603292704, Val Loss: 0.007536097429692745
         
开始训练第55组epochs
loss tensor(0.0075, grad_fn=<MseLossBackward>)
Epoch 56/100, Train Loss: 0.007536097429692745, Val Loss: 0.007188050076365471
         
开始训练第56组epochs
loss tensor(0.0072, grad_fn=<MseLossBackward>)
Epoch 57/100, Train Loss: 0.007188050076365471, Val Loss: 0.006615757010877132
model_3训练结果更新为第56个模型
         
开始训练第57组epochs
loss tensor(0.0066, grad_fn=<MseLossBackward>)
Epoch 58/100, Train Loss: 0.006615757010877132, Val Loss: 0.006047572009265423
model_3训练结果更新为第57个模型
         
开始训练第58组epochs
loss tensor(0.0060, grad_fn=<MseLossBackward>)
Epoch 59/100, Train Loss: 0.006047572009265423, Val Loss: 0.005751440767198801
model_3训练结果更新为第58个模型
         
开始训练第59组epochs
loss tensor(0.0058, grad_fn=<MseLossBackward>)
Epoch 60/100, Train Loss: 0.005

model_4训练结果更新为第5个模型
         
开始训练第6组epochs
loss tensor(0.1604, grad_fn=<MseLossBackward>)
Epoch 7/100, Train Loss: 0.1603536307811737, Val Loss: 0.1355806440114975
model_4训练结果更新为第6个模型
         
开始训练第7组epochs
loss tensor(0.1356, grad_fn=<MseLossBackward>)
Epoch 8/100, Train Loss: 0.1355806440114975, Val Loss: 0.11154976487159729
model_4训练结果更新为第7个模型
         
开始训练第8组epochs
loss tensor(0.1115, grad_fn=<MseLossBackward>)
Epoch 9/100, Train Loss: 0.11154976487159729, Val Loss: 0.09114082157611847
model_4训练结果更新为第8个模型
         
开始训练第9组epochs
loss tensor(0.0911, grad_fn=<MseLossBackward>)
Epoch 10/100, Train Loss: 0.09114082157611847, Val Loss: 0.07501526921987534
model_4训练结果更新为第9个模型
         
开始训练第10组epochs
loss tensor(0.0750, grad_fn=<MseLossBackward>)
Epoch 11/100, Train Loss: 0.07501526921987534, Val Loss: 0.06375125050544739
model_4训练结果更新为第10个模型
         
开始训练第11组epochs
loss tensor(0.0638, grad_fn=<MseLossBackward>)
Epoch 12/100, Train Loss: 0.06375125050544739, Val Loss: 0.0563922002911

model_4训练结果更新为第60个模型
         
开始训练第61组epochs
loss tensor(0.0067, grad_fn=<MseLossBackward>)
Epoch 62/100, Train Loss: 0.006704424507915974, Val Loss: 0.007142206188291311
         
开始训练第62组epochs
loss tensor(0.0071, grad_fn=<MseLossBackward>)
Epoch 63/100, Train Loss: 0.007142206188291311, Val Loss: 0.007431877311319113
         
开始训练第63组epochs
loss tensor(0.0074, grad_fn=<MseLossBackward>)
Epoch 64/100, Train Loss: 0.007431877311319113, Val Loss: 0.007650543935596943
         
开始训练第64组epochs
loss tensor(0.0077, grad_fn=<MseLossBackward>)
Epoch 65/100, Train Loss: 0.007650543935596943, Val Loss: 0.0067820558324456215
         
开始训练第65组epochs
loss tensor(0.0068, grad_fn=<MseLossBackward>)
Epoch 66/100, Train Loss: 0.0067820558324456215, Val Loss: 0.006040431559085846
model_4训练结果更新为第65个模型
         
开始训练第66组epochs
loss tensor(0.0060, grad_fn=<MseLossBackward>)
Epoch 67/100, Train Loss: 0.006040431559085846, Val Loss: 0.005708653945475817
model_4训练结果更新为第66个模型
         
开始训练第67组epochs
loss

In [7]:
te_file_prefix = f"{file}_Feature_fusion/data_test"
te_file_extension = ".csv"

import os 

avg_auc = 0.0  
avg_acc = 0.0  
avg_f1 = 0.0  
avg_recall = 0.0  
avg_mcc = 0.0  


#te_file_paths = [te_file_prefix + str(i) + te_file_extension for i in range(start_index, end_index+1)]  
results = pd.DataFrame(columns=['AUC', 'Accuracy', 'F1', 'Recall', 'MCC'])   
    
#for i,te_file_path in enumerate(te_file_paths):  
for i in range(start_index, end_index): 
    te_file_path = te_file_prefix + str(i) + te_file_extension
    data_test = pd.read_csv(te_file_path)  
    if file == 'BBB':
        data_test = data_test.head(129)  

    #print(data_test)
      
    te_x_input = data_test.drop(['smiles'], axis=1).values  
    #print('1te_x_input.shape',te_x_input.shape)
    
    te_y_true = data_test['active'].values  
    te_x_input = te_x_input.astype(np.float64)
    #print('2te_x_input.shape',te_x_input.shape)
    
    te_x_input = np.nan_to_num(te_x_input, posinf=0, neginf=0)
    #print('3te_x_input',te_x_input.shape)    
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    te_x_input = scaler.fit_transform(te_x_input)

    # 数据放缩处理（例如，将特征值缩放到 0 到 1 之间）
    from sklearn.preprocessing import MinMaxScaler
    min_max_scaler = MinMaxScaler(feature_range=(0, 1))
    te_x_input = min_max_scaler.fit_transform(te_x_input)
    te_x_input = torch.Tensor(te_x_input)
    #print('4te_x_input.shape',te_x_input.shape)
    mlp.load_state_dict(torch.load(f"models/models_{file}/model_{i+1}.pt")) 
    
    #print('39 te_x_input te_x_input ',te_x_input)
    
    #print("Length of te_prediction:", len(te_prediction))
    #print("Length of data_test:", len(data_test))
    #print("Length of data index:", len(data.index))
    #
    with torch.no_grad():    
        
       # print("47  te_x_input  te_x_input",te_x_input)
        
        te_prediction = mlp(te_x_input)  
        
        ############################################################
        te_prediction = te_prediction.numpy()
        data = pd.DataFrame(te_prediction)
        data.to_csv(f'output1/test_prediction{i+1}.csv', mode='a', index=False)
        ############################################################
        
        #print('te_y_true',len(te_y_true))
        #print('te_prediction',len(te_prediction))
        #print('te_y_true',te_y_true)
        #print('te_prediction',te_prediction)
        #print('检查 te_y_true 是否包含 NaN',np.isnan(te_y_true).any())  # 检查 te_y_true 是否包含 NaN
        #print('检查 te_y_true 是否包含无穷值',np.isinf(te_y_true).any())  # 检查 te_y_true 是否包含无穷值
        #print('检查 te_prediction 是否包含 NaN',np.isnan(te_prediction).any())  # 检查 te_prediction 是否包含 NaN
        #print('检查 te_prediction 是否包含无穷值',np.isinf(te_prediction).any())  # 检查 te_prediction 是否包含无穷值
        #
        auc = roc_auc_score(te_y_true, te_prediction)
        #print(auc)
        
        #print('54  te_predictionte_prediction',te_prediction)    
        #te_prediction = te_prediction.numpy()      
        data_test[f'te_prediction_{i+1}'] = te_prediction 
        
        auc = roc_auc_score(te_y_true, te_prediction)
        #print(auc)
        
        #data_test.to_csv(f'output1/te_prediction{i+1}.csv', mode='a', index=True)
        #print('te_prediction  ',te_prediction)
        
        auc = roc_auc_score(te_y_true, te_prediction)  
        print(f'AUC_{i+1}: {auc}')
        
        te_prediction_binary = np.where(te_prediction > 0.5, 1, 0)  
        acc = accuracy_score(te_y_true, te_prediction_binary)  
        print(f'Accuracy_{i+1}: {acc}')
        f1 = f1_score(te_y_true, te_prediction_binary)
        recall = recall_score(te_y_true, te_prediction_binary)
        mcc = matthews_corrcoef(te_y_true, te_prediction_binary)
        print(f'f1_{i+1}: {f1}')
        print(f'recall_{i+1}: {recall}')
        print(f'mcc_{i+1}: {mcc}')
        print('             ')
        print('             ')
        
        avg_auc += auc  
        avg_acc += acc  
        avg_f1 += f1  
        avg_recall += recall  
        avg_mcc += mcc
        
        results = results.append({'AUC': auc, 'Accuracy': acc, 'F1': f1, 'Recall': recall, 'MCC': mcc}, ignore_index=True) 
results.to_csv('测试集预测_results.csv', index=False)

avg_auc /= len(range(start_index, end_index)) 
avg_acc /= len(range(start_index, end_index)) 
avg_f1 /= len(range(start_index, end_index))  
avg_recall /= len(range(start_index, end_index))
avg_mcc /= len(range(start_index, end_index))

auc_te_values = results['AUC'].values
accuracy_te_values = results['Accuracy'].values
f1_te_values = results['F1'].values
recall_te_values = results['Recall'].values
mcc_te_values = results['MCC'].values

std_auc_te = np.std(auc_te_values)
std_acc_te = np.std(accuracy_te_values)
std_f1_te = np.std(f1_te_values)
std_recall_te = np.std(recall_te_values)
std_mcc_te = np.std(mcc_te_values)

print(f'Average AUC: {avg_auc:.6f}±{std_auc_te:.6f}')
print(f'Average Accuracy: {avg_acc:.6f}±{std_acc_te:.6f}')
print(f'Average F1: {avg_f1:.6f}±{std_f1_te:.6f}')
print(f'Average Recall: {avg_recall:.6f}±{std_recall_te:.6f}')
print(f'Average MCC: {avg_mcc:.6f}±{std_mcc_te:.6f}')

AUC_1: 0.9739350320949232
Accuracy_1: 0.9359605911330049
f1_1: 0.9319371727748691
recall_1: 0.9175257731958762
mcc_1: 0.8718836478204887
             
             
AUC_2: 0.9716999608303956
Accuracy_2: 0.916256157635468
f1_2: 0.9137055837563451
recall_2: 0.9782608695652174
mcc_2: 0.8399241354423864
             
             
AUC_3: 0.950197628458498
Accuracy_3: 0.8866995073891626
f1_3: 0.8756756756756756
recall_3: 0.9204545454545454
mcc_3: 0.7751445230514818
             
             
AUC_4: 0.9636363636363636
Accuracy_4: 0.8669950738916257
f1_4: 0.8615384615384616
recall_4: 0.9032258064516129
mcc_4: 0.7370004403209095
             
             
Average AUC: 0.964867±0.009295
Average Accuracy: 0.901478±0.026528
Average F1: 0.895714±0.028308
Average Recall: 0.929867±0.028691
Average MCC: 0.805988±0.052926
