In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from sklearn.ensemble import AdaBoostClassifier
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD
from keras.optimizers import adam_v2
import keras_metrics
from keras import metrics
from sklearn.datasets import make_classification
import keras
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_auc_score

In [2]:
# Load the preprocessed (non-PCA) train and test tables, in that order.
# The first CSV column is used as the row index for both frames.
df_train, df_test = (
    pd.read_csv(csv_path, index_col=[0])
    for csv_path in ('train_preprocessed-nonPCA.csv', 'test_preprocessed-nonPCA.csv')
)

In [3]:
# Split each frame into predictors (every column but the last) and the
# label (the last column).
X_train, y_train = df_train.iloc[:, :-1], df_train.iloc[:, -1]
X_test, y_test = df_test.iloc[:, :-1], df_test.iloc[:, -1]

# Report the dimensions of each piece, one per line, in the order
# X_train, y_train, X_test, y_test.
print(*(part.shape for part in (X_train, y_train, X_test, y_test)), sep='\n')

(40736, 118)
(40736,)
(10184, 118)
(10184,)


In [4]:
# Candidate subset sizes: keep the N features most correlated with the target.
N_values = [20, 50, 80, 100, 118]

# Best AUROC seen so far and the N that produced it.
best_auroc = 0
best_N = 0

# Absolute correlation of every column with the 'aki' target column.
# Neither quantity depends on N, so both are computed once here instead of
# rebuilding the full correlation matrix on every loop iteration.
correlation_matrix = df_train.corr()
abs_target_correlation = correlation_matrix['aki'].abs()
# The target's correlation with itself is dropped so it is never selected.
feature_correlation = abs_target_correlation.drop('aki', axis=0)

# NOTE(review): best_N is chosen by AUROC on the test set, so the "best" score
# printed at the end is an optimistic estimate. A validation split or
# cross-validation on the training data would avoid this selection bias.
for N in N_values:
    # Names of the N features with the largest absolute correlation to the
    # target, and their column positions in X_train.
    top_features = feature_correlation.nlargest(N).index.tolist()
    top_features_indices = [X_train.columns.get_loc(col) for col in top_features]

    # Restrict both splits to the selected columns. Positions come from
    # X_train, so this relies on X_test having the same column order.
    X_train_selected = X_train.iloc[:, top_features_indices]
    X_test_selected = X_test.iloc[:, top_features_indices]

    # Fit an AdaBoost classifier on the selected features.
    model = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=42)
    model.fit(X_train_selected, y_train)

    # Hard class predictions for the per-class precision/recall report.
    y_pred = model.predict(X_test_selected)
    print(f"Classification report for N={N}:\n{classification_report(y_test, y_pred)}")

    # Macro-averaged one-vs-rest AUROC from the predicted class probabilities.
    y_pred_proba = model.predict_proba(X_test_selected)
    auroc = roc_auc_score(y_test, y_pred_proba, multi_class="ovr", average="macro")
    print(f"Multi-class AUROC (One-vs-Rest) for N={N}: {auroc}")

    # Keep the best-scoring N; on ties the earlier (smaller) N wins.
    if auroc > best_auroc:
        best_auroc = auroc
        best_N = N

# Summary of the best AUROC and the N that achieved it.
print(f"Best AUROC: {best_auroc} with N={best_N}")

Classification report for N=20:
              precision    recall  f1-score   support

           0       0.50      0.67      0.58      3400
           1       0.36      0.00      0.00      2028
           2       0.42      0.59      0.49      3162
           3       0.48      0.34      0.40      1594

    accuracy                           0.46     10184
   macro avg       0.44      0.40      0.37     10184
weighted avg       0.44      0.46      0.41     10184

Multi-class AUROC (One-vs-Rest) for N=20: 0.6775263901859299
Classification report for N=50:
              precision    recall  f1-score   support

           0       0.51      0.68      0.58      3400
           1       0.14      0.00      0.00      2028
           2       0.41      0.57      0.48      3162
           3       0.47      0.37      0.42      1594

    accuracy                           0.46     10184
   macro avg       0.38      0.41      0.37     10184
weighted avg       0.40      0.46      0.41     10184

Multi