MEMO：Kaggle代碼為最終版本

In [None]:
"""
在地端執行時請註解此段程式
"""
!pip install scikit-learn==1.2.2
!pip install imbalanced-learn==0.10.1

In [None]:
# 導入庫
import pandas as pd
import numpy as np
import dask.dataframe as dd
import dask
import joblib
import gc
import os
import lightgbm as lgb
from sklearn.metrics import f1_score, classification_report, accuracy_score, roc_auc_score, average_precision_score, confusion_matrix, precision_recall_curve
from sklearn.model_selection import train_test_split, StratifiedKFold
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from sklearn.metrics import make_scorer, f1_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [None]:
# Parameter settings
# RUN_ON_KAGGLE selects between the local preprocessing-only path and the Kaggle training path.
# EXPORT_DATASET_FOR_KAGGEL (name kept as spelled; it is referenced by a later cell) enables the
# CSV export of the reduced feature set during a local run.
RUN_ON_KAGGLE = True
EXPORT_DATASET_FOR_KAGGEL = False

# Positive:negative ratio experiments (F1 figures are the author's recorded results)
# TARGET_RATIO = 60 # 1:60, i.e. without VAE augmentation; F1 was 0.8333
TARGET_RATIO = 50 # 1:50 gave F1 0.8379; the fast and effective ratio
# TARGET_RATIO = 25 # 1:25 gave F1 0.8379
# TARGET_RATIO = 1 # 1:1 gave F1 0.8379

# When True, the feature-importance report is written to 'feature_importance.csv';
# otherwise to 'feature_importance_final_model_report.csv'.
INIT_MODEL = False

# for VAE
# NOTE(review): the refresh mentioned below happens only inside the `if not RUN_ON_KAGGLE`
# preprocessing cell, so on Kaggle LATENT_DIM keeps the value 12000 - confirm this is intended.
LATENT_DIM = 12000 # VAE latent-space dimension; 12000 is roughly the original feature count, later refreshed to the number of '上市加權指數' columns
VAE_EPOCHS = 150 # number of VAE training epochs
VAE_BATCH_SIZE = 16 # VAE batch size
RE_TRAIN_VAE = False # set to True after changing VAE_EPOCHS or VAE_BATCH_SIZE to force a new VAE to be trained

In [None]:
"""
自家電腦僅作資料前處理，不做VAE模型訓練，故不import tf相關套件
"""
# TensorFlow/Keras is imported only on Kaggle; the local machine only preprocesses data
# and never builds or trains the VAE.
if RUN_ON_KAGGLE:
    # for VAE
    # pip install tensorflow
    import tensorflow as tf
    from tensorflow.keras.layers import Input, Dense, BatchNormalization, Lambda, Layer, Reshape, Flatten, ReLU, LeakyReLU
    # NOTE(review): BatchNormalization is already imported on the line above; this import is redundant.
    from tensorflow.keras.layers import BatchNormalization
    from tensorflow.keras.models import Model, Sequential
    from tensorflow.keras import backend as K
    from tensorflow.keras.losses import mse

In [None]:
if not RUN_ON_KAGGLE:
    # Local preprocessing input: the dataset after missing-value imputation and under-sampling.
    # All four CSV directories are read with the same sampling/block-size options.
    _csv_read_options = {'sample': 1048576, 'blocksize': '128MB'}

    print('載入訓練集以進行預處理...')
    X_train_ddf = dd.read_csv('./X_train_ddf.csv/*', **_csv_read_options)
    y_train_ddf = dd.read_csv('./y_train_ddf.csv/*', **_csv_read_options)

    print('載入測試集以進行預處理...')
    X_test_ddf = dd.read_csv('./X_test_ddf.csv/*', **_csv_read_options)
    y_test_ddf = dd.read_csv('./y_test_ddf.csv/*', **_csv_read_options)


In [None]:
if not RUN_ON_KAGGLE:
    print('精煉特徵...')
    feature_importance_df = pd.read_csv('feature_importance.csv')

    # Keep only the features whose name starts with '上市加權指數'
    # (author's note: cheaper to compute while still holding F1 around 0.81).
    mask = feature_importance_df['Feature'].str.startswith('上市加權指數', na=False)
    important_features = feature_importance_df.loc[mask, 'Feature'].tolist()
    print(f"保留的特徵數量 (基於前綴 '上市加權指數'): {len(important_features)}")
    print(f"保留的特徵 (基於前綴 '上市加權指數'):", important_features)

    # Refresh the VAE latent dimension to the number of retained features.
    LATENT_DIM = len(important_features)

    # Restrict both feature frames to the retained columns.
    X_train_ddf, X_test_ddf = X_train_ddf[important_features], X_test_ddf[important_features]

In [None]:
if EXPORT_DATASET_FOR_KAGGEL and not RUN_ON_KAGGLE:
    # Write the reduced feature frames to CSV (test set first, then training set).
    # The label frames have a single column, so they are not re-exported.
    print("匯出CSV以清空計算圖")
    _export_jobs = (
        (X_test_ddf, './X_test_ddf_kaggle.csv'),
        (X_train_ddf, './X_train_ddf_kaggle.csv'),
    )
    for _frame, _target in _export_jobs:
        _frame.to_csv(_target, index=False)
    print("所有資料已匯出到 CSV！")

In [None]:
"""
自家電腦僅作資料前處理，不做VAE模型訓練，故不需要Pandas df
"""
# if not RUN_ON_KAGGLE:
#     """
#     此時才正式將ddf正式轉成Pandas df
#     """
#     print("轉換訓練集格式為熊貓DF...")
#     # 初始化空的 DataFrame
#     X_train = pd.DataFrame()
#     y_train = pd.DataFrame()

#     # 分批合併
#     batch_size = 10  # 每次處理 10 個分割
#     for i in range(0, X_train_ddf.npartitions, batch_size):
#         print(f"處理批次 {i//batch_size + 1}")
#         # 一次計算多個分割
#         X_batch = X_train_ddf.partitions[i:min(i + batch_size, X_train_ddf.npartitions)].compute()
#         y_batch = y_train_ddf.partitions[i:min(i + batch_size, y_train_ddf.npartitions)].compute()
#         # 合併到結果
#         X_train = pd.concat([X_train, X_batch], axis=0, ignore_index=True)
#         y_train = pd.concat([y_train, y_batch], axis=0, ignore_index=True)
#         del X_batch, y_batch
#         gc.collect()

#     print("轉換測試集格式為熊貓DF...")
#     # 初始化空的 DataFrame
#     X_test = pd.DataFrame()
#     y_test = pd.DataFrame()

#     # 分批合併
#     batch_size = 10  # 每次處理 10 個分割
#     for i in range(0, X_test_ddf.npartitions, batch_size):
#         print(f"處理批次 {i//batch_size + 1}")
#         # 一次計算多個分割
#         X_batch = X_test_ddf.partitions[i:min(i + batch_size, X_test_ddf.npartitions)].compute()
#         y_batch = y_test_ddf.partitions[i:min(i + batch_size, y_test_ddf.npartitions)].compute()
#         # 合併到結果
#         X_test = pd.concat([X_test, X_batch], axis=0, ignore_index=True)
#         y_test = pd.concat([y_test, y_batch], axis=0, ignore_index=True)
#         del X_batch, y_batch
#         gc.collect()

In [None]:
"""
自家電腦僅作資料前處理，不做VAE模型訓練，提前結束程式
"""
# A local run only performs the preprocessing above, so execution stops here;
# everything below this cell is the Kaggle-side pipeline.
import sys
if not RUN_ON_KAGGLE:
    # sys.exit() raises SystemExit, which ends this cell.
    sys.exit()

In [None]:
def _ddf_to_pandas(ddf, batch_size=10):
    """Materialise a Dask DataFrame as a single pandas DataFrame.

    Partitions are computed ``batch_size`` at a time to bound the size of each
    compute call. The batches are collected in a list and concatenated once at
    the end, instead of growing a DataFrame inside the loop (which re-copies
    all accumulated rows on every iteration).

    Args:
        ddf: Dask DataFrame to convert.
        batch_size: Number of partitions computed per batch.

    Returns:
        pandas.DataFrame holding every row of ``ddf`` with a fresh 0..n-1 index.
    """
    n_partitions = ddf.npartitions
    frames = []
    for batch_no, start in enumerate(range(0, n_partitions, batch_size), start=1):
        print(f"處理批次 {batch_no}")
        stop = min(start + batch_size, n_partitions)
        frames.append(ddf.partitions[start:stop].compute())
        gc.collect()
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0, ignore_index=True)


if RUN_ON_KAGGLE:
    # Dataset used here: imputed, under-sampled and reduced to the '上市加權指數' feature columns.
    print("轉換訓練集格式為熊貓DF...")
    # TODO: point these paths at the actual Kaggle input directory
    X_train_ddf = dd.read_csv('/kaggle/input/taiwan-stock/X_train_ddf_kaggle.csv/*', sample=1048576, blocksize='128MB')
    y_train_ddf = dd.read_csv('/kaggle/input/taiwan-stock/y_train_ddf.csv/*', sample=1048576, blocksize='128MB')

    # Each frame is iterated over its OWN partitions: features and labels come from
    # different files, so their partition counts are not guaranteed to match.
    X_train = _ddf_to_pandas(X_train_ddf)
    y_train = _ddf_to_pandas(y_train_ddf)

    print("轉換測試集格式為熊貓DF...")
    X_test_ddf = dd.read_csv('/kaggle/input/taiwan-stock/X_test_ddf_kaggle.csv/*', sample=1048576, blocksize='128MB')
    y_test_ddf = dd.read_csv('/kaggle/input/taiwan-stock/y_test_ddf.csv/*', sample=1048576, blocksize='128MB')

    X_test = _ddf_to_pandas(X_test_ddf)
    y_test = _ddf_to_pandas(y_test_ddf)

    # Fail fast if features and labels are misaligned instead of training on mismatched rows.
    if len(X_train) != len(y_train):
        raise ValueError(f"X_train has {len(X_train)} rows but y_train has {len(y_train)}")
    if len(X_test) != len(y_test):
        raise ValueError(f"X_test has {len(X_test)} rows but y_test has {len(y_test)}")

In [None]:
# Count negative and positive labels to derive the positive-class weight.
# Note: pandas and Dask DataFrames behave slightly differently for this computation.
neg_count_result = y_train.eq(0).sum()
pos_count_result = y_train.eq(1).sum()

# Debug output: show the type and value of each raw count.
print(f"Type of neg_count_result: {type(neg_count_result)}")
print(f"Value of neg_count_result: {neg_count_result}")
print(f"Type of pos_count_result: {type(pos_count_result)}")
print(f"Value of pos_count_result: {pos_count_result}")

# The sums may come back as a one-element Series; unwrap those into plain scalars.
neg_count = neg_count_result
if isinstance(neg_count_result, pd.Series):
    neg_count = neg_count_result.item()
pos_count = pos_count_result
if isinstance(pos_count_result, pd.Series):
    pos_count = pos_count_result.item()

# Ratio of negatives to positives; falls back to 1 when there are no positives.
if pos_count > 0:
    scale_pos_weight = neg_count / pos_count
else:
    scale_pos_weight = 1

print(f"Negative count: {neg_count}")
print(f"Positive count: {pos_count}")
print(f"Scale positive weight: {scale_pos_weight}")

In [None]:
"""
此塊for匯入.keras模型
"""
# KL Divergence Loss Layer
class KLDivergenceLayer(tf.keras.layers.Layer):
    """Pass-through layer that registers a KL-divergence term through ``add_loss``.

    The layer expects the pair ``[z_mean, z_log_var]`` and returns it unchanged;
    its only effect is the loss term added while it is called.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs_for_kl):
        """Add the batch-mean KL term as a layer loss and return the inputs untouched."""
        mu, log_var = inputs_for_kl
        # Per-dimension term 1 + log(sigma^2) - mu^2 - sigma^2
        per_dimension = 1 + log_var - tf.square(mu) - tf.exp(log_var)
        # Sum over the last axis and scale by -0.5 to get one value per sample
        per_sample = tf.reduce_sum(per_dimension, axis=-1) * -0.5
        self.add_loss(tf.reduce_mean(per_sample))
        return inputs_for_kl

# Sampling Layer (輸入仍然是原始的 z_mean 和 z_log_var)
def sampling(args):
    """Draw a latent sample ``z = mean + exp(0.5 * log_var) * eps`` with ``eps ~ N(0, 1)``.

    ``args`` is the pair ``(z_mean, z_log_var)``; the noise tensor takes its two
    dimensions from the runtime shape of ``z_mean``.
    """
    mean, log_var = args
    mean_shape = tf.shape(mean)
    noise = tf.random.normal(shape=(mean_shape[0], mean_shape[1]), mean=0., stddev=1.0)
    std_dev = tf.exp(0.5 * log_var)
    return mean + std_dev * noise

# 損失函數現在只需要計算重構損失
def reconstruction_loss_fn(y_true, y_pred):
    """Reconstruction loss: squared error summed over the last axis, averaged over the rest."""
    squared_error = tf.square(y_true - y_pred)
    per_sample_error = tf.reduce_sum(squared_error, axis=-1)
    return tf.reduce_mean(per_sample_error)

In [None]:
"""
VAE過採樣至指定比例
"""
print("\n================ VAE 模型訓練 ================")
#TODO 要改成Kaggle路徑 注意要上傳到input
vae_model_path = 'my_vae.keras'
generator_model_path = 'my_vae_generator.keras'
X_train_minority = X_train[y_train.values == 1] # 這裡決定生成目標

if os.path.exists('/kaggle/input/stock-vae/tensorflow2/default/1/my_vae.keras') and os.path.exists('/kaggle/input/stock-vae/tensorflow2/default/1/my_vae_generator.keras') and RE_TRAIN_VAE != True: # 如果目錄下已經存在VAE模型，代表已訓練完成，無須重新訓練
    print("\n正在載入模型...")
    custom_objects_dict = {
        'sampling': sampling,
        'reconstruction_loss_fn': reconstruction_loss_fn
    }
    vae = tf.keras.models.load_model('/kaggle/input/stock-vae/tensorflow2/default/1/my_vae.keras', custom_objects=custom_objects_dict)
    print("VAE 模型已載入。")
    vae.summary()

    generator = tf.keras.models.load_model('/kaggle/input/stock-vae/tensorflow2/default/1/my_vae_generator.keras')
    print("Generator 模型已載入。")
    generator.summary()
else:
    print(f"用於訓練 VAE 的少數類別樣本數量: {len(X_train_minority)}")
    original_dim = X_train_minority.shape[1]

    # VAE Encoder
    inputs_vae = Input(shape=(original_dim,), name='vae_input')
    h_vae = Dense(int(original_dim * 2.0), activation='relu')(inputs_vae)
    h_vae = BatchNormalization()(h_vae)
    h_vae = Dense(int(original_dim * 1.25), activation='relu')(h_vae)
    z_mean_tensor = Dense(LATENT_DIM, name='z_mean_encoder_output')(h_vae)
    z_log_var_tensor = Dense(LATENT_DIM, name='z_log_var_encoder_output')(h_vae)

    # KL Divergence Loss Layer
    class KLDivergenceLayer(tf.keras.layers.Layer):
        def __init__(self, **kwargs):
            super(KLDivergenceLayer, self).__init__(**kwargs)

        def call(self, inputs_for_kl): # inputs_for_kl 應該是 [z_mean, z_log_var]
            z_mean_kl, z_log_var_kl = inputs_for_kl
            kl_batch = 1 + z_log_var_kl - tf.square(z_mean_kl) - tf.exp(z_log_var_kl)
            kl_batch = tf.reduce_sum(kl_batch, axis=-1) * -0.5
            self.add_loss(tf.reduce_mean(kl_batch)) # 添加 KL 損失的均值到層的損失
            return inputs_for_kl # 返回原始輸入，不改變數據流

    # 將 KL 散度計算層應用於 encoder 的輸出
    # 這個層的 call 方法會執行 self.add_loss()
    # _ (下劃線) 表示我們不直接使用這個層的輸出進行後續計算（因為它返回輸入）
    # 但重要的是它被執行了。
    _ = KLDivergenceLayer(name='kl_divergence_adder')([z_mean_tensor, z_log_var_tensor])


    # Sampling Layer (輸入仍然是原始的 z_mean 和 z_log_var)
    def sampling(args):
        z_mean_s, z_log_var_s = args
        batch = tf.shape(z_mean_s)[0]
        dim = tf.shape(z_mean_s)[1]
        epsilon = tf.random.normal(shape=(batch, dim), mean=0., stddev=1.0)
        return z_mean_s + tf.exp(0.5 * z_log_var_s) * epsilon

    z_sampled_tensor = Lambda(sampling, output_shape=(LATENT_DIM,), name='z_sampling_lambda')([z_mean_tensor, z_log_var_tensor])

    # VAE Decoder
    decoder_h_layer1 = Dense(int(original_dim * 1.0), activation='relu', name='decoder_h1')
    decoder_h_layer2 = Dense(int(original_dim * 1.5), activation='relu', name='decoder_h2')
    decoder_mean_layer = Dense(original_dim, activation=None, name='reconstruction_output_layer') # 命名輸出層

    h_decoded_vae = decoder_h_layer1(z_sampled_tensor)
    h_decoded_vae = decoder_h_layer2(h_decoded_vae)
    x_reconstructed_tensor_out = decoder_mean_layer(h_decoded_vae)

    # VAE 模型現在只輸出重構的 x
    vae = Model(inputs_vae, x_reconstructed_tensor_out, name='vae_with_internal_kl_loss')

    # 損失函數現在只需要計算重構損失
    def reconstruction_loss_fn(y_true, y_pred):
        return tf.reduce_mean(tf.reduce_sum(tf.square(y_true - y_pred), axis=-1))

    # 編譯模型
    # KL 散度是通過 KLDivergenceLayer 內部調用 self.add_loss() 添加的
    # Keras 在訓練時會自動收集所有通過 add_loss（層級別）添加的損失，
    # 並將它們與 compile 中指定的損失相加。
    vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                loss=reconstruction_loss_fn)
    vae.summary()

    print("\n訓練 VAE...")
    # KL 散度作為一個額外的損失項被加到總損失中
    history = vae.fit(X_train_minority, X_train_minority,
                    epochs=VAE_EPOCHS,
                    batch_size=VAE_BATCH_SIZE,
                    shuffle=True,
                    verbose=1)

    if history and history.history and 'loss' in history.history:
        plt.figure(figsize=(8, 4))
        plt.plot(history.history['loss'])
        plt.title('VAE Model Total Loss (Reconstruction + KL)')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.show()

    # 創建獨立的 Decoder/Generator 模型
    generator_input_tensor = Input(shape=(LATENT_DIM,), name='standalone_generator_input')
    _h_decoded_gen = decoder_h_layer1(generator_input_tensor)
    _h_decoded_gen = decoder_h_layer2(_h_decoded_gen)
    _x_decoded_mean_gen = decoder_mean_layer(_h_decoded_gen) # 使用相同的輸出層實例
    generator = Model(generator_input_tensor, _x_decoded_mean_gen, name='standalone_generator')

    # 匯出模型
    print(f"\n正在匯出 VAE 模型到 {vae_model_path}...")
    vae.save(vae_model_path)
    print("VAE 模型已匯出。")

    print(f"\n正在匯出 Generator 模型到 {generator_model_path}...")
    generator.save(generator_model_path)
    print("Generator 模型已匯出。")

In [None]:
print("\n================ VAE 生成數據 ================")
# 根據TARGET_RATIO計算是否需要生成樣本+要生成多少樣本
if hasattr(y_train, 'values'):
    y_values = y_train.values.ravel() # ravel()確保它是一維的
else:
    y_values = np.asarray(y_train).ravel()

# 使用np.sum()，它通常會返回一個純量（Python int或NumPy int）
num_majority = np.sum(y_values == 0)
num_minority_original = np.sum(y_values == 1)
print(f'num_majority: {num_majority}, num_minority_original: {num_minority_original}')

target_num_minority = int(round(num_majority / TARGET_RATIO))
print(f'target_num_minority: {target_num_minority}')

num_to_generate = target_num_minority - num_minority_original
num_to_generate = max(0, num_to_generate)
print(f'num_to_generate: {num_to_generate}')

if len(X_train_minority) == 0:
    print("錯誤：訓練集中沒有少數類別樣本，無法訓練 VAE。")
elif TARGET_RATIO == 60:
    print("提示：維持訓練集原有1:60比例，將不訓練VAE模型。")
else:
    if num_to_generate > 0:
        print(f"需要生成 {num_to_generate} 個新的少數類別樣本以平衡訓練集。")
        random_latent_vectors = np.random.normal(size=(num_to_generate, LATENT_DIM))
        generated_samples = generator.predict(random_latent_vectors)

        X_train_vae_augmented = np.vstack([X_train, generated_samples])
        y_train_numpy = y_train.values if isinstance(y_train, pd.Series) else y_train.values
        
        # 檢查 y_train_numpy 是否為 2D 且只有一列，如果是，則展平 (flatten) 它
        if y_train_numpy.ndim == 2 and y_train_numpy.shape[1] == 1:
            y_train_numpy = y_train_numpy.flatten() # 或者 y_train_numpy.ravel()
        
        # 現在 y_train_numpy 應該是 1D 的
        # np.ones(num_to_generate, dtype=int) 本來就是 1D 的
        y_train_vae_augmented = np.hstack([y_train_numpy, np.ones(num_to_generate, dtype=int)])
        
        shuffle_indices = np.random.permutation(len(X_train_vae_augmented))
        """
        替換X_train與y_train為資料擴增後的版本
        """
        X_train_augmented_shuffled_np = X_train_vae_augmented[shuffle_indices]
        y_train_augmented_shuffled_np = y_train_vae_augmented[shuffle_indices]

        # 將NumPy轉換回Pandas DF
        X_train = pd.DataFrame(X_train_augmented_shuffled_np,
                                        columns=X_train.columns)
        
        y_train = pd.DataFrame(y_train_augmented_shuffled_np,
                                         columns=y_train.columns)
        
        print(f"VAE 增強後訓練集大小: {X_train_vae_augmented.shape}")
        print(f"VAE 增強後訓練集目標分佈:\n{pd.Series(y_train_vae_augmented).value_counts(normalize=True)}")
    else:
        print("訓練集已平衡或少數類別更多，無需使用 VAE 生成樣本。")
        print("VAE 模型未用於增強數據，將跳過 VAE 模型的獨立評估。")

In [None]:
# The label frames have a single column; flatten them so the estimator receives
# 1-D targets instead of column vectors.
y_train_1d = np.asarray(y_train).ravel()
y_test_1d = np.asarray(y_test).ravel()

final_model = lgb.LGBMClassifier(
    # scale_pos_weight=TARGET_RATIO, # leaving this out gave a better F1 in this setting
    learning_rate=0.1,
    max_depth=20,
    min_child_samples=300,
    n_estimators=550,
    lambda_l1=0.7,
    lambda_l2=0.3,
    # `colsample_bytree` and `subsample` are aliases of `feature_fraction` and
    # `bagging_fraction`. The original call set both spellings to different values
    # (0.9 / 0.86 versus 1 / 1); LightGBM keeps the canonical name when both are
    # given, so only the canonical parameters are passed here and the conflicting
    # aliases are dropped.
    feature_fraction=1,
    bagging_fraction=1,
    bagging_freq=1,
    random_state=42,
    verbose=-1,
    n_jobs=-1
)

# Train with early stopping while recording the logloss/AUC curves.
# NOTE(review): early stopping monitors the test set, so the number of boosting rounds
# is selected on the same data that is later used for the final evaluation; a separate
# validation split would give an unbiased test score.
final_model.fit(
    X_train,
    y_train_1d,
    eval_set=[(X_train, y_train_1d), (X_test, y_test_1d)],
    eval_metric=['binary_logloss', 'auc'],
    eval_names=['train', 'test'],
    callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=True)]  # stop after 50 rounds without improvement
)

# Feature importance report, sorted from most to least important
feature_names = X_train.columns.tolist()
importance = final_model.feature_importances_
feature_df = pd.DataFrame({'Feature': feature_names, 'Importance': importance})
feature_df = feature_df.sort_values(by='Importance', ascending=False)
new_feature_importance_file = 'feature_importance.csv' if INIT_MODEL else 'feature_importance_final_model_report.csv'
feature_df.to_csv(new_feature_importance_file, index=False)
print("特徵重要性已儲存")

# Logloss curves for the training and test evaluation sets
plt.figure(figsize=(10, 6))
plt.plot(final_model.evals_result_['train']['binary_logloss'], label='Train Logloss', color='blue')
plt.plot(final_model.evals_result_['test']['binary_logloss'], label='Test Logloss', color='orange')
plt.xlabel('Boosting Rounds')
plt.ylabel('Logloss')
plt.title('Training and Test Logloss Curve')
plt.legend()
plt.grid(True)
plt.savefig('logloss_curve.png')
plt.show()
print("Logloss 曲線已繪製並儲存為 'logloss_curve.png'")

In [None]:
# Predict probabilities, pick the F1-optimal decision threshold and draw the ROC curve.
from sklearn.metrics import auc, confusion_matrix, precision_recall_curve, roc_curve


print("開始預測...")
model = final_model
# Column 1 of predict_proba is the positive-class probability (binary task).
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Threshold search: F1 at every point of the precision-recall curve.
# NOTE(review): the threshold is chosen on the test labels, so the F1 reported
# below is measured on the same data used to pick it.
precisions, recalls, thresholds = precision_recall_curve(y_test, y_pred_proba)
pr_product = precisions * recalls
pr_sum = precisions + recalls + 1e-10  # small epsilon avoids division by zero
f1_scores = 2 * pr_product / pr_sum
optimal_idx = np.argmax(f1_scores)
optimal_threshold = thresholds[optimal_idx]
y_pred_optimal = (y_pred_proba >= optimal_threshold).astype(int)

# Report the results obtained with the selected threshold
best_f1 = f1_score(y_test, y_pred_optimal)
print(f"最佳閾值: {optimal_threshold:.4f}")
print(f"最佳 F1 分數: {best_f1:.4f}")
print("\n最佳分類報告:")
print(classification_report(y_test, y_pred_optimal))
print("最佳閾值下混淆矩陣:\n", confusion_matrix(y_test, y_pred_optimal))

# Persist the selected threshold
with open('best_threshold.txt', 'w') as threshold_file:
    threshold_file.write(str(optimal_threshold))
print(f"最佳閾值已儲存至 'best_threshold.txt'")

# ROC curve as the final evaluation plot
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
roc_ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})', color='blue')
roc_ax.plot([0, 1], [0, 1], 'r--', label='Random Guess')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
roc_ax.legend()
roc_ax.grid(True)
roc_fig.savefig('roc_curve.png')
plt.show()
print("ROC 曲線已繪製並儲存為 roc_curve.png")