In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import KFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import GridSearchCV

# --- Configuration ---
# Column to predict; every other column of the workbook is used as a feature.
target_column_name = 'Rowing distance'
data_path = r"C:\Users\Michael Wang\OneDrive\小论文\毕业论文改写\WGAN-GP\建模_数据预处理\data\development_set_selected_features.xlsx"
output_plot_path = r"C:\Users\Michael Wang\OneDrive\小论文\毕业论文改写\WGAN-GP\插图"

# Create the figure export directory if it does not exist yet.
os.makedirs(output_plot_path, exist_ok=True)

# --- Data loading ---
original_data = pd.read_excel(data_path)
y_original = original_data[target_column_name]
X_original = original_data.drop(target_column_name, axis=1)

# --- SVR hyperparameter tuning ---
# SVR is sensitive to feature scaling, so the scaler is made part of the
# Pipeline that is tuned instead of being applied beforehand.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svr', SVR()),
])

# Candidate values for RandomizedSearchCV.
# Keys carry the pipeline step prefix: 'svr__C' addresses parameter C of 'svr'.
param_grid_svr = dict(
    svr__kernel=['rbf', 'linear', 'poly'],
    svr__C=[0.1, 1, 10, 100, 500],                    # regularisation parameter
    svr__gamma=['scale', 'auto', 0.001, 0.01, 0.1],   # kernel coefficient (mainly 'rbf' / 'poly')
    svr__epsilon=[0.01, 0.1, 0.2, 0.5],               # epsilon of the SVR loss
    svr__degree=[2, 3, 4],                            # polynomial degree (only kernel='poly')
)

print("开始SVR超参数调优 (RandomizedSearchCV)...")
# Randomized search with 5-fold cross-validation on the loaded data set.
search_settings = {
    'n_iter': 50,                           # adjust to the available compute budget
    'cv': 5,
    'scoring': 'neg_mean_absolute_error',   # negated MAE, so larger is better
    'verbose': 2,
    'random_state': 42,
    'n_jobs': -1,                           # use every available CPU core
}
random_search_svr = RandomizedSearchCV(pipeline, param_grid_svr, **search_settings)
random_search_svr.fit(X_original, y_original)

best_params_svr_pipeline = random_search_svr.best_params_
print("最佳参数 (SVR Pipeline):", best_params_svr_pipeline)
print(f"最佳交叉验证得分 ({random_search_svr.scoring}): {random_search_svr.best_score_}")

# Keep only the SVR step's parameters and strip the 'svr__' prefix so the dict
# can be passed straight to SVR(**best_params_svr).
best_params_svr = {
    prefixed_name[len('svr__'):]: chosen_value
    for prefixed_name, chosen_value in best_params_svr_pipeline.items()
    if prefixed_name.startswith('svr__')
}
print("提取的最佳SVR参数:", best_params_svr)


# --- K-fold cross-validation with the tuned parameters ---
kf = KFold(n_splits=5, shuffle=True, random_state=42)
results_svr_metrics = []
fold_train_maes = []  # training MAE of every fold
fold_test_maes = []   # held-out MAE of every fold

print("\n使用最佳参数进行K折交叉验证评估 (SVR):")
for fold, (train_index, test_index) in enumerate(kf.split(X_original), start=1):
    X_train = X_original.iloc[train_index]
    X_test = X_original.iloc[test_index]
    y_train = y_original.iloc[train_index]
    y_test = y_original.iloc[test_index]

    # 1. Scale features: statistics come from the training part only and are
    #    then applied unchanged to the held-out part.
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 2. Fit an SVR configured with the best parameters found by the search.
    svr_model_fold = SVR(**best_params_svr).fit(X_train_scaled, y_train)

    # 3. Predict on both parts of the fold.
    y_pred_train = svr_model_fold.predict(X_train_scaled)
    y_pred_test = svr_model_fold.predict(X_test_scaled)

    # 4. Score the predictions (the target itself is never rescaled).
    mae_train = mean_absolute_error(y_train, y_pred_train)
    mse_train = mean_squared_error(y_train, y_pred_train)
    rmse_train = np.sqrt(mse_train)
    r2_train = r2_score(y_train, y_pred_train)

    mae_test = mean_absolute_error(y_test, y_pred_test)
    mse_test = mean_squared_error(y_test, y_pred_test)
    rmse_test = np.sqrt(mse_test)
    r2_test = r2_score(y_test, y_pred_test)

    fold_train_maes.append(mae_train)
    fold_test_maes.append(mae_test)

    print(f"Fold {fold} Results: Train MAE: {mae_train:.4f}, Train R2: {r2_train:.4f} | Test MAE: {mae_test:.4f}, Test R2: {r2_test:.4f}")
    # Tuple layout: four test metrics first, then the four training metrics.
    fold_metrics = (
        mae_test, mse_test, rmse_test, r2_test,
        mae_train, mse_train, rmse_train, r2_train,
    )
    results_svr_metrics.append(fold_metrics)

# --- Average performance over the folds ---
def _fold_average(position):
    """Mean over all folds of the metric stored at `position` in each result tuple."""
    return np.mean([fold_result[position] for fold_result in results_svr_metrics])


# Positions 0-3 hold the test metrics, positions 4-7 the training metrics.
avg_mae_test_svr = _fold_average(0)
avg_mse_test_svr = _fold_average(1)
avg_rmse_test_svr = _fold_average(2)
avg_r2_test_svr = _fold_average(3)

avg_mae_train_svr = _fold_average(4)
avg_mse_train_svr = _fold_average(5)
avg_rmse_train_svr = _fold_average(6)
avg_r2_train_svr = _fold_average(7)

print("\nSVR - Average Test Performance:")
for metric_label, metric_value in (
    ("MAE", avg_mae_test_svr),
    ("MSE", avg_mse_test_svr),
    ("RMSE", avg_rmse_test_svr),
    ("R2 Score", avg_r2_test_svr),
):
    print(f"  {metric_label}: {metric_value:.4f}")

print("\nSVR - Average Train Performance:")
for metric_label, metric_value in (
    ("MAE", avg_mae_train_svr),
    ("MSE", avg_mse_train_svr),
    ("RMSE", avg_rmse_train_svr),
    ("R2 Score", avg_r2_train_svr),
):
    print(f"  {metric_label}: {metric_value:.4f}")


# --- Plot and export figures ---

# 1. Grouped bar chart of the averaged metrics (MAE and R2), train next to test.
metrics_plot_names_en = ['MAE', 'R2 Score']
values_train_plot_svr = [avg_mae_train_svr, avg_r2_train_svr]
values_test_plot_svr = [avg_mae_test_svr, avg_r2_test_svr]
x_axis_plot = np.arange(len(metrics_plot_names_en))

fig_metrics, ax_metrics = plt.subplots(figsize=(10, 6))
# Shift the two series by +/-0.2 so the 0.4-wide bars sit side by side.
ax_metrics.bar(x_axis_plot - 0.2, values_train_plot_svr, width=0.4, label='Train', align='center', color='skyblue')
ax_metrics.bar(x_axis_plot + 0.2, values_test_plot_svr, width=0.4, label='Test', align='center', color='salmon')
ax_metrics.set_xticks(x_axis_plot)
ax_metrics.set_xticklabels(metrics_plot_names_en)
ax_metrics.set_ylabel('Score')
ax_metrics.set_title('SVR: Average Train vs. Test Set Evaluation Metrics')
ax_metrics.legend()
ax_metrics.grid(True, linestyle='--', alpha=0.7)

plot_filename_metrics_svr = os.path.join(output_plot_path, "svr_average_evaluation_metrics.png")
fig_metrics.savefig(plot_filename_metrics_svr, dpi=300, bbox_inches='tight')
plt.show()


# 2. Training and test error (MAE) of every fold.
# Note: unlike XGBoost, SVR has no iterative training history whose loss could
# be plotted per round. What is shown here is the final training MAE and test
# MAE of each fold of the K-fold cross-validation.
fig_folds, ax_folds = plt.subplots(figsize=(12, 6))
folds = range(1, kf.get_n_splits() + 1)
ax_folds.plot(folds, fold_train_maes, marker='o', linestyle='-', label='Train MAE per Fold', color='dodgerblue')
ax_folds.plot(folds, fold_test_maes, marker='x', linestyle='--', label='Test MAE per Fold', color='orangered')
ax_folds.set_xlabel('Fold Number')
ax_folds.set_ylabel('Mean Absolute Error (MAE)')
ax_folds.set_title('SVR: Training and Testing MAE Across Folds')
ax_folds.set_xticks(folds)
ax_folds.legend()
ax_folds.grid(True, linestyle='--', alpha=0.7)

plot_filename_loss_svr = os.path.join(output_plot_path, "svr_mae_across_folds.png")
fig_folds.savefig(plot_filename_loss_svr, dpi=300, bbox_inches='tight')
plt.show()


# --- Overfitting check ---
# Two heuristics, tried in order: a train-minus-test R2 gap above 0.1, then a
# test MAE more than 20% above the training MAE.
r2_diff_svr = avg_r2_train_svr - avg_r2_test_svr

# The relative MAE gap is only evaluated when both averages are strictly
# positive, which also rules out a division by zero.
if avg_mae_test_svr > 0 and avg_mae_train_svr > 0:
    mae_rel_gap_svr = (avg_mae_test_svr - avg_mae_train_svr) / avg_mae_train_svr
else:
    mae_rel_gap_svr = None

print("\nSVR Overfitting Check:")
if r2_diff_svr > 0.1:
    # A gap above 0.1 already implies that the train R2 exceeds the test R2.
    print(f"Warning: SVR Model might be overfitting! Train R2 ({avg_r2_train_svr:.2f}) is significantly higher than Test R2 ({avg_r2_test_svr:.2f}). Difference: {r2_diff_svr:.2f}")
elif mae_rel_gap_svr is not None and mae_rel_gap_svr > 0.20:
    print(f"Warning: SVR Model might be overfitting! Test MAE ({avg_mae_test_svr:.2f}) is >20% higher than Train MAE ({avg_mae_train_svr:.2f}). Relative difference: {mae_rel_gap_svr*100:.2f}%.")
else:
    print(f"No strong signs of SVR overfitting detected based on current thresholds. R2 Diff (Train-Test): {r2_diff_svr:.2f}. MAE Train: {avg_mae_train_svr:.2f}, MAE Test: {avg_mae_test_svr:.2f}.")


# --- Feature importance analysis (permutation importance) ---
print("\nSVR Feature Importance Analysis (Permutation Importance):")
# Fit the final model on the whole loaded data set (features scaled first).
final_scaler = StandardScaler().fit(X_original)
X_original_scaled = final_scaler.transform(X_original)
final_svr_model = SVR(**best_params_svr).fit(X_original_scaled, y_original)

# Permutation importance, evaluated on the same scaled data the model was
# fitted on. This can take a while, especially for larger n_repeats.
# `scoring` could also be e.g. 'r2'.
perm_importance = permutation_importance(
    estimator=final_svr_model,
    X=X_original_scaled,
    y=y_original,
    scoring='neg_mean_absolute_error',
    n_repeats=10,
    n_jobs=-1,
    random_state=42,
)

# Rank the features from most to least important.
mean_importances = perm_importance.importances_mean
sorted_idx = np.argsort(mean_importances)[::-1]
importance_df_svr = pd.DataFrame({
    'Feature': X_original.columns[sorted_idx],
    'Importance': mean_importances[sorted_idx],
})

print("SVR Feature Importances (High to Low, based on Permutation Importance):")
rows_to_show = min(20, len(X_original.columns))
print(importance_df_svr.head(rows_to_show))

# Horizontal bar chart of the permutation importances (at most the top 20).
num_features_to_plot = min(20, len(X_original.columns))
# Grow the figure with the number of bars, but never below 6 inches.
plot_height = max(6, num_features_to_plot * 0.4)
plt.figure(figsize=(10, plot_height))
plt.barh(importance_df_svr['Feature'][:num_features_to_plot], importance_df_svr['Importance'][:num_features_to_plot], color='mediumseagreen')
# The importances are computed with scoring='neg_mean_absolute_error'.
# Permutation importance is (baseline score - score after shuffling), i.e.
# (-MAE_baseline) - (-MAE_shuffled) = MAE_shuffled - MAE_baseline: the
# *increase* in MAE caused by shuffling the feature. Positive = important.
plt.xlabel('Permutation Importance (increase in MAE)')
plt.ylabel('Feature')
plt.title(f'SVR: Top {num_features_to_plot} Feature Importances (Permutation)')
plt.gca().invert_yaxis()  # most important feature at the top
plt.tight_layout()
plt.grid(True, axis='x', linestyle='--', alpha=0.7)
plot_filename_importance_svr = os.path.join(output_plot_path, "svr_feature_importances_permutation.png")
plt.savefig(plot_filename_importance_svr, dpi=300, bbox_inches='tight')
plt.show()

print(f"\n所有SVR图表已尝试保存至: {output_plot_path}")

# If the selected kernel is linear, additionally report the model coefficients.
if best_params_svr.get('kernel') == 'linear':
    try:
        if not hasattr(final_svr_model, 'coef_'):
            # No coefficients available on the fitted model.
            print("\n最终模型不是线性核，或未正确训练，无法显示系数。")
        else:
            # coef_ is indexed with [0] to get one value per feature
            # (original note: SVR's coef_ is 2-D).
            linear_coeffs = final_svr_model.coef_[0]
            coeff_table = pd.DataFrame({
                'Feature': X_original.columns,
                'Coefficient': linear_coeffs,
                'Absolute Coefficient': np.abs(linear_coeffs),
            })
            # Rank by magnitude so the sign does not affect the ordering.
            coeff_df = coeff_table.sort_values(by='Absolute Coefficient', ascending=False)

            print("\nSVR Linear Kernel Coefficients (Feature Importance):")
            print(coeff_df.head(min(20, len(X_original.columns))))

            top_coeffs = coeff_df.head(num_features_to_plot)
            fig_coef, ax_coef = plt.subplots(figsize=(10, plot_height))
            ax_coef.barh(top_coeffs['Feature'], top_coeffs['Absolute Coefficient'], color='lightcoral')
            ax_coef.set_xlabel('Absolute Coefficient Value')
            ax_coef.set_ylabel('Feature')
            ax_coef.set_title(f'SVR: Top {num_features_to_plot} Feature Importances (Linear Kernel Coefficients)')
            ax_coef.invert_yaxis()  # largest coefficient at the top
            fig_coef.tight_layout()
            ax_coef.grid(True, axis='x', linestyle='--', alpha=0.7)
            plot_filename_coeffs_svr = os.path.join(output_plot_path, "svr_linear_coefficients_importance.png")
            fig_coef.savefig(plot_filename_coeffs_svr, dpi=300, bbox_inches='tight')
            plt.show()
            print(f"SVR线性核系数重要性图已保存至: {plot_filename_coeffs_svr}")
    except Exception as e:
        # Best effort: this extra report must not abort the rest of the run.
        print(f"\n尝试显示线性核系数时出错: {e}")

In [None]:
# SVR model on the original data (with an evaluation on the final hold-out test set added)
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import KFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import os

# --- Configuration ---
# Column to predict; every other column of the workbook is used as a feature.
target_column_name = 'Rowing distance'

# Development set (per the original note: 80% of the data).
development_data_path = r"C:\Users\Michael Wang\OneDrive\小论文\毕业论文改写\WGAN-GP\建模_数据预处理\data\development_set_selected_features.xlsx"
# Final hold-out test set (per the original note: the remaining 20%).
final_test_data_path = r"C:\Users\Michael Wang\OneDrive\小论文\毕业论文改写\WGAN-GP\建模_数据预处理\data\final_test_set_selected_features.xlsx"

# Figure export directory, created on demand.
output_plot_path = r"C:\Users\Michael Wang\OneDrive\小论文\毕业论文改写\WGAN-GP\插图"
os.makedirs(output_plot_path, exist_ok=True)

# --- Data loading ---
# Only the development set is loaded here; it is used for tuning and training.
original_data = pd.read_excel(development_data_path)
y_original = original_data[target_column_name]
X_original = original_data.drop(target_column_name, axis=1)
print(f"开发集数据加载成功，形状: {original_data.shape}")

# --- SVR hyperparameter tuning (on the development set) ---
# The scaler is a pipeline step, so it is tuned and fitted together with SVR.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svr', SVR()),
])

# Candidate values; 'svr__<name>' addresses parameter <name> of the 'svr' step.
param_grid_svr = dict(
    svr__kernel=['rbf', 'linear', 'poly'],
    svr__C=[0.1, 1, 10, 100, 500],
    svr__gamma=['scale', 'auto', 0.001, 0.01, 0.1],
    svr__epsilon=[0.01, 0.1, 0.2, 0.5],
    svr__degree=[2, 3, 4],
)

print("\n开始SVR超参数调优 (RandomizedSearchCV)...")
search_settings = {
    'n_iter': 50,
    'cv': 5,
    'scoring': 'neg_mean_absolute_error',   # negated MAE, so larger is better
    'verbose': 1,
    'random_state': 42,
    'n_jobs': -1,
}
random_search_svr = RandomizedSearchCV(pipeline, param_grid_svr, **search_settings)
random_search_svr.fit(X_original, y_original)

best_params_svr_pipeline = random_search_svr.best_params_
print("最佳参数 (SVR Pipeline):", best_params_svr_pipeline)
# Keep only the SVR step's parameters and strip the 'svr__' prefix so the dict
# can be passed straight to SVR(**best_params_svr).
best_params_svr = {
    prefixed_name[len('svr__'):]: chosen_value
    for prefixed_name, chosen_value in best_params_svr_pipeline.items()
    if prefixed_name.startswith('svr__')
}
print("提取的最佳SVR参数:", best_params_svr)


# --- K-fold cross-validation (inside the development set) ---
kf = KFold(n_splits=5, shuffle=True, random_state=42)
results_svr_metrics = []

print("\n使用最佳参数进行K折交叉验证评估 (在开发集内部):")
for fold, (train_index, test_index) in enumerate(kf.split(X_original), start=1):
    X_train = X_original.iloc[train_index]
    X_test = X_original.iloc[test_index]
    y_train = y_original.iloc[train_index]
    y_test = y_original.iloc[test_index]

    # Scaling statistics come from the training part of the fold only.
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    svr_model_fold = SVR(**best_params_svr).fit(X_train_scaled, y_train)

    y_pred_train = svr_model_fold.predict(X_train_scaled)
    y_pred_test = svr_model_fold.predict(X_test_scaled)

    mae_train = mean_absolute_error(y_train, y_pred_train)
    rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
    r2_train = r2_score(y_train, y_pred_train)

    mae_test = mean_absolute_error(y_test, y_pred_test)
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    r2_test = r2_score(y_test, y_pred_test)

    print(f"Fold {fold} Results: CV-Train MAE: {mae_train:.4f}, R2: {r2_train:.4f} | CV-Validation MAE: {mae_test:.4f}, R2: {r2_test:.4f}")
    # The held-out part of a fold is recorded under the '*_val' keys.
    results_svr_metrics.append(dict(
        mae_val=mae_test,
        rmse_val=rmse_test,
        r2_val=r2_test,
        mae_train=mae_train,
        rmse_train=rmse_train,
        r2_train=r2_train,
    ))

# --- Average cross-validation performance ---
def _cv_average(metric_key):
    """Mean over all folds of the metric stored under `metric_key`."""
    return np.mean([fold_result[metric_key] for fold_result in results_svr_metrics])


avg_mae_cv_val = _cv_average('mae_val')
avg_rmse_cv_val = _cv_average('rmse_val')
avg_r2_cv_val = _cv_average('r2_val')
avg_mae_cv_train = _cv_average('mae_train')
avg_rmse_cv_train = _cv_average('rmse_train')
avg_r2_cv_train = _cv_average('r2_train')

print("\nSVR - Average CV Validation Performance (on Development Set):")
for metric_label, metric_value in (
    ("MAE", avg_mae_cv_val),
    ("RMSE", avg_rmse_cv_val),
    ("R2 Score", avg_r2_cv_val),
):
    print(f"  {metric_label}: {metric_value:.4f}")

print("\nSVR - Average CV Train Performance (on Development Set):")
for metric_label, metric_value in (
    ("MAE", avg_mae_cv_train),
    ("RMSE", avg_rmse_cv_train),
    ("R2 Score", avg_r2_cv_train),
):
    print(f"  {metric_label}: {metric_value:.4f}")

# --- Train the final baseline SVR model (on the whole development set) ---
print("\n--- 训练最终基准SVR模型 (在整个80%开发集上) ---")
# Both the fitted scaler and the fitted model are kept: the scaler is needed
# again to transform any data the model is later asked to predict on.
final_scaler = StandardScaler().fit(X_original)
X_original_scaled = final_scaler.transform(X_original)

final_svr_model = SVR(**best_params_svr).fit(X_original_scaled, y_original)
print("最终基准SVR模型训练完成。")


# ==============================================================================
# =================== Added final-evaluation section: begin ===================
# ==============================================================================

print("\n--- 最终无偏评估 (在20%最终留出测试集上) ---")

# --- 1. Load the final hold-out test set ---
# A failed load aborts the run by raising SystemExit with the error message.
# This replaces the bare `exit()` helper, which is meant for interactive use,
# reports exit status 0 (success) even though loading failed, and shuts down
# the kernel when run inside Jupyter. SystemExit with a message exits with a
# non-zero status and only stops the current cell in a notebook.
try:
    final_test_df = pd.read_excel(final_test_data_path)
except FileNotFoundError as exc:
    raise SystemExit(f"错误: 最终测试集文件未找到: {final_test_data_path}") from exc
except Exception as exc:
    raise SystemExit(f"加载最终测试集时发生错误: {exc}") from exc

print(f"最终测试集数据加载成功，形状: {final_test_df.shape}")
# Split into features and target. A missing target column now surfaces as a
# regular KeyError with a full traceback instead of a generic load error.
X_final_test = final_test_df.drop(columns=[target_column_name])
y_final_test = final_test_df[target_column_name]

# --- 2. Transform the test features with the scaler fitted on the development set ---
X_final_test_scaled = final_scaler.transform(X_final_test)

# --- 3. Predict with the already trained final_svr_model ---
y_pred_final_test = final_svr_model.predict(X_final_test_scaled)

# --- 4. Compute and print the final performance metrics ---
mae_final_svr = mean_absolute_error(y_final_test, y_pred_final_test)
rmse_final_svr = np.sqrt(mean_squared_error(y_final_test, y_pred_final_test))
r2_final_svr = r2_score(y_final_test, y_pred_final_test)

print("\n--- 最终SVR模型在最终测试集上的性能 ---")
for metric_label, metric_value in (
    ("MAE (SVR)", mae_final_svr),
    ("RMSE (SVR)", rmse_final_svr),
    ("R2 Score (SVR)", r2_final_svr),
):
    print(f"{metric_label}: {metric_value:.4f}")

# (Optional) Scatter plot of actual vs. predicted values on the hold-out set.
fig_scatter, ax_scatter = plt.subplots(figsize=(8, 8))
ax_scatter.scatter(y_final_test, y_pred_final_test, alpha=0.7, edgecolors='w', linewidth=0.5)

# Dashed y = x reference line spanning the joint range of both series.
min_val = min(y_final_test.min(), y_pred_final_test.min())
max_val = max(y_final_test.max(), y_pred_final_test.max())
ax_scatter.plot([min_val, max_val], [min_val, max_val], 'k--', lw=2)

ax_scatter.set_xlabel('Actual Rowing Distance')
ax_scatter.set_ylabel('Predicted Rowing Distance (SVR)')
ax_scatter.set_title('SVR Baseline Model: Actual vs. Predicted (Hold-Out Test Set)')
ax_scatter.grid(True, linestyle='--', alpha=0.7)

plot_filename_actual_vs_pred_svr = os.path.join(output_plot_path, "svr_baseline_model_actual_vs_predicted.png")
try:
    fig_scatter.savefig(plot_filename_actual_vs_pred_svr, dpi=300, bbox_inches='tight')
    print(f"\nSVR基准模型真实值 vs 预测值图已保存到: {plot_filename_actual_vs_pred_svr}")
except Exception as e:
    # A failed export is reported but does not stop the figure being shown.
    print(f"保存SVR基准模型真实值 vs 预测值图时发生错误: {e}")
plt.show()

# ==============================================================================
# ==================== Added final-evaluation section: end ====================
# ==============================================================================

## Permutation importance
# Evaluated on the scaled development data the final model was fitted on.
# This can take a while, especially for larger n_repeats.
# `scoring` could also be e.g. 'r2'.
perm_importance = permutation_importance(
    estimator=final_svr_model,
    X=X_original_scaled,
    y=y_original,
    scoring='neg_mean_absolute_error',
    n_repeats=10,
    n_jobs=-1,
    random_state=42,
)

# Rank the features from most to least important.
mean_importances = perm_importance.importances_mean
sorted_idx = np.argsort(mean_importances)[::-1]
importance_df_svr = pd.DataFrame({
    'Feature': X_original.columns[sorted_idx],
    'Importance': mean_importances[sorted_idx],
})

print("SVR Feature Importances (High to Low, based on Permutation Importance):")
rows_to_show = min(20, len(X_original.columns))
print(importance_df_svr.head(rows_to_show))

# Horizontal bar chart of the permutation importances (at most the top 20).
num_features_to_plot = min(20, len(X_original.columns))
# Grow the figure with the number of bars, but never below 6 inches.
plot_height = max(6, num_features_to_plot * 0.4)
plt.figure(figsize=(10, plot_height))
plt.barh(importance_df_svr['Feature'][:num_features_to_plot], importance_df_svr['Importance'][:num_features_to_plot], color='mediumseagreen')
# The importances are computed with scoring='neg_mean_absolute_error'.
# Permutation importance is (baseline score - score after shuffling), i.e.
# (-MAE_baseline) - (-MAE_shuffled) = MAE_shuffled - MAE_baseline: the
# *increase* in MAE caused by shuffling the feature. Positive = important.
plt.xlabel('Permutation Importance (increase in MAE)')
plt.ylabel('Feature')
plt.title(f'SVR: Top {num_features_to_plot} Feature Importances (Permutation)')
plt.gca().invert_yaxis()  # most important feature at the top
plt.tight_layout()
plt.grid(True, axis='x', linestyle='--', alpha=0.7)
plot_filename_importance_svr = os.path.join(output_plot_path, "svr_feature_importances_permutation.png")
plt.savefig(plot_filename_importance_svr, dpi=300, bbox_inches='tight')
plt.show()

print(f"\n所有SVR图表已尝试保存至: {output_plot_path}")

# If the selected kernel is linear, additionally report the model coefficients.
if best_params_svr.get('kernel') == 'linear':
    try:
        if not hasattr(final_svr_model, 'coef_'):
            # No coefficients available on the fitted model.
            print("\n最终模型不是线性核，或未正确训练，无法显示系数。")
        else:
            # coef_ is indexed with [0] to get one value per feature
            # (original note: SVR's coef_ is 2-D).
            linear_coeffs = final_svr_model.coef_[0]
            coeff_table = pd.DataFrame({
                'Feature': X_original.columns,
                'Coefficient': linear_coeffs,
                'Absolute Coefficient': np.abs(linear_coeffs),
            })
            # Rank by magnitude so the sign does not affect the ordering.
            coeff_df = coeff_table.sort_values(by='Absolute Coefficient', ascending=False)

            print("\nSVR Linear Kernel Coefficients (Feature Importance):")
            print(coeff_df.head(min(20, len(X_original.columns))))

            top_coeffs = coeff_df.head(num_features_to_plot)
            fig_coef, ax_coef = plt.subplots(figsize=(10, plot_height))
            ax_coef.barh(top_coeffs['Feature'], top_coeffs['Absolute Coefficient'], color='lightcoral')
            ax_coef.set_xlabel('Absolute Coefficient Value')
            ax_coef.set_ylabel('Feature')
            ax_coef.set_title(f'SVR: Top {num_features_to_plot} Feature Importances (Linear Kernel Coefficients)')
            ax_coef.invert_yaxis()  # largest coefficient at the top
            fig_coef.tight_layout()
            ax_coef.grid(True, axis='x', linestyle='--', alpha=0.7)
            plot_filename_coeffs_svr = os.path.join(output_plot_path, "svr_linear_coefficients_importance.png")
            fig_coef.savefig(plot_filename_coeffs_svr, dpi=300, bbox_inches='tight')
            plt.show()
            print(f"SVR线性核系数重要性图已保存至: {plot_filename_coeffs_svr}")
    except Exception as e:
        # Best effort: this extra report must not abort the rest of the run.
        print(f"\n尝试显示线性核系数时出错: {e}")