# In [2]: (notebook cell marker)
import os
import pandas as pd
import numpy as np
from itertools import combinations
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, KFold
from openpyxl import Workbook

# Load the dataset from the first sheet of the Excel file.
data = pd.read_excel("含TV电流特征不含自己的140.xlsx", sheet_name=0)

# Target column; every other column is treated as a candidate feature.
y_name = 'ttf'
X_names = list(filter(lambda column: column != y_name, data.columns))

# Hyper-parameter grid searched for each feature combination.
param_grid = dict(
    n_estimators=[40, 50, 100, 200, 300, 500, 700, 1000, 1200, 1500, 1700, 2000, 2500],
    max_depth=[3, 4, 5, 6, 7, 8, 10, 12, 15, 20],
)

# Workbook that collects one result row per feature combination.
wb = Workbook()
ws = wb.active
ws.title = "Feature Combination Results"

# Header row; its order must match the order of the values appended per row.
headers = [
    'Feature Combination',
    'Best n_estimators',
    'Best max_depth',
    'mean_rmse_cv',
    'std_rmse_cv',
    'mean_rmse_test',
    'std_rmse_test',
    'mean_r2_test',
    'std_r2_test',
]
ws.append(headers)

# Evaluate every non-empty subset of the candidate features.
#
# For each subset: grid-search a RandomForestRegressor with 10-fold CV, then
# append one row (best parameters + error statistics) to the worksheet.
#
# NOTE: the search is exhaustive (2 ** len(X_names) - 1 subsets, each running
# the full param_grid with 10-fold CV), so a run can take a very long time.
# The workbook is therefore saved in a ``finally`` clause: if the run is
# interrupted or fails, the rows computed so far are still written to disk.
excel_output_path = "feature_combination_results.xlsx"

# Neither the target column nor the CV splitter depends on the feature subset,
# so build them once. With shuffle=True and an integer random_state, KFold
# yields the same folds on every call to split().
y = data[y_name]
kf = KFold(n_splits=10, shuffle=True, random_state=42)
n_folds = kf.get_n_splits()

try:
    for i in range(1, len(X_names) + 1):
        for combination in combinations(X_names, i):
            X = data[list(combination)]  # feature columns of the current subset

            # Hyper-parameter search, selecting by (negated) mean squared error.
            rf = RandomForestRegressor(random_state=42)
            grid_search = GridSearchCV(
                rf,
                param_grid,
                scoring='neg_mean_squared_error',
                cv=kf,
                n_jobs=-1,
                refit=True,
            )
            grid_search.fit(X, y)

            # CV statistics of the *selected* candidate only.
            # cv_results_['mean_test_score'] holds one entry per candidate in
            # the grid, so averaging it would describe the whole grid rather
            # than the best model. Use the per-fold scores at best_index_.
            cv_results = grid_search.cv_results_
            best_index = grid_search.best_index_
            fold_rmse_cv = np.sqrt([
                -cv_results[f'split{fold}_test_score'][best_index]
                for fold in range(n_folds)
            ])
            mean_rmse_cv = np.mean(fold_rmse_cv)
            std_rmse_cv = np.std(fold_rmse_cv)

            # Re-evaluate the best configuration fold by fold to also get R².
            # NOTE(review): these are the same folds (and the same seeds) that
            # were used for model selection, so the RMSE figures should match
            # the CV figures above and are not an independent hold-out
            # estimate — confirm whether a separate test set / nested CV is
            # wanted here.
            best_rf = grid_search.best_estimator_
            rmse_test_list = []
            r2_test_list = []
            for train_index, test_index in kf.split(X):
                X_train, X_test = X.iloc[train_index], X.iloc[test_index]
                y_train, y_test = y.iloc[train_index], y.iloc[test_index]

                # Re-fitting overwrites the full-data refit; it is not used
                # again after this loop.
                best_rf.fit(X_train, y_train)
                ptest = best_rf.predict(X_test)
                rmse_test_list.append(np.sqrt(mean_squared_error(y_test, ptest)))
                r2_test_list.append(r2_score(y_test, ptest))

            mean_rmse_test = np.mean(rmse_test_list)
            std_rmse_test = np.std(rmse_test_list)
            mean_r2_test = np.mean(r2_test_list)
            std_r2_test = np.std(r2_test_list)

            # One result row per feature subset (order matches the header row).
            row = [
                ' + '.join(combination),
                grid_search.best_params_['n_estimators'],
                grid_search.best_params_['max_depth'],
                mean_rmse_cv,
                std_rmse_cv,
                mean_rmse_test,
                std_rmse_test,
                mean_r2_test,
                std_r2_test,
            ]
            ws.append(row)
finally:
    # Always persist what has been computed, even on interruption or error.
    wb.save(excel_output_path)

# Only reached when every feature combination was evaluated.
print(f"所有特征组合的评估结果已保存到 {excel_output_path}")





# KeyboardInterrupt: (captured notebook output — the run was interrupted)