In [870]:
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score

In [21]:
from scipy import stats

In [None]:
# Load the dataset from a path relative to the notebook's working directory;
# the first CSV column is used as the DataFrame index rather than as data.
df = pd.read_csv(
    "../data/test_data.csv",
    index_col=0,
)

In [899]:
# Gradient-boosted regressor that the cross-validation cell fits on each fold.
# Parameter meanings below follow the XGBoost scikit-learn API documentation.
model = XGBRegressor(
    n_estimators=500,       # number of boosting rounds
    learning_rate=0.1,      # shrinkage applied to each boosting step
    max_depth=3,            # maximum depth of each tree
    subsample=0.6,          # fraction of training rows sampled per tree
    colsample_bytree=0.6,   # fraction of feature columns sampled per tree
    random_state=42,        # fixed seed for the row/column subsampling
)

In [900]:
# The first column of `df` is the regression target; every remaining column
# is used as a feature.
y = df.iloc[:, 0].values
X = df.iloc[:, 1:].values

# Shuffled 5-fold split with a fixed seed so fold membership is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=22)

# Per-fold accumulators: raw prediction arrays, raw target arrays, and one
# tidy DataFrame per fold.
all_predictions, all_true_values, fold_info = [], [], []

# Cross-validation loop; folds are numbered from 1 for reporting.
for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # The same `model` object is fitted on this fold's training split and
    # then used to predict the held-out split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Per-fold evaluation metrics.
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Fold {fold}: MSE = {mse:.4f}, R2 = {r2:.4f}")

    # Keep this fold's out-of-fold predictions next to the true values.
    # `Sample_Index` holds the positional row numbers produced by the
    # splitter (positions into X / y), kept for later per-sample analysis.
    fold_results = pd.DataFrame({
        'Fold': fold,
        'True_Value': y_test,
        'Predicted_Value': y_pred,
        'Sample_Index': test_idx,
    })

    all_true_values.append(y_test)
    all_predictions.append(y_pred)
    fold_info.append(fold_results)

# Stack the per-fold frames and order the rows by their position in the data.
all_results = pd.concat(fold_info).sort_values(by='Sample_Index')

# Overall metrics computed over the pooled out-of-fold predictions.
overall_true = all_results['True_Value']
overall_pred = all_results['Predicted_Value']
final_mse = mean_squared_error(overall_true, overall_pred)
final_r2 = r2_score(overall_true, overall_pred)

print("\nOverall Performance:")
print(f"MSE: {final_mse:.4f}")
print(f"R2 Score: {final_r2:.4f}")

Fold 1: MSE = 65.9696, R2 = 0.4051
Fold 2: MSE = 24.9034, R2 = 0.6443
Fold 3: MSE = 13.2042, R2 = 0.7763
Fold 4: MSE = 11.8659, R2 = 0.7943
Fold 5: MSE = 20.5118, R2 = 0.6758

Overall Performance:
MSE: 27.2910
R2 Score: 0.6237
