In [1]:
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import GroupShuffleSplit, GroupKFold
from skopt.space import Real, Categorical, Integer
from skopt import BayesSearchCV
from tabpfn import TabPFNRegressor

In [2]:
def evaluate_performance(best_model, X, y_true):
    """Score a fitted regressor against known targets.

    Parameters
    ----------
    best_model : object
        Any estimator exposing ``predict(X)``.
    X : array-like
        Feature matrix handed to ``best_model.predict``.
    y_true : array-like
        Ground-truth targets the predictions are compared with.

    Returns
    -------
    dict
        Mapping with the keys ``'MAE'``, ``'MSE'`` and ``'R2'`` holding the
        mean absolute error, mean squared error and R^2 score respectively.
    """
    predictions = best_model.predict(X)

    # Build the result in one go; key order determines the column order
    # when callers turn a list of these dicts into a DataFrame.
    return {
        'MAE': mean_absolute_error(y_true, predictions),
        'MSE': mean_squared_error(y_true, predictions),
        'R2': r2_score(y_true, predictions),
    }

In [None]:
# Grouped 10-fold cross-validation of TabPFNRegressor: fit per fold, report
# train/test metrics, save the out-of-fold test predictions to CSV, and print
# mean ± std of every metric over the folds.

# Run configuration (edit here rather than inside the loop)
DATA_PATH = "data03.csv"
RESULT_PATH = "test_result_03.csv"
N_SPLITS = 10
FIRST_FEATURE_COL = 2  # features are selected positionally from this column on
DEVICE = "cuda"        # needs a CUDA-capable GPU; change here to run elsewhere

# Load data
data = pd.read_csv(DATA_PATH)
# NOTE(review): features are every column from position FIRST_FEATURE_COL
# onward, while 'value', 'group' and 'Time' are read by name below. Only two
# leading columns are skipped, so at least one of those three is part of X.
# Confirm that the target 'value' is NOT among the feature columns.
X = data.iloc[:, FIRST_FEATURE_COL:]
y = data['value']
group = data['group']

# Per-fold metric dicts (one entry per fold)
train_performance = []
val_performance = []  # not populated in this cell; kept in case other cells use it
test_performance = []

# Per-fold DataFrames of out-of-fold test predictions
all_test_preds = []

# GroupKFold keeps all rows of one group in the same fold, so no group is
# split between the training and test sides of a fold.
gkf = GroupKFold(n_splits=N_SPLITS)

# Loop through each fold; round_id is the 1-based fold number
for round_id, (train_index, test_index) in enumerate(gkf.split(X, y, group), start=1):

    print(f"\nRound {round_id}")

    train_data = data.iloc[train_index]
    test_data = data.iloc[test_index]

    X_train = train_data.iloc[:, FIRST_FEATURE_COL:]
    y_train = train_data['value']
    group_train = train_data['group']

    X_test = test_data.iloc[:, FIRST_FEATURE_COL:]
    y_test = test_data['value']
    group_test = test_data['group']

    # A fresh model per fold; no hyperparameter search is performed here.
    model = TabPFNRegressor(device=DEVICE)
    model.fit(X_train, y_train)

    # Evaluate performance on training and test sets
    performance_train = evaluate_performance(model, X_train, y_train)
    performance_test = evaluate_performance(model, X_test, y_test)

    # Store performance metrics
    train_performance.append(performance_train)
    test_performance.append(performance_test)

    print("performance_train=")
    print(performance_train)
    print("performance_test=")
    # Previously the label above was printed without the metrics themselves.
    print(performance_test)

    # Keep the raw test predictions of this fold for the combined CSV
    y_pred_test = model.predict(X_test)
    fold_result = pd.DataFrame({
        'true_value': y_test.values,
        'predicted_value': y_pred_test,
        'group': group_test.values,
        'time': test_data['Time'].values
    })
    all_test_preds.append(fold_result)

# Combine and save results
final_result = pd.concat(all_test_preds, ignore_index=True)
final_result.to_csv(RESULT_PATH, index=False)


# Calculate average performance metrics (build each metrics table only once)
train_metrics = pd.DataFrame(train_performance)
test_metrics = pd.DataFrame(test_performance)
avg_train_performance = train_metrics.mean()
avg_test_performance = test_metrics.mean()
std_train_performance = train_metrics.std()
std_test_performance = test_metrics.std()

# Print average performance metrics as "mean ± std" over the folds
for title, avg_performance, std_performance in (
    ("Train", avg_train_performance, std_train_performance),
    ("Test", avg_test_performance, std_test_performance),
):
    print(f"\nAverage {title} Performance:")
    for metric in avg_performance.index:
        avg = avg_performance[metric]
        std = std_performance[metric]
        print(f"{metric:<5}: {avg:.3f} ± {std:.3f}")


Round 1
performance_train=
{'MAE': 0.1040024861783403, 'MSE': 0.03206843395826335, 'R2': 0.9501124947137605}
performance_test=
{'MAE': 0.26072009487516445, 'MSE': 0.13798345807338214, 'R2': 0.5858354892618254}

Round 2
performance_train=
{'MAE': 0.10059049487236027, 'MSE': 0.02954020298081067, 'R2': 0.9522654632609906}
performance_test=
{'MAE': 0.23824110182346364, 'MSE': 0.11779884192666747, 'R2': 0.8043972365579426}

Round 3
performance_train=
{'MAE': 0.10612567303448915, 'MSE': 0.0338230116990043, 'R2': 0.94469213421151}
performance_test=
{'MAE': 0.2669140505068853, 'MSE': 0.12046245739595314, 'R2': 0.8149099177320812}

Round 4
performance_train=
{'MAE': 0.11253399538881785, 'MSE': 0.03582008675422139, 'R2': 0.9423632324478263}
performance_test=
{'MAE': 0.24037653430301018, 'MSE': 0.15011851351123026, 'R2': 0.7345641559892564}

Round 5
performance_train=
{'MAE': 0.10657013822080434, 'MSE': 0.03288953437313876, 'R2': 0.9481588084219555}
performance_test=
{'MAE': 0.22140983493470848,