In [1]:
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import GroupShuffleSplit, GroupKFold
from skopt.space import Real, Categorical, Integer
from skopt import BayesSearchCV
from tabpfn import TabPFNRegressor
import pickle

In [2]:
def evaluate_performance(best_model, X, y_true):
    """Score a fitted regressor on a single dataset.

    Parameters
    ----------
    best_model : object
        Any estimator exposing ``predict(X)``.
    X : array-like
        Feature data handed unchanged to ``best_model.predict``.
    y_true : array-like
        Ground-truth targets the predictions are compared against.

    Returns
    -------
    dict
        ``{'MAE': ..., 'MSE': ..., 'R2': ...}`` holding the results of
        ``mean_absolute_error``, ``mean_squared_error`` and ``r2_score``
        (in that key order).
    """
    predictions = best_model.predict(X)

    # Label/scorer pairs; the tuple order fixes the key order of the result.
    scorers = (
        ('MAE', mean_absolute_error),
        ('MSE', mean_squared_error),
        ('R2', r2_score),
    )
    return {label: scorer(y_true, predictions) for label, scorer in scorers}

In [3]:
# Grouped K-fold cross-validation of TabPFNRegressor.
#
# Every fold trains a fresh TabPFNRegressor on the training groups, scores it
# on both the training rows and the held-out groups, and collects the held-out
# predictions. The collected predictions are written to RESULT_PATH and the
# per-fold metrics are summarised as mean ± (sample) standard deviation.

DATA_PATH = "data02.csv"
RESULT_PATH = "test_result_TabPFN.csv"
TARGET_COL = 'value'
GROUP_COL = 'group'
TIME_COL = 'Time'
N_SPLITS = 10

# Load data
data = pd.read_csv(DATA_PATH)

# Features are selected by position: every column from index 2 onward.
# NOTE(review): this relies on the CSV column order. Confirm whether
# GROUP_COL / TIME_COL are meant to fall inside (or outside) this slice.
X = data.iloc[:, 2:]
y = data[TARGET_COL]
group = data[GROUP_COL]

# A positional slice gives no protection against the target ending up among
# the features, which would silently inflate every score below.
assert TARGET_COL not in X.columns, (
    f"Target column '{TARGET_COL}' is inside the feature slice data.iloc[:, 2:]"
)

# Per-fold metric dicts and per-fold held-out prediction frames
train_performance = []
test_performance = []
all_test_preds = []

gkf = GroupKFold(n_splits=N_SPLITS)

# Loop through each fold; round_id is the 1-based fold number used for logging
for round_id, (train_index, test_index) in enumerate(gkf.split(X, y, group), start=1):

    print(f"\nRound {round_id}")  # announce the current round

    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]
    test_data = data.iloc[test_index]  # full rows, needed for group / time below

    # Note: TabPFNRegressor is used with its defaults; no hyperparameter search
    model = TabPFNRegressor(device="cuda")
    model.fit(X_train, y_train)

    # Training-set score (one predict call inside evaluate_performance)
    performance_train = evaluate_performance(model, X_train, y_train)

    # Predict the held-out fold exactly once and reuse the result for both the
    # metrics and the saved predictions, instead of running inference twice.
    # Same three metrics and keys as evaluate_performance returns.
    y_pred_test = model.predict(X_test)
    performance_test = {
        'MAE': mean_absolute_error(y_test, y_pred_test),
        'MSE': mean_squared_error(y_test, y_pred_test),
        'R2': r2_score(y_test, y_pred_test),
    }

    # Store performance metrics
    train_performance.append(performance_train)
    test_performance.append(performance_test)

    print("performance_train=")
    print(performance_train)
    print("performance_test=")
    print(performance_test)

    # Collect prediction results for this fold
    all_test_preds.append(pd.DataFrame({
        'true_value': y_test.values,
        'predicted_value': y_pred_test,
        'group': test_data[GROUP_COL].values,
        'time': test_data[TIME_COL].values
    }))

# Combine and save results
final_result = pd.concat(all_test_preds, ignore_index=True)
final_result.to_csv(RESULT_PATH, index=False)


# Calculate average performance metrics (std is pandas' default sample std)
train_metrics = pd.DataFrame(train_performance)
test_metrics = pd.DataFrame(test_performance)
avg_train_performance = train_metrics.mean()
avg_test_performance = test_metrics.mean()
std_train_performance = train_metrics.std()
std_test_performance = test_metrics.std()

# Print average performance metrics, train first and then test
for split_name, avg_performance, std_performance in (
    ("Train", avg_train_performance, std_train_performance),
    ("Test", avg_test_performance, std_test_performance),
):
    print(f"\nAverage {split_name} Performance:")
    for metric in avg_performance.index:
        avg = avg_performance[metric]
        std = std_performance[metric]
        print(f"{metric:<5}: {avg:.3f} ± {std:.3f}")


Round 1
performance_train=
{'MAE': 0.09909902430979334, 'MSE': 0.03344221305345657, 'R2': 0.9594176787648858}
performance_test=
{'MAE': 0.31621315090585533, 'MSE': 0.2522616124535695, 'R2': 0.594575155241095}

Round 2
performance_train=
{'MAE': 0.10821613464929791, 'MSE': 0.04018297686921839, 'R2': 0.949014563967812}
performance_test=
{'MAE': 0.27969272496634456, 'MSE': 0.2006848565083851, 'R2': 0.7892550300690646}

Round 3
performance_train=
{'MAE': 0.10507547051905028, 'MSE': 0.03693373568073123, 'R2': 0.9532360262658534}
performance_test=
{'MAE': 0.3444194695231446, 'MSE': 0.2686461032920008, 'R2': 0.7127528137316415}

Round 4
performance_train=
{'MAE': 0.09883073927742206, 'MSE': 0.033062569115702274, 'R2': 0.9597999198301082}
performance_test=
{'MAE': 0.28282359469376406, 'MSE': 0.221354369584685, 'R2': 0.6546294226513625}

Round 5
performance_train=
{'MAE': 0.11322581250899111, 'MSE': 0.04202218168896326, 'R2': 0.9484252289596895}
performance_test=
{'MAE': 0.3456676345068871, 'M

In [4]:
# Fit one TabPFNRegressor on the complete dataset (X, y come from the
# cross-validation cell) and pickle it to disk.

# Single source of truth for the output path, so the confirmation message can
# never disagree with the file that is actually written.
# NOTE(review): the filename is spelled "moedl" (presumably meant "model"); it
# is kept unchanged so that any existing loader of this file keeps working.
MODEL_PATH = "TabPFN_moedl.pkl"

final_model = TabPFNRegressor(device="cuda")
final_model.fit(X, y)

# Pickle files can execute arbitrary code on load: only unpickle this file
# from a trusted location.
with open(MODEL_PATH, "wb") as f:
    pickle.dump(final_model, f)

print(f"Final TabPFN model saved as {MODEL_PATH}")

Final TabPFN model saved as TabPDN_model.pkl
