In [1]:
%load_ext watermark

In [2]:
%watermark

Last updated: 2025-09-24T11:14:13.509354-07:00

Python implementation: CPython
Python version       : 3.13.3
IPython version      : 9.1.0

Compiler    : Clang 15.0.0 (clang-1500.3.9.4)
OS          : Darwin
Release     : 24.6.0
Machine     : arm64
Processor   : arm
CPU cores   : 16
Architecture: 64bit



In [4]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import optuna
import gc
import logging

In [5]:
%watermark --iversions

pandas : 2.3.2
logging: 0.5.1.2
sklearn: 1.7.1
optuna : 4.5.0
xgboost: 3.0.4
numpy  : 2.3.2



In [6]:
%%time
# One entry per pre-computed CV fold, appended in fold order (0..4):
# feature frames in *_folds, matching label Series in *_ys.
train_folds, val_folds = [], []
train_ys, val_ys = [], []

for i in range(5):
    print(f'Loading fold {i}')

    # Each fold is stored as a (train, validation) pair of gzipped CSV files.
    train_fold = pd.read_csv(f'../input/xgtrain_fold_{i}_l.csv.gz')
    val_fold = pd.read_csv(f'../input/xgval_fold_{i}_l.csv.gz')

    # Take the label column out before reducing the frames to features only.
    train_y, val_y = train_fold['target'], val_fold['target']

    # Keep every column except 'target'. The same Index.difference selection
    # is applied to both frames, so train and validation features end up in
    # the same column order as long as the files share a schema.
    train_fold = train_fold.loc[:, train_fold.columns.difference(['target'])]
    val_fold = val_fold.loc[:, val_fold.columns.difference(['target'])]

    train_folds.append(train_fold)
    val_folds.append(val_fold)
    train_ys.append(train_y)
    val_ys.append(val_y)

Loading fold 0
Loading fold 1
Loading fold 2
Loading fold 3
Loading fold 4
CPU times: user 1.64 s, sys: 207 ms, total: 1.85 s
Wall time: 1.86 s


In [7]:
# Full training table; this cell only reads its 'loss' column.
train = pd.read_csv('../input/train.csv.zip')

# Constant added to the loss before taking the logarithm.
shift = 200

target0 = train['loss'].values      # loss on its original scale
target = np.log(shift + target0)    # shifted, log-transformed loss

In [14]:
# Number of boosting rounds passed to xgb.train for every fold of every trial.
num_round = 1000

# Out-of-fold prediction buffer with one slot per element of `target`;
# objective() fills it fold by fold on each trial.
train_oof = np.zeros(target.shape[0])

def objective(trial):
    """Optuna objective: out-of-fold MAE of one sampled XGBoost configuration.

    For each of the 5 folds a booster is trained for `num_round` rounds on the
    pre-loaded `train_folds[i]` / `train_ys[i]`, the matching validation fold is
    predicted, and the predictions are mapped back with `exp(pred) - shift`
    into the module-level `train_oof` buffer.

    Parameters
    ----------
    trial : optuna trial object used to sample the hyperparameters.

    Returns
    -------
    Mean absolute error between `target0` and the filled `train_oof`.
    """
    # Fixed settings first, searched hyperparameters second; merging them in
    # this order keeps both the key order and the suggest_* call order.
    fixed = {
        'objective': 'reg:squarederror',
        'base_score': 7.76,
        'tree_method': 'hist',  # 'gpu_hist','hist'
    }
    searched = {
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'gamma': trial.suggest_float('gamma', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 25),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        'eval_metric': trial.suggest_categorical('eval_metric', ['rmse']),
    }
    params = {**fixed, **searched}

    # NOTE(review): the validation row positions come from this KFold split,
    # while the features/labels come from the pre-saved fold files. This
    # presumes the files were produced by the same split (5 folds, shuffle,
    # random_state=137) - confirm against the code that wrote them.
    splitter = KFold(n_splits=5, shuffle=True, random_state=137)

    for fold, (_, val_index) in enumerate(splitter.split(train, target)):
        train_matrix = xgb.DMatrix(
            train_folds[fold].values, label=train_ys[fold], enable_categorical=True
        )
        val_matrix = xgb.DMatrix(
            val_folds[fold].values, label=val_ys[fold], enable_categorical=True
        )

        booster = xgb.train(params, train_matrix, num_boost_round=num_round)

        # Undo the log/shift transform before storing the predictions.
        train_oof[val_index] = np.exp(booster.predict(val_matrix)) - shift

        # Drop the per-fold objects and force collection before the next fold.
        del train_matrix, val_matrix, booster
        gc.collect()
        gc.collect()

    return mean_absolute_error(target0, train_oof)

In [16]:
# Send log records (including Optuna's, via propagation below) to a file
# through the root logger.
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # Setup the root logger.

# Attach the file handler only once. Re-executing this cell used to add a
# second FileHandler opened with mode="w" on the same path, which duplicates
# every log line and truncates the file under the handler already attached.
log_file = "optuna_xgb_output_l_4_M4.log"
if not any(
    isinstance(handler, logging.FileHandler)
    and handler.baseFilename.endswith(log_file)
    for handler in logger.handlers
):
    logger.addHandler(logging.FileHandler(log_file, mode="w"))

optuna.logging.enable_propagation()  # Propagate logs to the root logger.
optuna.logging.disable_default_handler()  # Stop showing logs in sys.stderr.

# The study is persisted in a SQLite file under a fixed name. Without
# load_if_exists=True, running this cell again (e.g. Restart & Run All)
# raises DuplicatedStudyError because the study is already in the database.
# With it, the existing study is loaded and later optimize() calls add
# trials to it.
study = optuna.create_study(
    storage="sqlite:///xgb_optuna_allstate_l_4_M4.db",
    study_name="five_fold_optuna_xgb_l_4",
    direction='minimize',
    load_if_exists=True,
)

In [17]:
%%time
# Write a start marker through the root logger, then run 3 trials of
# `objective` on the study.
logger.info("Start optimization.")
study.optimize(objective, n_trials=3)

CPU times: user 6min 4s, sys: 2min 35s, total: 8min 40s
Wall time: 1min 15s


In [18]:
# Tabulate the study's trials: trial number, objective value, sampled
# parameters and state; show the first rows.
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.head()

Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_gamma,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,state
0,0,1144.015064,0.005104,0.881655,rmse,0.001001,0.003479,0.016657,7,4,0.968308,COMPLETE
1,1,1151.442027,0.315207,0.829065,rmse,0.230316,0.430886,0.089752,8,234,0.655288,COMPLETE
2,2,1136.993283,7.658246,0.385207,rmse,0.218814,0.02047,0.033287,12,228,0.80987,COMPLETE


In [19]:
%%time
# Run 5 more trials on the same study, rebuild the trials table and save it
# to CSV (without the index column), then display the table.
study.optimize(objective, n_trials=5)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_l_4_M4.csv', index=False)
df

CPU times: user 16min 10s, sys: 8min 15s, total: 24min 26s
Wall time: 3min 40s


Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_gamma,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,state
0,0,1144.015064,0.005104,0.881655,rmse,0.001001,0.003479,0.016657,7,4,0.968308,COMPLETE
1,1,1151.442027,0.315207,0.829065,rmse,0.230316,0.430886,0.089752,8,234,0.655288,COMPLETE
2,2,1136.993283,7.658246,0.385207,rmse,0.218814,0.02047,0.033287,12,228,0.80987,COMPLETE
3,3,1139.276625,0.042274,0.749953,rmse,0.109671,0.001593,0.028515,8,291,0.977011,COMPLETE
4,4,1182.593444,0.144108,0.858976,rmse,0.059218,3.071065,0.003623,21,225,0.42996,COMPLETE
5,5,1138.05161,8.110548,0.701922,rmse,0.001264,6.012945,0.01546,18,210,0.954934,COMPLETE
6,6,1190.795884,8.665741,0.384413,rmse,0.79159,0.747857,0.00423,20,290,0.554288,COMPLETE
7,7,1157.431779,0.043193,0.593631,rmse,0.00351,0.005308,0.0048,25,233,0.711844,COMPLETE


In [20]:
%%time
# Run 100 more trials on the same study, rebuild the trials table and save it
# to the CSV path (without the index column), then show the first 20 rows.
study.optimize(objective, n_trials=100)
df = study.trials_dataframe(attrs=('number', 'value', 'params', 'state'))
df.to_csv('optuna_xgb_output_l_4_M4.csv', index=False)
df.head(20)

CPU times: user 4h 35min 47s, sys: 2h 15min 54s, total: 6h 51min 41s
Wall time: 1h 2min 57s


Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_gamma,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,state
0,0,1144.015064,0.005104,0.881655,rmse,0.001001,0.003479,0.016657,7,4,0.968308,COMPLETE
1,1,1151.442027,0.315207,0.829065,rmse,0.230316,0.430886,0.089752,8,234,0.655288,COMPLETE
2,2,1136.993283,7.658246,0.385207,rmse,0.218814,0.02047,0.033287,12,228,0.80987,COMPLETE
3,3,1139.276625,0.042274,0.749953,rmse,0.109671,0.001593,0.028515,8,291,0.977011,COMPLETE
4,4,1182.593444,0.144108,0.858976,rmse,0.059218,3.071065,0.003623,21,225,0.42996,COMPLETE
5,5,1138.05161,8.110548,0.701922,rmse,0.001264,6.012945,0.01546,18,210,0.954934,COMPLETE
6,6,1190.795884,8.665741,0.384413,rmse,0.79159,0.747857,0.00423,20,290,0.554288,COMPLETE
7,7,1157.431779,0.043193,0.593631,rmse,0.00351,0.005308,0.0048,25,233,0.711844,COMPLETE
8,8,1147.455328,5.666714,0.768054,rmse,0.001054,0.191617,0.008426,13,118,0.451648,COMPLETE
9,9,1154.309275,0.178486,0.606665,rmse,1.202097,0.577846,0.011857,6,149,0.58684,COMPLETE


In [21]:
# Display the trials table built in the previous optimisation cell.
df

Unnamed: 0,number,value,params_alpha,params_colsample_bytree,params_eval_metric,params_gamma,params_lambda,params_learning_rate,params_max_depth,params_min_child_weight,params_subsample,state
0,0,1144.015064,0.005104,0.881655,rmse,0.001001,0.003479,0.016657,7,4,0.968308,COMPLETE
1,1,1151.442027,0.315207,0.829065,rmse,0.230316,0.430886,0.089752,8,234,0.655288,COMPLETE
2,2,1136.993283,7.658246,0.385207,rmse,0.218814,0.020470,0.033287,12,228,0.809870,COMPLETE
3,3,1139.276625,0.042274,0.749953,rmse,0.109671,0.001593,0.028515,8,291,0.977011,COMPLETE
4,4,1182.593444,0.144108,0.858976,rmse,0.059218,3.071065,0.003623,21,225,0.429960,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...
103,103,1137.003995,0.005875,0.336874,rmse,0.035697,0.827184,0.015309,13,264,0.984250,COMPLETE
104,104,1148.734650,0.037124,0.380004,rmse,0.060424,0.692334,0.020255,16,21,0.949476,COMPLETE
105,105,1138.421057,0.022993,0.402456,rmse,0.016063,0.345680,0.038335,12,234,0.898539,COMPLETE
106,106,1135.609822,0.014488,0.425547,rmse,0.050420,1.186537,0.021499,13,214,0.965162,COMPLETE


In [22]:
# Smallest objective value (the MAE returned by `objective`) among the trials in df.
df.value.min()

np.float64(1135.4322739544946)