In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

while any(marker in os.getcwd() for marker in ('exercises', 'notebooks', 'students', 'research', 'projects')):
    os.chdir("..")
sys.path.append('src')

In [2]:
import time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import mlflow
import optuna
import optuna.visualization as vis
from dotenv import load_dotenv
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import root_mean_squared_log_error # Metric used in the competition for evaluation

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Set MLflow tracking URI to local directory
mlflow.set_tracking_uri("projects/proj_2_team_4/mlruns")

# Use for running ui:
# mlflow ui --backend-store-uri "Absolute path"
# e.g.: mlflow ui --backend-store-uri "/Users/wojciechjurewicz/Desktop/Multivariate Data Analysis/Lab/mda2425/projects/proj_2_team_4/mlruns"



In [4]:
# Load environment variables from the correct .env file location
env_path = 'projects/proj_2_team_4/.env'
load_dotenv(env_path)

True

In [5]:
# Get dataset path from environment variable
train_preprocessed_path = os.getenv('TRAIN_PREPROCESSED_PATH')
test_preprocessed_path = os.getenv('VALID_PREPROCESSED_PATH')

df_train = pd.read_csv(train_preprocessed_path)
df_test = pd.read_csv(test_preprocessed_path)

In [6]:
train_df, valid_df = train_test_split(df_train, test_size=0.10, random_state=42)

In [7]:
X_train = train_df.drop(columns=["SalePrice"])
y_train = train_df["SalePrice"]

X_valid = valid_df.drop(columns=["SalePrice"])
y_valid = valid_df["SalePrice"]

X_test = df_test.drop(columns=["SalePrice"])
y_test = df_test["SalePrice"]

In [8]:

y_train.shape


(278174,)

In [9]:
learning_rate = 0.2
max_depth = 8
n_estimators = 1000
subsample = 0.6
tree_method = 'approx'

In [10]:
pipeline = Pipeline([
    ('xgb', XGBRegressor(
        learning_rate=learning_rate, 
        max_depth=max_depth, 
        n_estimators=n_estimators, 
        subsample=subsample, 
        tree_method=tree_method
    ))
])


In [11]:
# # override Optuna's default logging to ERROR only
# optuna.logging.set_verbosity(optuna.logging.ERROR)

# # define a logging callback that will report on only new challenger parameter configurations if a
# # trial has usurped the state of 'best conditions'


# def champion_callback(study, frozen_trial):
#   """
#   Logging callback that will report when a new trial iteration improves upon existing
#   best trial values.

#   Note: This callback is not intended for use in distributed computing systems such as Spark
#   or Ray due to the micro-batch iterative implementation for distributing trials to a cluster's
#   workers or agents.
#   The race conditions with file system state management for distributed trials will render
#   inconsistent values with this callback.
#   """

#   winner = study.user_attrs.get("winner", None)

#   if study.best_value and winner != study.best_value:
#       study.set_user_attr("winner", study.best_value)
#       if winner:
#           improvement_percent = (abs(winner - study.best_value) / study.best_value) * 100
#           print(
#               f"Trial {frozen_trial.number} achieved value: {frozen_trial.value} with "
#               f"{improvement_percent: .4f}% improvement"
#           )
#       else:
#           print(f"Initial trial {frozen_trial.number} achieved value: {frozen_trial.value}")

In [12]:
mlflow.set_experiment("XGBoost_Pareto_Front")

def objective(trial):
    start = time.time()

    # Hyperparameter search space
    params = {
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "n_estimators": trial.suggest_int("n_estimators", 100, 2200),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "tree_method": trial.suggest_categorical("tree_method", ["approx", "hist"]),
    }

    # Start MLflow run (one per trial)
    with mlflow.start_run(nested=True):
        mlflow.log_params(params)  # Save all hyperparameters

        # Optional tags
        mlflow.set_tags({
            "trial_number": trial.number,
            "optimizer_engine": "optuna",
            "model_family": "xgboost",
            "phase": "search"
        })

        # Train model
        pipeline = Pipeline([
            ('xgb', XGBRegressor(**params, n_jobs=-1))
        ])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_valid)
        y_pred = np.maximum(0, y_pred)

        # Calculate objective values
        training_time = time.time() - start
        rmsle = root_mean_squared_log_error(y_valid, y_pred)

        # Log metrics
        mlflow.log_metrics({
            "Validation RMSLE": rmsle,
            "training_time": training_time
        })

    return rmsle, training_time


2025/05/26 23:31:17 INFO mlflow.tracking.fluent: Experiment with name 'XGBoost_Pareto_Front' does not exist. Creating a new experiment.


In [13]:
# Create study
study = optuna.create_study(directions=["minimize", "minimize"])
study.optimize(objective, n_trials=300, timeout=1800, show_progress_bar=True) # set n_trials to higher when doing final training and summary

# For multi-objective, get the list of best trials on the Pareto front
best_trials = study.best_trials

# Extract RMSLE and training_time from best trials
rmsle_values = [t.values[0] for t in best_trials]
time_values = [t.values[1] for t in best_trials]

# Normalize function to scale list to [0,1]
def normalize(lst):
    min_val, max_val = min(lst), max(lst)
    if max_val == min_val:
        return [0.0 for _ in lst]  # avoid division by zero if all equal
    return [(x - min_val) / (max_val - min_val) for x in lst]

rmsle_norm = normalize(rmsle_values)
time_norm = normalize(time_values)

alpha = 0.7  # weight for RMSLE vs training time

# Compute combined score for each trial
scores = [alpha * r + (1 - alpha) * t for r, t in zip(rmsle_norm, time_norm)]

# Get the trial with the lowest combined score
best_index = scores.index(min(scores))
best_trial = best_trials[best_index]
best_params = best_trial.params

# Train final model with best parameters
experiment = mlflow.set_experiment("Bluebook_for_bulldozers_XGBoost")
with mlflow.start_run(experiment_id=experiment.experiment_id):
    mlflow.log_params(best_params)

# Log tags
    mlflow.set_tags(
        tags={
            "project": "Bluebook for Bulldozers",
            "optimizer_engine": "optuna",
            "model_family": "xgboost",
            "feature_set_version": 1,
        }
    )
    
    # Train final model
    final_pipeline = Pipeline([
        ('xgb', XGBRegressor(**best_params))
    ])
    final_pipeline.fit(X_train, y_train)
    y_pred = final_pipeline.predict(X_test)

    # Calculate metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    rmsle = root_mean_squared_log_error(y_test, y_pred)

    # Log metrics
    mlflow.log_metrics({
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        "R2": r2,
        "RMSLE": rmsle
    })

    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"Mean Squared Error (MSE): {mse}")
    print(f"Root Mean Squared Error (RMSE): {rmse}")
    print(f"R-squared (Coefficient of Determination): {r2}")
    print(f"Root Mean Squared Log Error (RMSLE): {rmsle} - Metric used in competition")
    mlflow.sklearn.log_model(final_pipeline, "model")

    # Create and log visualization
    plt.scatter(y_test, y_pred, alpha=0.7)
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color='red', linestyle='--', linewidth=2)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Actual vs Predicted')
    plt.grid(True)
    plt.axis('equal')
    plt.tight_layout()
    actual_vs_predicted_path = "actual_vs_predicted.png"
    plt.savefig(actual_vs_predicted_path)
    plt.close()

    mlflow.log_artifact(actual_vs_predicted_path)
    os.remove(actual_vs_predicted_path)

[I 2025-05-26 23:31:17,585] A new study created in memory with name: no-name-88eff0f3-e711-48f6-8f05-591dac2e460f
  0%|          | 1/300 [00:22<1:52:10, 22.51s/it, 22.51/1800 seconds]

[I 2025-05-26 23:31:40,116] Trial 0 finished with values: [0.25045686960220337, 22.50075626373291] and parameters: {'learning_rate': 0.13732680473655495, 'max_depth': 3, 'n_estimators': 1639, 'subsample': 0.5738291002648241, 'tree_method': 'hist'}.


  1%|          | 2/300 [00:43<1:48:09, 21.78s/it, 43.77/1800 seconds]

[I 2025-05-26 23:32:01,378] Trial 1 finished with values: [0.21005627512931824, 21.25073528289795] and parameters: {'learning_rate': 0.023208347581707246, 'max_depth': 9, 'n_estimators': 415, 'subsample': 0.7658909751428908, 'tree_method': 'approx'}.


  1%|          | 3/300 [00:53<1:19:47, 16.12s/it, 53.16/1800 seconds]

[I 2025-05-26 23:32:10,765] Trial 2 finished with values: [0.20836488902568817, 9.378645896911621] and parameters: {'learning_rate': 0.18789432350242016, 'max_depth': 7, 'n_estimators': 245, 'subsample': 0.7763095825658259, 'tree_method': 'approx'}.


  1%|▏         | 4/300 [01:03<1:07:42, 13.73s/it, 63.22/1800 seconds]

[I 2025-05-26 23:32:20,824] Trial 3 finished with values: [0.19750139117240906, 10.050092220306396] and parameters: {'learning_rate': 0.18333050175611607, 'max_depth': 10, 'n_estimators': 207, 'subsample': 0.8734763118589377, 'tree_method': 'approx'}.


  2%|▏         | 5/300 [01:31<1:32:48, 18.88s/it, 91.23/1800 seconds]

[I 2025-05-26 23:32:48,834] Trial 4 finished with values: [0.20077775418758392, 28.001131057739258] and parameters: {'learning_rate': 0.25297634913736483, 'max_depth': 8, 'n_estimators': 1333, 'subsample': 0.6250314876865684, 'tree_method': 'hist'}.


  2%|▏         | 6/300 [02:53<3:18:54, 40.59s/it, 173.98/1800 seconds]

[I 2025-05-26 23:34:11,586] Trial 5 finished with values: [0.19321498274803162, 82.73339319229126] and parameters: {'learning_rate': 0.18595026700824854, 'max_depth': 12, 'n_estimators': 1509, 'subsample': 0.6952389176684743, 'tree_method': 'approx'}.


  2%|▏         | 7/300 [03:04<2:30:51, 30.89s/it, 184.89/1800 seconds]

[I 2025-05-26 23:34:22,500] Trial 6 finished with values: [0.20439110696315765, 10.904398202896118] and parameters: {'learning_rate': 0.151765350089712, 'max_depth': 8, 'n_estimators': 285, 'subsample': 0.6277453603311338, 'tree_method': 'approx'}.


  3%|▎         | 8/300 [03:33<2:26:13, 30.05s/it, 213.13/1800 seconds]

[I 2025-05-26 23:34:50,739] Trial 7 finished with values: [0.21170563995838165, 28.228970289230347] and parameters: {'learning_rate': 0.2337662524484023, 'max_depth': 5, 'n_estimators': 1643, 'subsample': 0.7352382622073137, 'tree_method': 'hist'}.


  3%|▎         | 9/300 [03:51<2:07:27, 26.28s/it, 231.13/1800 seconds]

[I 2025-05-26 23:35:08,733] Trial 8 finished with values: [0.202654629945755, 17.97351098060608] and parameters: {'learning_rate': 0.2837407936188199, 'max_depth': 12, 'n_estimators': 568, 'subsample': 0.5568850153643141, 'tree_method': 'hist'}.


  3%|▎         | 10/300 [04:14<2:01:59, 25.24s/it, 254.04/1800 seconds]

[I 2025-05-26 23:35:31,648] Trial 9 finished with values: [0.2749067544937134, 22.90506601333618] and parameters: {'learning_rate': 0.12488621453711636, 'max_depth': 3, 'n_estimators': 1049, 'subsample': 0.8843428929468684, 'tree_method': 'approx'}.


  4%|▎         | 11/300 [04:27<1:44:44, 21.75s/it, 267.87/1800 seconds]

[I 2025-05-26 23:35:45,472] Trial 10 finished with values: [0.1955977827310562, 13.808770179748535] and parameters: {'learning_rate': 0.24353299226917016, 'max_depth': 10, 'n_estimators': 542, 'subsample': 0.7801612325049061, 'tree_method': 'hist'}.


  4%|▍         | 12/300 [05:11<2:16:35, 28.46s/it, 311.67/1800 seconds]

[I 2025-05-26 23:36:29,274] Trial 11 finished with values: [0.18996642529964447, 43.78955698013306] and parameters: {'learning_rate': 0.05037736187604336, 'max_depth': 11, 'n_estimators': 954, 'subsample': 0.9765964708545455, 'tree_method': 'approx'}.


  4%|▍         | 13/300 [06:04<2:51:27, 35.84s/it, 364.51/1800 seconds]

[I 2025-05-26 23:37:22,119] Trial 12 finished with values: [0.19159244000911713, 52.83273005485535] and parameters: {'learning_rate': 0.14543809028245058, 'max_depth': 8, 'n_estimators': 1513, 'subsample': 0.7990275312053892, 'tree_method': 'approx'}.


  5%|▍         | 14/300 [07:19<3:46:54, 47.60s/it, 439.29/1800 seconds]

[I 2025-05-26 23:38:36,897] Trial 13 finished with values: [0.19090652465820312, 74.7643780708313] and parameters: {'learning_rate': 0.16175863413456942, 'max_depth': 9, 'n_estimators': 1805, 'subsample': 0.7024238952212076, 'tree_method': 'approx'}.


  5%|▌         | 15/300 [07:45<3:15:56, 41.25s/it, 465.82/1800 seconds]

[I 2025-05-26 23:39:03,425] Trial 14 finished with values: [0.2272036224603653, 26.51832413673401] and parameters: {'learning_rate': 0.2493927986508543, 'max_depth': 4, 'n_estimators': 1076, 'subsample': 0.672508180092818, 'tree_method': 'approx'}.


  5%|▌         | 16/300 [08:01<2:38:26, 33.47s/it, 481.23/1800 seconds]

[I 2025-05-26 23:39:18,840] Trial 15 finished with values: [0.22941075265407562, 15.405494928359985] and parameters: {'learning_rate': 0.22187776247183813, 'max_depth': 5, 'n_estimators': 591, 'subsample': 0.6583365141829998, 'tree_method': 'approx'}.


  6%|▌         | 17/300 [08:46<2:54:27, 36.99s/it, 526.39/1800 seconds]

[I 2025-05-26 23:40:04,000] Trial 16 finished with values: [0.2168622612953186, 45.150007009506226] and parameters: {'learning_rate': 0.03488547055882072, 'max_depth': 6, 'n_estimators': 1570, 'subsample': 0.7792840429268046, 'tree_method': 'approx'}.


  6%|▌         | 18/300 [09:30<3:03:14, 38.99s/it, 570.04/1800 seconds]

[I 2025-05-26 23:40:47,648] Trial 17 finished with values: [0.1890956312417984, 43.63138008117676] and parameters: {'learning_rate': 0.09160181103750953, 'max_depth': 11, 'n_estimators': 1645, 'subsample': 0.5562230317090869, 'tree_method': 'hist'}.


  6%|▋         | 19/300 [09:45<2:29:21, 31.89s/it, 585.39/1800 seconds]

[I 2025-05-26 23:41:03,002] Trial 18 finished with values: [0.18797644972801208, 15.34473991394043] and parameters: {'learning_rate': 0.10448018562617127, 'max_depth': 12, 'n_estimators': 542, 'subsample': 0.9713100084117161, 'tree_method': 'hist'}.


  7%|▋         | 20/300 [09:56<1:59:53, 25.69s/it, 596.64/1800 seconds]

[I 2025-05-26 23:41:14,245] Trial 19 finished with values: [0.19241541624069214, 11.23447299003601] and parameters: {'learning_rate': 0.16222459702513534, 'max_depth': 12, 'n_estimators': 196, 'subsample': 0.9148026938706865, 'tree_method': 'approx'}.


  7%|▋         | 21/300 [10:13<1:46:27, 22.89s/it, 613.01/1800 seconds]

[I 2025-05-26 23:41:30,617] Trial 20 finished with values: [0.2906762957572937, 16.364314079284668] and parameters: {'learning_rate': 0.10876336494188724, 'max_depth': 3, 'n_estimators': 1121, 'subsample': 0.7130697626066065, 'tree_method': 'hist'}.


  7%|▋         | 22/300 [10:56<2:14:36, 29.05s/it, 656.42/1800 seconds]

[I 2025-05-26 23:42:14,032] Trial 21 finished with values: [0.19659718871116638, 43.403939962387085] and parameters: {'learning_rate': 0.15248023393206517, 'max_depth': 7, 'n_estimators': 1482, 'subsample': 0.6486029107101454, 'tree_method': 'approx'}.


  8%|▊         | 23/300 [11:13<1:57:26, 25.44s/it, 673.44/1800 seconds]

[I 2025-05-26 23:42:31,044] Trial 22 finished with values: [0.19117194414138794, 17.00648593902588] and parameters: {'learning_rate': 0.0900735411734131, 'max_depth': 10, 'n_estimators': 713, 'subsample': 0.7995825479195724, 'tree_method': 'hist'}.


  8%|▊         | 24/300 [12:51<3:37:21, 47.25s/it, 771.56/1800 seconds]

[I 2025-05-26 23:44:09,172] Trial 23 finished with values: [0.19046112895011902, 98.11058807373047] and parameters: {'learning_rate': 0.1946960495426151, 'max_depth': 11, 'n_estimators': 2190, 'subsample': 0.9282599614942851, 'tree_method': 'approx'}.


  8%|▊         | 25/300 [13:28<3:22:57, 44.28s/it, 808.92/1800 seconds]

[I 2025-05-26 23:44:46,524] Trial 24 finished with values: [0.21793489158153534, 37.34142208099365] and parameters: {'learning_rate': 0.023740583655367728, 'max_depth': 6, 'n_estimators': 1315, 'subsample': 0.5450702800775289, 'tree_method': 'approx'}.


  9%|▊         | 26/300 [14:20<3:32:04, 46.44s/it, 860.39/1800 seconds]

[I 2025-05-26 23:45:37,999] Trial 25 finished with values: [0.19023564457893372, 51.465742111206055] and parameters: {'learning_rate': 0.19132571928190817, 'max_depth': 10, 'n_estimators': 1285, 'subsample': 0.8445935050709863, 'tree_method': 'approx'}.


  9%|▉         | 27/300 [14:23<2:32:45, 33.57s/it, 863.95/1800 seconds]

[I 2025-05-26 23:45:41,554] Trial 26 finished with values: [0.3267061114311218, 3.549989700317383] and parameters: {'learning_rate': 0.2875354351821902, 'max_depth': 3, 'n_estimators': 146, 'subsample': 0.756833417563748, 'tree_method': 'approx'}.


  9%|▉         | 28/300 [15:39<3:29:45, 46.27s/it, 939.84/1800 seconds]

[I 2025-05-26 23:46:57,451] Trial 27 finished with values: [0.19553227722644806, 75.88543891906738] and parameters: {'learning_rate': 0.20977171734169275, 'max_depth': 9, 'n_estimators': 2044, 'subsample': 0.7925380272364397, 'tree_method': 'approx'}.


 10%|▉         | 29/300 [15:46<2:35:42, 34.48s/it, 946.80/1800 seconds]

[I 2025-05-26 23:47:04,407] Trial 28 finished with values: [0.19833526015281677, 6.950299024581909] and parameters: {'learning_rate': 0.29488201434531636, 'max_depth': 11, 'n_estimators': 240, 'subsample': 0.8462114923665145, 'tree_method': 'hist'}.


 10%|█         | 30/300 [15:54<1:58:43, 26.38s/it, 954.30/1800 seconds]

[I 2025-05-26 23:47:11,913] Trial 29 finished with values: [0.3070330321788788, 7.501520872116089] and parameters: {'learning_rate': 0.19752154185669837, 'max_depth': 3, 'n_estimators': 485, 'subsample': 0.6251844060955963, 'tree_method': 'hist'}.


 10%|█         | 31/300 [15:58<1:28:42, 19.79s/it, 958.70/1800 seconds]

[I 2025-05-26 23:47:16,309] Trial 30 finished with values: [0.3095455765724182, 4.3909900188446045] and parameters: {'learning_rate': 0.21398736448915745, 'max_depth': 3, 'n_estimators': 265, 'subsample': 0.9482237575168802, 'tree_method': 'hist'}.


 11%|█         | 32/300 [16:21<1:32:58, 20.82s/it, 981.92/1800 seconds]

[I 2025-05-26 23:47:39,524] Trial 31 finished with values: [0.20635974407196045, 23.20824885368347] and parameters: {'learning_rate': 0.22748606398175117, 'max_depth': 10, 'n_estimators': 566, 'subsample': 0.5296427669545487, 'tree_method': 'approx'}.


 11%|█         | 33/300 [16:33<1:19:50, 17.94s/it, 993.15/1800 seconds]

[I 2025-05-26 23:47:50,757] Trial 32 finished with values: [0.20180994272232056, 11.227600812911987] and parameters: {'learning_rate': 0.1392924426334572, 'max_depth': 7, 'n_estimators': 590, 'subsample': 0.788661833333151, 'tree_method': 'hist'}.


 11%|█▏        | 34/300 [16:52<1:20:49, 18.23s/it, 1012.06/1800 seconds]

[I 2025-05-26 23:48:09,668] Trial 33 finished with values: [0.2195456326007843, 18.903449058532715] and parameters: {'learning_rate': 0.043179462478558105, 'max_depth': 6, 'n_estimators': 652, 'subsample': 0.9354359073676042, 'tree_method': 'approx'}.


 12%|█▏        | 35/300 [17:55<2:21:02, 31.94s/it, 1075.97/1800 seconds]

[I 2025-05-26 23:49:13,576] Trial 34 finished with values: [0.1874292492866516, 63.89467787742615] and parameters: {'learning_rate': 0.12284641402816608, 'max_depth': 12, 'n_estimators': 1271, 'subsample': 0.7046377101997628, 'tree_method': 'approx'}.


 12%|█▏        | 36/300 [18:16<2:05:12, 28.46s/it, 1096.30/1800 seconds]

[I 2025-05-26 23:49:33,912] Trial 35 finished with values: [0.22683148086071014, 20.326760053634644] and parameters: {'learning_rate': 0.20834471806889163, 'max_depth': 4, 'n_estimators': 1246, 'subsample': 0.7872127360741685, 'tree_method': 'hist'}.


 12%|█▏        | 37/300 [18:56<2:20:16, 32.00s/it, 1136.59/1800 seconds]

[I 2025-05-26 23:50:14,193] Trial 36 finished with values: [0.19298844039440155, 40.268352031707764] and parameters: {'learning_rate': 0.19544311376639345, 'max_depth': 12, 'n_estimators': 1330, 'subsample': 0.7550907135813086, 'tree_method': 'hist'}.


 13%|█▎        | 38/300 [19:24<2:14:32, 30.81s/it, 1164.62/1800 seconds]

[I 2025-05-26 23:50:42,227] Trial 37 finished with values: [0.2689170241355896, 28.027708053588867] and parameters: {'learning_rate': 0.09547771954436657, 'max_depth': 3, 'n_estimators': 1420, 'subsample': 0.9728713630271226, 'tree_method': 'approx'}.


 13%|█▎        | 39/300 [20:16<2:41:40, 37.17s/it, 1216.62/1800 seconds]

[I 2025-05-26 23:51:34,225] Trial 38 finished with values: [0.18895480036735535, 51.98888921737671] and parameters: {'learning_rate': 0.0829021959203673, 'max_depth': 10, 'n_estimators': 1322, 'subsample': 0.6777251989368648, 'tree_method': 'approx'}.


 13%|█▎        | 40/300 [20:43<2:27:56, 34.14s/it, 1243.69/1800 seconds]

[I 2025-05-26 23:52:01,300] Trial 39 finished with values: [0.22077246010303497, 27.06640124320984] and parameters: {'learning_rate': 0.08004833597539303, 'max_depth': 5, 'n_estimators': 1121, 'subsample': 0.8284277468994189, 'tree_method': 'approx'}.


 14%|█▎        | 41/300 [21:19<2:30:09, 34.78s/it, 1279.98/1800 seconds]

[I 2025-05-26 23:52:37,589] Trial 40 finished with values: [0.19144025444984436, 36.281822204589844] and parameters: {'learning_rate': 0.17275708304586496, 'max_depth': 10, 'n_estimators': 1528, 'subsample': 0.7875019121251909, 'tree_method': 'hist'}.


 14%|█▍        | 42/300 [22:18<2:59:56, 41.85s/it, 1338.31/1800 seconds]

[I 2025-05-26 23:53:35,920] Trial 41 finished with values: [0.19087117910385132, 58.32288932800293] and parameters: {'learning_rate': 0.12300125887177007, 'max_depth': 8, 'n_estimators': 1871, 'subsample': 0.8588234220959124, 'tree_method': 'approx'}.


 14%|█▍        | 43/300 [22:29<2:20:22, 32.77s/it, 1349.91/1800 seconds]

[I 2025-05-26 23:53:47,514] Trial 42 finished with values: [0.26157498359680176, 11.589125156402588] and parameters: {'learning_rate': 0.22077696667706656, 'max_depth': 3, 'n_estimators': 848, 'subsample': 0.9519702173194657, 'tree_method': 'hist'}.


 15%|█▍        | 44/300 [23:05<2:22:49, 33.47s/it, 1385.02/1800 seconds]

[I 2025-05-26 23:54:22,629] Trial 43 finished with values: [0.19302062690258026, 35.10772895812988] and parameters: {'learning_rate': 0.12935720049723673, 'max_depth': 10, 'n_estimators': 1520, 'subsample': 0.5282066918753643, 'tree_method': 'hist'}.


 15%|█▌        | 45/300 [23:24<2:03:56, 29.16s/it, 1404.12/1800 seconds]

[I 2025-05-26 23:54:41,729] Trial 44 finished with values: [0.26028797030448914, 19.09441900253296] and parameters: {'learning_rate': 0.027700155823378772, 'max_depth': 4, 'n_estimators': 848, 'subsample': 0.8002496230282903, 'tree_method': 'approx'}.


 15%|█▌        | 46/300 [24:05<2:19:31, 32.96s/it, 1445.94/1800 seconds]

[I 2025-05-26 23:55:23,550] Trial 45 finished with values: [0.20938444137573242, 41.815816164016724] and parameters: {'learning_rate': 0.040687864363570005, 'max_depth': 6, 'n_estimators': 1620, 'subsample': 0.6259794953825981, 'tree_method': 'approx'}.


 16%|█▌        | 47/300 [24:17<1:51:56, 26.55s/it, 1457.52/1800 seconds]

[I 2025-05-26 23:55:35,131] Trial 46 finished with values: [0.22008702158927917, 11.575515985488892] and parameters: {'learning_rate': 0.16087560398596976, 'max_depth': 5, 'n_estimators': 718, 'subsample': 0.6790804593783384, 'tree_method': 'hist'}.


 16%|█▌        | 48/300 [24:21<1:22:45, 19.70s/it, 1461.26/1800 seconds]

[I 2025-05-26 23:55:38,866] Trial 47 finished with values: [0.3039625585079193, 3.7301418781280518] and parameters: {'learning_rate': 0.08003152898676104, 'max_depth': 3, 'n_estimators': 152, 'subsample': 0.7538443893511688, 'tree_method': 'approx'}.


 16%|█▋        | 49/300 [24:40<1:22:26, 19.71s/it, 1480.98/1800 seconds]

[I 2025-05-26 23:55:58,588] Trial 48 finished with values: [0.2378084510564804, 19.717205047607422] and parameters: {'learning_rate': 0.26748350803395426, 'max_depth': 3, 'n_estimators': 1460, 'subsample': 0.7296916537828325, 'tree_method': 'hist'}.


 17%|█▋        | 50/300 [24:43<1:00:56, 14.63s/it, 1483.75/1800 seconds]

[I 2025-05-26 23:56:01,361] Trial 49 finished with values: [0.2176671028137207, 2.7681679725646973] and parameters: {'learning_rate': 0.238723661451508, 'max_depth': 7, 'n_estimators': 113, 'subsample': 0.8862260958174606, 'tree_method': 'hist'}.


 17%|█▋        | 51/300 [25:51<2:06:30, 30.49s/it, 1551.24/1800 seconds]

[I 2025-05-26 23:57:08,847] Trial 50 finished with values: [0.19087332487106323, 67.4801709651947] and parameters: {'learning_rate': 0.14543809028245058, 'max_depth': 8, 'n_estimators': 2111, 'subsample': 0.7990275312053892, 'tree_method': 'approx'}.


 17%|█▋        | 52/300 [25:54<1:32:49, 22.46s/it, 1554.97/1800 seconds]

[I 2025-05-26 23:57:12,577] Trial 51 finished with values: [0.2083473950624466, 3.726151943206787] and parameters: {'learning_rate': 0.29488201434531636, 'max_depth': 9, 'n_estimators': 143, 'subsample': 0.7024238952212076, 'tree_method': 'hist'}.


 18%|█▊        | 53/300 [26:56<2:20:29, 34.13s/it, 1616.32/1800 seconds]

[I 2025-05-26 23:58:13,931] Trial 52 finished with values: [0.19056962430477142, 61.346035957336426] and parameters: {'learning_rate': 0.040687864363570005, 'max_depth': 10, 'n_estimators': 1620, 'subsample': 0.6259794953825981, 'tree_method': 'approx'}.


 18%|█▊        | 54/300 [27:32<2:22:01, 34.64s/it, 1652.15/1800 seconds]

[I 2025-05-26 23:58:49,763] Trial 53 finished with values: [0.1973111480474472, 35.82461881637573] and parameters: {'learning_rate': 0.26748350803395426, 'max_depth': 10, 'n_estimators': 1460, 'subsample': 0.7801612325049061, 'tree_method': 'hist'}.


 18%|█▊        | 55/300 [28:12<2:28:10, 36.29s/it, 1692.30/1800 seconds]

[I 2025-05-26 23:59:29,904] Trial 54 finished with values: [0.23195575177669525, 40.13531303405762] and parameters: {'learning_rate': 0.1829535499752315, 'max_depth': 3, 'n_estimators': 2190, 'subsample': 0.9282599614942851, 'tree_method': 'approx'}.


 19%|█▊        | 56/300 [28:31<2:07:16, 31.30s/it, 1711.95/1800 seconds]

[I 2025-05-26 23:59:49,558] Trial 55 finished with values: [0.22089450061321259, 19.648049116134644] and parameters: {'learning_rate': 0.12284641402816608, 'max_depth': 5, 'n_estimators': 1271, 'subsample': 0.7046377101997628, 'tree_method': 'hist'}.


 19%|█▉        | 57/300 [28:48<1:48:54, 26.89s/it, 1728.55/1800 seconds]

[I 2025-05-27 00:00:06,161] Trial 56 finished with values: [0.21419674158096313, 16.596821069717407] and parameters: {'learning_rate': 0.22187776247183813, 'max_depth': 6, 'n_estimators': 591, 'subsample': 0.6583365141829998, 'tree_method': 'approx'}.


 19%|█▉        | 58/300 [29:10<1:42:10, 25.33s/it, 1750.26/1800 seconds]

[I 2025-05-27 00:00:27,863] Trial 57 finished with values: [0.2016533464193344, 21.6920907497406] and parameters: {'learning_rate': 0.2895041274105668, 'max_depth': 12, 'n_estimators': 718, 'subsample': 0.6425400116921755, 'tree_method': 'hist'}.


 20%|█▉        | 59/300 [29:19<1:22:49, 20.62s/it, 1759.88/1800 seconds]

[I 2025-05-27 00:00:37,488] Trial 58 finished with values: [0.23619423806667328, 9.619578838348389] and parameters: {'learning_rate': 0.05037736187604336, 'max_depth': 5, 'n_estimators': 542, 'subsample': 0.7801612325049061, 'tree_method': 'hist'}.


 20%|██        | 60/300 [29:26<1:05:31, 16.38s/it, 1766.37/1800 seconds]

[I 2025-05-27 00:00:43,976] Trial 59 finished with values: [0.19266530871391296, 6.4821083545684814] and parameters: {'learning_rate': 0.18333050175611607, 'max_depth': 12, 'n_estimators': 207, 'subsample': 0.8734763118589377, 'tree_method': 'hist'}.


 20%|██        | 61/300 [29:34<55:55, 14.04s/it, 1774.95/1800 seconds]  

[I 2025-05-27 00:00:52,555] Trial 60 finished with values: [0.19246293604373932, 8.57093596458435] and parameters: {'learning_rate': 0.12284641402816608, 'max_depth': 12, 'n_estimators': 265, 'subsample': 0.7046377101997628, 'tree_method': 'hist'}.


 21%|██        | 62/300 [30:00<1:55:12, 29.04s/it, 1800.65/1800 seconds]


[I 2025-05-27 00:01:18,255] Trial 61 finished with values: [0.24867834150791168, 25.69218897819519] and parameters: {'learning_rate': 0.19132571928190817, 'max_depth': 3, 'n_estimators': 1285, 'subsample': 0.7296916537828325, 'tree_method': 'approx'}.
Mean Absolute Error (MAE): 6011.3967715370145
Mean Squared Error (MSE): 86203927.48737396
Root Mean Squared Error (RMSE): 9284.607018467392
R-squared (Coefficient of Determination): 0.8621614432988232
Root Mean Squared Log Error (RMSLE): 0.24227875279852001 - Metric used in competition




In [14]:
vis.plot_pareto_front(study, target_names=["RMSLE", "Training Time"]).show()
