In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl
import gc
from sklearn.metrics import r2_score
from catboost import CatBoostRegressor, Pool
import seaborn as sns
import joblib
import lightgbm as lgb
from sklearn.inspection import permutation_importance
import os
import wandb
import time

In [2]:
def weighted_r2_metric(y_true, y_pred, weights=None):
    if weights is None:
        weights = np.ones_like(y_true)

    numerator = np.sum(weights * (y_true - y_pred)**2)

    y_weighted_mean = np.sum(weights * y_true) / np.sum(weights)
    denominator = np.sum(weights * (y_true - y_weighted_mean)**2)

    score = 1 - (numerator / denominator)
    return 'weighted_r2', score, True

responder_cols = [f"responder_{col}" for col in range(9)]

In [3]:
df_list = []
for part_in in ["6","7","8","9"]:
    part_id = part_in
    data_dir = f"train.parquet/partition_id={part_id}/part-0.parquet"
    df_list.append(pl.read_parquet(data_dir))
    gc.collect()

In [4]:
train_df = pl.concat(df_list)
del df_list
gc.collect()

0

In [6]:
def create_daily_stats(train_df, target_col):
    daily_avg = (
        train_df
        .group_by(["symbol_id", "date_id"], maintain_order=True)
        .agg([pl.col(target_col).mean().alias("daily_avg"), 
            pl.col(target_col).std().alias("daily_std"),
            pl.col(target_col).min().alias("daily_min"),
            pl.col(target_col).max().alias("daily_max"),
            pl.col(target_col).median().alias("daily_median"),
            pl.col(target_col).skew().alias("daily_skew"),
            pl.col(target_col).kurtosis().alias("daily_kurtosis"),
            pl.col(target_col).last().alias("last_value"),
            pl.col(target_col).first().alias("first_value"),
            (pl.col(target_col).max() - pl.col(target_col).min()).alias("daily_range"),
            pl.col(target_col).sum().alias("target_sum"),
            pl.col(target_col).count().alias("daily_count")
            ]
            )
    )
    daily_avg = daily_avg.sort(["symbol_id", "date_id"])

    columns_to_shift = ["daily_avg", "daily_std", "daily_min", "daily_max", "daily_median", "daily_skew", "daily_kurtosis", "last_value", "first_value",
    "daily_range", "target_sum", "date_id", "daily_count"]

    daily_avg = daily_avg.with_columns([
        pl.col(col_name)
        .shift(1)
        .over("symbol_id")
        .alias(f"lag_1_{col_name}_{target_col}")
        for col_name in columns_to_shift
        ])

    s1 = [f"lag_1_{col_name}_{target_col}" for col_name in columns_to_shift if col_name != "date_id"]

    selected_cols = ["symbol_id","date_id"] + s1
    daily_avg = daily_avg.select(selected_cols)

    train_df = train_df.join(daily_avg,
              on=["symbol_id", "date_id"],
              how="left")

    return train_df

def create_daily_stats2(train_df, target_col):
    daily_avg = (
        train_df
        .group_by(["symbol_id", "date_id"], maintain_order = True)
        .agg([pl.col(target_col).mean().alias("daily_avg"), 
            pl.col(target_col).std().alias("daily_std"),
            pl.col(target_col).min().alias("daily_min"),
            pl.col(target_col).max().alias("daily_max"),
            pl.col(target_col).median().alias("daily_median"),
            pl.col(target_col).last().alias("last_value"),
            (pl.col(target_col).max() - pl.col(target_col).min()).alias("daily_range"),
            pl.col(target_col).sum().alias("target_sum"),
            ]
            )
    )
    daily_avg = daily_avg.sort(["symbol_id", "date_id"])

    columns_to_shift = ["daily_avg", "daily_std", "daily_min", "daily_max", "daily_median", "last_value","daily_range", "target_sum"]

    daily_avg = daily_avg.with_columns([
        pl.col(col_name)
        .shift(1)
        .over("symbol_id")
        .alias(f"lag_1_{col_name}_{target_col}")
        for col_name in columns_to_shift
    ])


    s1 = [f"lag_1_{col_name}_{target_col}" for col_name in columns_to_shift if col_name != "date_id"]

    selected_cols = ["symbol_id","date_id"] + s1
    daily_avg = daily_avg.select(selected_cols)

    train_df = train_df.join(daily_avg,
              on=["symbol_id", "date_id"],
              how="left")

    train_df = train_df.sort(["date_id", "time_id", "symbol_id"])
    gc.collect()
    return train_df


In [7]:
%%time
for col_name in ["responder_6"]:
    train_df = create_daily_stats(train_df, col_name)
gc.collect()

CPU times: user 7.1 s, sys: 2.25 s, total: 9.35 s
Wall time: 605 ms


0

In [8]:
%%time
for col_name in ["responder_0","responder_1","responder_2","responder_3","responder_4","responder_5","responder_7","responder_8"]:
    train_df = create_daily_stats2(train_df, col_name)
gc.collect()

CPU times: user 2min 17s, sys: 1min 57s, total: 4min 15s
Wall time: 16.7 s


0

In [9]:
train_df = train_df.with_columns([
    (2 * np.pi * pl.col("time_id") / 967).sin().alias("sin_time_id").cast(pl.Float32),
    (2 * np.pi * pl.col("time_id") / 967).cos().alias("cos_time_id").cast(pl.Float32),
])
gc.collect()

0

In [10]:
%%time
default_features = [f"feature_{idx:02d}" for idx in range(79)]
train_df = train_df.with_columns(null_count = pl.sum_horizontal([pl.col(col).is_null() for col in default_features]))
gc.collect()

CPU times: user 1.99 s, sys: 4.37 s, total: 6.36 s
Wall time: 525 ms


0

In [11]:
polars_train_df = train_df
train_df = polars_train_df.to_pandas()
del polars_train_df
gc.collect()

0

In [12]:
train_df["symbol_id"] = train_df["symbol_id"].astype("category")
train_df["feature_10"] = train_df["feature_10"].astype("category")
train_df["feature_11"] = train_df["feature_11"].astype("category")
train_df["feature_09"] = train_df["feature_09"].astype("category")
gc.collect()

0

In [13]:
X_train = train_df[train_df["date_id"] < 1550].copy()
X_test = train_df[train_df["date_id"] >= 1550].copy()
del train_df
gc.collect()

0

In [14]:
y_train = X_train["responder_6"].copy()
y_test = X_test["responder_6"].copy()
test_weights = X_test["weight"].values.copy()
train_weights = X_train["weight"].values.copy()
X_train.drop(columns = responder_cols, axis=1, inplace = True, errors = "ignore")
X_test.drop(columns = responder_cols, axis=1, inplace = True, errors = "ignore")
X_train.drop(columns = ["weight"], axis = 1, inplace = True)
X_test.drop(columns = ["weight"], axis = 1, inplace = True)
gc.collect()

0

In [19]:
feature_cols = [col for col in X_train.columns if col not in ["date_id", "weight"]]
len(feature_cols)

174

In [20]:
feature_cols = [col for col in feature_cols if col not in responder_cols]
len(feature_cols)

174

In [16]:
X_train.drop(columns=[col for col in X_train.columns if col not in feature_cols], inplace=True)
X_test.drop(columns=[col for col in X_test.columns if col not in feature_cols], inplace=True)
gc.collect()

0

In [17]:
%%time
short_id = int(time.time()) % 100_000
cb_params = {"random_seed" : 16,
            "iterations" : 1_500,
            "task_type" : "GPU",
            "objective" : "RMSE",
            "eval_metric" : "R2", 
            "cat_features" : ["symbol_id", "feature_09","feature_10","feature_11"],
            "use_best_model" : False,
            "learning_rate" : 0.01,
            "gpu_ram_part" : 0.9,
            "one_hot_max_size" : 64,
            "border_count" : 128,
            "max_depth" : 6}

exp_name = f"cb_run_{short_id}"

train_data = Pool(X_train, y_train, weight = train_weights, cat_features = ["symbol_id", "feature_09","feature_10","feature_11"])
test_data = Pool(X_test, y_test, weight = test_weights, cat_features = ["symbol_id", "feature_09","feature_10","feature_11"])
cb_model = CatBoostRegressor(**cb_params)

wandb.init(
    project="js_catboost", 
    name=exp_name,  
    tags = ["exp", "cb_v21"],
    config={
        "max_depth" : cb_params["max_depth"],
        "random_seed": cb_params["random_seed"],
        "iterations": cb_params["iterations"],
        "objective": cb_params["objective"],
        "one_hot_max_size" : cb_params["one_hot_max_size"],
        "learning_rate" : cb_params["learning_rate"],
        "border_count" : cb_params["border_count"],
        "cat_features" : cb_params["cat_features"],
        "feature_cols" : train_data.get_feature_names(),
    },
)
wandb.run.notes = "Yeni Featurelar ile deneme, border_count 255 deneme"

cb_model.fit(train_data, eval_set = test_data, verbose = 100)

y_pred_cb = np.clip(cb_model.predict(test_data), -5, 5)
y_pred_train_cb = np.clip(cb_model.predict(train_data), -5, 5)

r_score_test = r2_score(test_data.get_label(), y_pred_cb, sample_weight = test_data.get_weight())
r_score_train = r2_score(train_data.get_label(), y_pred_train_cb, sample_weight = train_data.get_weight())
wandb.run.summary["w_test_r2"] = r_score_test
wandb.run.summary["w_train_r2"] = r_score_train
eval_metrics_train = cb_model.eval_metrics(train_data, metrics = ["R2", "RMSE"], eval_period = 100, thread_count = 16)
eval_metrics_test = cb_model.eval_metrics(test_data, metrics = ["R2", "RMSE"], eval_period = 100, thread_count = 16)


for iteration in range(len(eval_metrics_train['R2'])):
    metrics_to_log = {
        "iteration": (iteration) * 100, 
        "train_r2": eval_metrics_train['R2'][iteration],
        "train_rmse": eval_metrics_train['RMSE'][iteration],
        "test_r2": eval_metrics_test['R2'][iteration],
        "test_rmse": eval_metrics_test['RMSE'][iteration],
    }
    
    wandb.log(metrics_to_log)

cb_model.save_model(os.path.join(wandb.run.dir, f"cb_{short_id}.cbm"))

wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mturkenm[0m. Use [1m`wandb login --relogin`[0m to force relogin


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0001346	test: 0.0000680	best: 0.0000680 (0)	total: 299ms	remaining: 7m 28s
100:	learn: 0.0069737	test: 0.0036626	best: 0.0036626 (100)	total: 13.3s	remaining: 3m 4s
200:	learn: 0.0099538	test: 0.0049433	best: 0.0049433 (200)	total: 27s	remaining: 2m 54s
300:	learn: 0.0121170	test: 0.0057043	best: 0.0057043 (300)	total: 41s	remaining: 2m 43s
400:	learn: 0.0137773	test: 0.0063140	best: 0.0063140 (400)	total: 55.3s	remaining: 2m 31s
500:	learn: 0.0152272	test: 0.0067473	best: 0.0067473 (500)	total: 1m 9s	remaining: 2m 19s
600:	learn: 0.0164650	test: 0.0071258	best: 0.0071258 (600)	total: 1m 24s	remaining: 2m 6s
700:	learn: 0.0177102	test: 0.0074460	best: 0.0074460 (700)	total: 1m 39s	remaining: 1m 53s
800:	learn: 0.0191937	test: 0.0076965	best: 0.0076965 (800)	total: 1m 54s	remaining: 1m 39s
900:	learn: 0.0204937	test: 0.0079158	best: 0.0079158 (900)	total: 2m 9s	remaining: 1m 25s
1000:	learn: 0.0217412	test: 0.0080999	best: 0.0080999 (1000)	total: 2m 24s	remaining: 1m 11s
110

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▄▅▆▆▆▇▇▇▇██████
test_rmse,█▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁
train_r2,▁▃▄▄▅▅▅▆▆▆▇▇▇▇██
train_rmse,█▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.00862
test_rmse,0.79855
train_r2,0.02727
train_rmse,0.81823
w_test_r2,0.00862
w_train_r2,0.02727


CPU times: user 10min 43s, sys: 41.6 s, total: 11min 25s
Wall time: 4min 27s


## Catboost Wandb Sweeps

In [23]:
sweep_config = {
    "method": "grid", 
    "metric": {
        "name": "w_test_r2", 
        "goal": "maximize", 
    },
    "parameters": {
        "learning_rate": {
            "values": [0.01, 0.025],
        },
        "max_depth": {
            "values": [5, 6, 7], 
        },
        "border_count": {
            "values": [128, 255], 
        },
        "iterations": {
            "values": [1500],
        },
    },
}

In [24]:
def train():
    wandb.init()

    config = wandb.config

    cb_params = {
        "random_seed": 16,
        "iterations": config.iterations,
        "task_type": "GPU",
        "objective": "RMSE",
        "eval_metric": "R2",
        "cat_features": ["symbol_id", "feature_10"],
        "use_best_model": False,
        "learning_rate": config.learning_rate,
        "gpu_ram_part": 0.9,
        "one_hot_max_size": 64,
        "border_count": config.border_count,
        "max_depth": config.max_depth,
    }

    train_data = Pool(X_train, y_train, weight=train_weights, cat_features=["symbol_id", "feature_10"])
    test_data = Pool(X_test, y_test, weight=test_weights, cat_features=["symbol_id", "feature_10"])

    log_config_update={
        "random_seed": cb_params["random_seed"],
        "objective": cb_params["objective"],
        "one_hot_max_size" : cb_params["one_hot_max_size"],
        "cat_features" : cb_params["cat_features"],
        "feature_cols" : train_data.get_feature_names(),
    }
    wandb.config.update(log_config_update, allow_val_change=True)
    wandb.run.notes = "Data partition 5 eklendi"

    cb_model = CatBoostRegressor(**cb_params)
    cb_model.fit(train_data, eval_set=test_data, verbose=100)

    y_pred_cb = np.clip(cb_model.predict(test_data), -5, 5)
    y_pred_train_cb = np.clip(cb_model.predict(train_data), -5, 5)

    r_score_test = r2_score(test_data.get_label(), y_pred_cb, sample_weight=test_data.get_weight())
    r_score_train = r2_score(train_data.get_label(), y_pred_train_cb, sample_weight=train_data.get_weight())

    wandb.run.summary["w_test_r2"] = r_score_test
    wandb.run.summary["w_train_r2"] = r_score_train

    eval_metrics_train = cb_model.eval_metrics(train_data, metrics=["R2", "RMSE"], eval_period=100, thread_count=16)
    eval_metrics_test = cb_model.eval_metrics(test_data, metrics=["R2", "RMSE"], eval_period=100, thread_count=16)

    for iteration in range(len(eval_metrics_train['R2'])):
        metrics_to_log = {
            "iteration": (iteration) * 100,
            "train_r2": eval_metrics_train['R2'][iteration],
            "train_rmse": eval_metrics_train['RMSE'][iteration],
            "test_r2": eval_metrics_test['R2'][iteration],
            "test_rmse": eval_metrics_test['RMSE'][iteration],
        }
        wandb.log(metrics_to_log)

    cb_model.save_model(os.path.join(wandb.run.dir, f"cb_{wandb.run.name}.cbm"))
    wandb.finish()

In [25]:
sweep_id = wandb.sweep(sweep_config, project="js_catboost")

Create sweep with ID: zevgd9kj
Sweep URL: https://wandb.ai/turkenm/js_catboost/sweeps/zevgd9kj


In [26]:
wandb.agent(sweep_id, function=train)

[34m[1mwandb[0m: Agent Starting Run: b2v9mvbh with config:
[34m[1mwandb[0m: 	border_count: 128
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_depth: 5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0001178	test: 0.0000480	best: 0.0000480 (0)	total: 128ms	remaining: 3m 11s
100:	learn: 0.0067014	test: 0.0031745	best: 0.0031745 (100)	total: 7.64s	remaining: 1m 45s
200:	learn: 0.0097163	test: 0.0044999	best: 0.0044999 (200)	total: 15.4s	remaining: 1m 39s
300:	learn: 0.0117706	test: 0.0052891	best: 0.0052891 (300)	total: 23.4s	remaining: 1m 33s
400:	learn: 0.0136304	test: 0.0057686	best: 0.0057686 (400)	total: 31.5s	remaining: 1m 26s
500:	learn: 0.0153819	test: 0.0061535	best: 0.0061535 (500)	total: 39.9s	remaining: 1m 19s
600:	learn: 0.0170729	test: 0.0064783	best: 0.0064783 (600)	total: 48.1s	remaining: 1m 11s
700:	learn: 0.0185407	test: 0.0068074	best: 0.0068074 (700)	total: 56.5s	remaining: 1m 4s
800:	learn: 0.0198325	test: 0.0070815	best: 0.0070815 (800)	total: 1m 4s	remaining: 56.4s
900:	learn: 0.0211296	test: 0.0072712	best: 0.0072712 (900)	total: 1m 12s	remaining: 48.5s
1000:	learn: 0.0224077	test: 0.0074414	best: 0.0074414 (1000)	total: 1m 21s	remaining: 40.4s
110

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▄▅▆▆▆▇▇▇▇▇█████
test_rmse,█▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁
train_r2,▁▃▃▄▄▅▅▆▆▆▇▇▇▇██
train_rmse,█▆▆▅▅▄▄▃▃▃▃▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.00802
test_rmse,0.79879
train_r2,0.02844
train_rmse,0.8067
w_test_r2,0.00802
w_train_r2,0.02845


[34m[1mwandb[0m: Agent Starting Run: cqddgdh8 with config:
[34m[1mwandb[0m: 	border_count: 128
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_depth: 6
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0001342	test: 0.0000553	best: 0.0000553 (0)	total: 138ms	remaining: 3m 26s
100:	learn: 0.0077813	test: 0.0035332	best: 0.0035332 (100)	total: 9.02s	remaining: 2m 4s
200:	learn: 0.0112530	test: 0.0049044	best: 0.0049044 (200)	total: 17.9s	remaining: 1m 55s
300:	learn: 0.0136534	test: 0.0057004	best: 0.0057004 (300)	total: 27.2s	remaining: 1m 48s
400:	learn: 0.0158238	test: 0.0062296	best: 0.0062296 (400)	total: 36.8s	remaining: 1m 40s
500:	learn: 0.0178607	test: 0.0066880	best: 0.0066880 (500)	total: 46.5s	remaining: 1m 32s
600:	learn: 0.0199499	test: 0.0070248	best: 0.0070248 (600)	total: 56.2s	remaining: 1m 24s
700:	learn: 0.0218726	test: 0.0073259	best: 0.0073259 (700)	total: 1m 6s	remaining: 1m 15s
800:	learn: 0.0236550	test: 0.0075958	best: 0.0075958 (800)	total: 1m 15s	remaining: 1m 6s
900:	learn: 0.0254052	test: 0.0077668	best: 0.0077668 (900)	total: 1m 25s	remaining: 56.9s
1000:	learn: 0.0271439	test: 0.0079469	best: 0.0079469 (1000)	total: 1m 35s	remaining: 47.5s
11

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▄▅▆▆▇▇▇▇▇██████
test_rmse,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁
train_r2,▁▃▃▄▄▅▅▅▆▆▇▇▇▇██
train_rmse,█▆▆▅▅▄▄▄▃▃▂▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.00842
test_rmse,0.79863
train_r2,0.03411
train_rmse,0.80435
w_test_r2,0.00842
w_train_r2,0.03411


[34m[1mwandb[0m: Agent Starting Run: fapj24st with config:
[34m[1mwandb[0m: 	border_count: 128
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_depth: 7
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0001517	test: 0.0000637	best: 0.0000637 (0)	total: 148ms	remaining: 3m 41s
100:	learn: 0.0088429	test: 0.0038124	best: 0.0038124 (100)	total: 10s	remaining: 2m 18s
200:	learn: 0.0127244	test: 0.0052915	best: 0.0052915 (200)	total: 20.1s	remaining: 2m 9s
300:	learn: 0.0156356	test: 0.0061182	best: 0.0061182 (300)	total: 30.7s	remaining: 2m 2s
400:	learn: 0.0182021	test: 0.0066678	best: 0.0066678 (400)	total: 41.7s	remaining: 1m 54s
500:	learn: 0.0208021	test: 0.0071077	best: 0.0071077 (500)	total: 52.6s	remaining: 1m 44s
600:	learn: 0.0231200	test: 0.0074667	best: 0.0074667 (600)	total: 1m 3s	remaining: 1m 34s
700:	learn: 0.0255233	test: 0.0077463	best: 0.0077463 (700)	total: 1m 14s	remaining: 1m 24s
800:	learn: 0.0279871	test: 0.0079641	best: 0.0079641 (800)	total: 1m 25s	remaining: 1m 14s
900:	learn: 0.0300211	test: 0.0081700	best: 0.0081700 (900)	total: 1m 36s	remaining: 1m 4s
1000:	learn: 0.0320230	test: 0.0083364	best: 0.0083364 (1000)	total: 1m 47s	remaining: 53.8s
110

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▄▅▆▆▇▇▇▇███████
test_rmse,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁
train_r2,▁▂▃▄▄▅▅▅▆▆▇▇▇▇██
train_rmse,█▇▆▅▅▄▄▄▃▃▃▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.00879
test_rmse,0.79848
train_r2,0.04071
train_rmse,0.80159
w_test_r2,0.00879
w_train_r2,0.04071


[34m[1mwandb[0m: Agent Starting Run: c5uuok0w with config:
[34m[1mwandb[0m: 	border_count: 128
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	max_depth: 5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0002923	test: 0.0001316	best: 0.0001316 (0)	total: 125ms	remaining: 3m 7s
100:	learn: 0.0108895	test: 0.0049593	best: 0.0049593 (100)	total: 8.03s	remaining: 1m 51s
200:	learn: 0.0154629	test: 0.0061537	best: 0.0061537 (200)	total: 16.3s	remaining: 1m 45s
300:	learn: 0.0191656	test: 0.0069058	best: 0.0069058 (300)	total: 24.6s	remaining: 1m 37s
400:	learn: 0.0221563	test: 0.0074343	best: 0.0074343 (400)	total: 32.9s	remaining: 1m 30s
500:	learn: 0.0257482	test: 0.0077195	best: 0.0077195 (500)	total: 41.2s	remaining: 1m 22s
600:	learn: 0.0285264	test: 0.0078805	best: 0.0078805 (600)	total: 49.7s	remaining: 1m 14s
700:	learn: 0.0310439	test: 0.0079919	best: 0.0080092 (685)	total: 58.1s	remaining: 1m 6s
800:	learn: 0.0330165	test: 0.0080999	best: 0.0081393 (785)	total: 1m 6s	remaining: 58s
900:	learn: 0.0349194	test: 0.0082257	best: 0.0082257 (900)	total: 1m 14s	remaining: 49.5s
1000:	learn: 0.0365044	test: 0.0083012	best: 0.0083012 (1000)	total: 1m 22s	remaining: 40.9s
1100:	

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▅▆▇▇▇██████████
test_rmse,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁
train_r2,▁▃▃▄▄▅▆▆▆▇▇▇▇███
train_rmse,█▆▆▅▅▄▄▃▃▂▂▂▂▁▁▁

0,1
iteration,1500.0
test_r2,0.00844
test_rmse,0.79862
train_r2,0.04404
train_rmse,0.8002
w_test_r2,0.00844
w_train_r2,0.04404


[34m[1mwandb[0m: Agent Starting Run: wa5e6ctu with config:
[34m[1mwandb[0m: 	border_count: 128
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	max_depth: 6
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0003330	test: 0.0001498	best: 0.0001498 (0)	total: 132ms	remaining: 3m 18s
100:	learn: 0.0124718	test: 0.0053670	best: 0.0053670 (100)	total: 8.44s	remaining: 1m 56s
200:	learn: 0.0179815	test: 0.0066972	best: 0.0066972 (200)	total: 17.4s	remaining: 1m 52s
300:	learn: 0.0224718	test: 0.0074323	best: 0.0074323 (300)	total: 26.3s	remaining: 1m 44s
400:	learn: 0.0269605	test: 0.0077617	best: 0.0077617 (400)	total: 35.3s	remaining: 1m 36s
500:	learn: 0.0312521	test: 0.0078665	best: 0.0078665 (500)	total: 44.3s	remaining: 1m 28s
600:	learn: 0.0342356	test: 0.0079798	best: 0.0080257 (555)	total: 53.3s	remaining: 1m 19s
700:	learn: 0.0370237	test: 0.0081239	best: 0.0081239 (700)	total: 1m 2s	remaining: 1m 11s
800:	learn: 0.0395930	test: 0.0081970	best: 0.0081970 (800)	total: 1m 11s	remaining: 1m 2s
900:	learn: 0.0420524	test: 0.0082781	best: 0.0082781 (900)	total: 1m 20s	remaining: 53.3s
1000:	learn: 0.0440333	test: 0.0083542	best: 0.0083655 (985)	total: 1m 29s	remaining: 44.5s
11

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▅▇▇▇███████████
test_rmse,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁
train_r2,▁▃▃▄▅▅▅▆▆▇▇▇▇███
train_rmse,█▆▆▅▅▄▄▃▃▃▂▂▂▁▁▁

0,1
iteration,1500.0
test_r2,0.00844
test_rmse,0.79862
train_r2,0.05332
train_rmse,0.79631
w_test_r2,0.00844
w_train_r2,0.05333


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 84lhn7ha with config:
[34m[1mwandb[0m: 	border_count: 128
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	max_depth: 7
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0003764	test: 0.0001705	best: 0.0001705 (0)	total: 142ms	remaining: 3m 32s
100:	learn: 0.0142977	test: 0.0057742	best: 0.0057742 (100)	total: 9.46s	remaining: 2m 11s
200:	learn: 0.0209725	test: 0.0071294	best: 0.0071294 (200)	total: 19.6s	remaining: 2m 6s
300:	learn: 0.0268937	test: 0.0077453	best: 0.0077453 (300)	total: 29.7s	remaining: 1m 58s
400:	learn: 0.0324081	test: 0.0081770	best: 0.0081949 (395)	total: 39.8s	remaining: 1m 48s
500:	learn: 0.0371198	test: 0.0083626	best: 0.0083626 (500)	total: 49.8s	remaining: 1m 39s
600:	learn: 0.0408955	test: 0.0085271	best: 0.0085271 (600)	total: 1m	remaining: 1m 29s
700:	learn: 0.0442066	test: 0.0086399	best: 0.0086399 (700)	total: 1m 10s	remaining: 1m 20s
800:	learn: 0.0472689	test: 0.0087243	best: 0.0087344 (755)	total: 1m 20s	remaining: 1m 10s
900:	learn: 0.0501914	test: 0.0087948	best: 0.0088147 (890)	total: 1m 30s	remaining: 1m
1000:	learn: 0.0528851	test: 0.0088584	best: 0.0088622 (995)	total: 1m 40s	remaining: 50.2s
1100:	l

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▅▇▇▇███████████
test_rmse,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁
train_r2,▁▃▃▄▄▅▅▆▆▆▇▇▇███
train_rmse,█▇▆▅▅▄▄▃▃▃▂▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.00892
test_rmse,0.79843
train_r2,0.06472
train_rmse,0.7915
w_test_r2,0.00892
w_train_r2,0.06472


[34m[1mwandb[0m: Agent Starting Run: ta3klyk5 with config:
[34m[1mwandb[0m: 	border_count: 255
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_depth: 5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0001187	test: 0.0000517	best: 0.0000517 (0)	total: 140ms	remaining: 3m 30s
100:	learn: 0.0067009	test: 0.0031974	best: 0.0031974 (100)	total: 7.64s	remaining: 1m 45s
200:	learn: 0.0097643	test: 0.0045255	best: 0.0045255 (200)	total: 15.4s	remaining: 1m 39s
300:	learn: 0.0117866	test: 0.0052526	best: 0.0052526 (300)	total: 23.4s	remaining: 1m 33s
400:	learn: 0.0135970	test: 0.0057215	best: 0.0057215 (400)	total: 31.5s	remaining: 1m 26s
500:	learn: 0.0153564	test: 0.0061586	best: 0.0061586 (500)	total: 39.6s	remaining: 1m 19s
600:	learn: 0.0169960	test: 0.0064685	best: 0.0064685 (600)	total: 47.7s	remaining: 1m 11s
700:	learn: 0.0183781	test: 0.0067580	best: 0.0067580 (700)	total: 55.8s	remaining: 1m 3s
800:	learn: 0.0196832	test: 0.0070340	best: 0.0070340 (800)	total: 1m 3s	remaining: 55.8s
900:	learn: 0.0209493	test: 0.0072718	best: 0.0072718 (900)	total: 1m 12s	remaining: 47.9s
1000:	learn: 0.0223797	test: 0.0074285	best: 0.0074285 (1000)	total: 1m 20s	remaining: 39.9s
110

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▄▅▆▆▆▇▇▇▇██████
test_rmse,█▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁
train_r2,▁▃▃▄▄▅▅▅▆▆▆▇▇▇██
train_rmse,█▆▆▅▅▄▄▄▃▃▃▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.00795
test_rmse,0.79882
train_r2,0.02861
train_rmse,0.80664
w_test_r2,0.00795
w_train_r2,0.02861


[34m[1mwandb[0m: Agent Starting Run: ranqcyzh with config:
[34m[1mwandb[0m: 	border_count: 255
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_depth: 6
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0001339	test: 0.0000509	best: 0.0000509 (0)	total: 143ms	remaining: 3m 34s
100:	learn: 0.0077483	test: 0.0035245	best: 0.0035245 (100)	total: 8.63s	remaining: 1m 59s
200:	learn: 0.0112610	test: 0.0049044	best: 0.0049044 (200)	total: 17.3s	remaining: 1m 51s
300:	learn: 0.0136835	test: 0.0056876	best: 0.0056876 (300)	total: 26.4s	remaining: 1m 45s
400:	learn: 0.0158416	test: 0.0061654	best: 0.0061654 (400)	total: 35.7s	remaining: 1m 37s
500:	learn: 0.0179437	test: 0.0066106	best: 0.0066106 (500)	total: 45.2s	remaining: 1m 30s
600:	learn: 0.0199559	test: 0.0069530	best: 0.0069530 (600)	total: 54.6s	remaining: 1m 21s
700:	learn: 0.0218699	test: 0.0072819	best: 0.0072819 (700)	total: 1m 3s	remaining: 1m 12s
800:	learn: 0.0237028	test: 0.0075350	best: 0.0075350 (800)	total: 1m 13s	remaining: 1m 3s
900:	learn: 0.0255295	test: 0.0077427	best: 0.0077427 (900)	total: 1m 22s	remaining: 54.9s
1000:	learn: 0.0271343	test: 0.0079335	best: 0.0079335 (1000)	total: 1m 32s	remaining: 45.9s
1

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▄▅▆▆▇▇▇▇███████
test_rmse,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁
train_r2,▁▃▃▄▄▅▅▅▆▆▇▇▇▇██
train_rmse,█▆▆▅▅▄▄▄▃▃▂▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.00833
test_rmse,0.79867
train_r2,0.03442
train_rmse,0.80422
w_test_r2,0.00833
w_train_r2,0.03442


[34m[1mwandb[0m: Agent Starting Run: vl4drb27 with config:
[34m[1mwandb[0m: 	border_count: 255
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_depth: 7
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0001534	test: 0.0000598	best: 0.0000598 (0)	total: 155ms	remaining: 3m 52s
100:	learn: 0.0088105	test: 0.0038505	best: 0.0038505 (100)	total: 9.76s	remaining: 2m 15s
200:	learn: 0.0127524	test: 0.0052818	best: 0.0052818 (200)	total: 19.6s	remaining: 2m 6s
300:	learn: 0.0157741	test: 0.0060925	best: 0.0060925 (300)	total: 30s	remaining: 1m 59s
400:	learn: 0.0185576	test: 0.0066236	best: 0.0066236 (400)	total: 40.7s	remaining: 1m 51s
500:	learn: 0.0209205	test: 0.0070619	best: 0.0070619 (500)	total: 51.3s	remaining: 1m 42s
600:	learn: 0.0233916	test: 0.0074301	best: 0.0074301 (600)	total: 1m 1s	remaining: 1m 32s
700:	learn: 0.0258333	test: 0.0076981	best: 0.0076981 (700)	total: 1m 12s	remaining: 1m 22s
800:	learn: 0.0283626	test: 0.0079295	best: 0.0079295 (800)	total: 1m 23s	remaining: 1m 12s
900:	learn: 0.0303614	test: 0.0081067	best: 0.0081067 (900)	total: 1m 33s	remaining: 1m 2s
1000:	learn: 0.0322599	test: 0.0082539	best: 0.0082539 (1000)	total: 1m 44s	remaining: 52.1s
11

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▄▅▆▆▇▇▇▇███████
test_rmse,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁
train_r2,▁▂▃▄▄▅▅▅▆▆▇▇▇▇██
train_rmse,█▇▆▅▅▄▄▄▃▃▂▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.00868
test_rmse,0.79852
train_r2,0.04071
train_rmse,0.80159
w_test_r2,0.00868
w_train_r2,0.04071


[34m[1mwandb[0m: Agent Starting Run: 2bce568r with config:
[34m[1mwandb[0m: 	border_count: 255
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	max_depth: 5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0002945	test: 0.0001410	best: 0.0001410 (0)	total: 139ms	remaining: 3m 27s
100:	learn: 0.0108862	test: 0.0049367	best: 0.0049367 (100)	total: 7.79s	remaining: 1m 47s
200:	learn: 0.0153875	test: 0.0060757	best: 0.0060757 (200)	total: 15.8s	remaining: 1m 42s
300:	learn: 0.0190579	test: 0.0068897	best: 0.0068897 (300)	total: 23.7s	remaining: 1m 34s
400:	learn: 0.0222853	test: 0.0074394	best: 0.0074394 (400)	total: 31.8s	remaining: 1m 27s
500:	learn: 0.0256481	test: 0.0077413	best: 0.0077413 (500)	total: 39.8s	remaining: 1m 19s
600:	learn: 0.0282675	test: 0.0080579	best: 0.0080579 (600)	total: 47.9s	remaining: 1m 11s
700:	learn: 0.0309902	test: 0.0082626	best: 0.0082626 (700)	total: 56.2s	remaining: 1m 4s
800:	learn: 0.0330739	test: 0.0083563	best: 0.0083563 (800)	total: 1m 4s	remaining: 56.1s
900:	learn: 0.0351299	test: 0.0084133	best: 0.0084198 (885)	total: 1m 12s	remaining: 48.1s
1000:	learn: 0.0368797	test: 0.0084893	best: 0.0084893 (1000)	total: 1m 20s	remaining: 40.1s
110

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▅▆▇▇▇██████████
test_rmse,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁
train_r2,▁▃▃▄▅▅▅▆▆▇▇▇▇███
train_rmse,█▆▆▅▅▄▄▃▃▂▂▂▂▁▁▁

0,1
iteration,1500.0
test_r2,0.00867
test_rmse,0.79853
train_r2,0.04425
train_rmse,0.80011
w_test_r2,0.00867
w_train_r2,0.04426


[34m[1mwandb[0m: Agent Starting Run: g9ln3x8l with config:
[34m[1mwandb[0m: 	border_count: 255
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	max_depth: 6
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0003322	test: 0.0001387	best: 0.0001387 (0)	total: 144ms	remaining: 3m 36s
100:	learn: 0.0124832	test: 0.0053941	best: 0.0053941 (100)	total: 8.85s	remaining: 2m 2s
200:	learn: 0.0179682	test: 0.0066084	best: 0.0066084 (200)	total: 18.2s	remaining: 1m 57s
300:	learn: 0.0228428	test: 0.0073966	best: 0.0073966 (300)	total: 27.6s	remaining: 1m 49s
400:	learn: 0.0274386	test: 0.0077850	best: 0.0077850 (400)	total: 36.9s	remaining: 1m 41s
500:	learn: 0.0311070	test: 0.0079329	best: 0.0079329 (500)	total: 46.3s	remaining: 1m 32s
600:	learn: 0.0344258	test: 0.0081543	best: 0.0081808 (585)	total: 55.6s	remaining: 1m 23s
700:	learn: 0.0372595	test: 0.0082524	best: 0.0082532 (695)	total: 1m 4s	remaining: 1m 14s
800:	learn: 0.0397836	test: 0.0082515	best: 0.0082649 (705)	total: 1m 14s	remaining: 1m 4s
900:	learn: 0.0421575	test: 0.0083955	best: 0.0083955 (900)	total: 1m 23s	remaining: 55.6s
1000:	learn: 0.0441338	test: 0.0084752	best: 0.0084925 (995)	total: 1m 33s	remaining: 46.4s
110

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▅▆▇▇███████████
test_rmse,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁
train_r2,▁▃▃▄▅▅▅▆▆▇▇▇▇███
train_rmse,█▆▆▅▄▄▄▃▃▂▂▂▂▁▁▁

0,1
iteration,1500.0
test_r2,0.00851
test_rmse,0.79859
train_r2,0.0534
train_rmse,0.79627
w_test_r2,0.00851
w_train_r2,0.05341


[34m[1mwandb[0m: Agent Starting Run: s0twatx6 with config:
[34m[1mwandb[0m: 	border_count: 255
[34m[1mwandb[0m: 	iterations: 1500
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	max_depth: 7
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0003805	test: 0.0001609	best: 0.0001609 (0)	total: 157ms	remaining: 3m 55s
100:	learn: 0.0144187	test: 0.0057204	best: 0.0057204 (100)	total: 9.99s	remaining: 2m 18s
200:	learn: 0.0212640	test: 0.0070248	best: 0.0070248 (200)	total: 20.7s	remaining: 2m 13s
300:	learn: 0.0272199	test: 0.0076743	best: 0.0076743 (300)	total: 31.3s	remaining: 2m 4s
400:	learn: 0.0323261	test: 0.0080597	best: 0.0080597 (400)	total: 41.8s	remaining: 1m 54s
500:	learn: 0.0367515	test: 0.0082351	best: 0.0082351 (500)	total: 52.4s	remaining: 1m 44s
600:	learn: 0.0406881	test: 0.0084805	best: 0.0084805 (600)	total: 1m 3s	remaining: 1m 34s
700:	learn: 0.0441504	test: 0.0085459	best: 0.0085459 (700)	total: 1m 13s	remaining: 1m 24s
800:	learn: 0.0471817	test: 0.0085452	best: 0.0085592 (705)	total: 1m 24s	remaining: 1m 13s
900:	learn: 0.0502010	test: 0.0086532	best: 0.0086532 (900)	total: 1m 34s	remaining: 1m 3s
1000:	learn: 0.0528521	test: 0.0087085	best: 0.0087085 (1000)	total: 1m 45s	remaining: 52.6s


VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_r2,▁▅▆▇▇▇██████████
test_rmse,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁
train_r2,▁▃▃▄▄▅▅▆▆▆▇▇▇███
train_rmse,█▆▆▅▅▄▄▃▃▃▂▂▂▂▁▁

0,1
iteration,1500.0
test_r2,0.009
test_rmse,0.7984
train_r2,0.0648
train_rmse,0.79147
w_test_r2,0.009
w_train_r2,0.0648


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


# CB Inference Training

In [18]:
api = wandb.Api()

run = api.run("turkenm/js_catboost/6evp7lfh")

config = run.config
cb_feature_cols = config["feature_cols"]

In [26]:
cb_version = "v18"
joblib.dump(feature_cols, f"model_cols/cb_{cb_version}_model_cols.pkl")
len(feature_cols)

122

In [19]:
len(cb_feature_cols)

122

In [20]:
%%time
cb_version = "v20"
short_id = int(time.time()) % 100_000
cb_params = {"random_seed" : 16,
            "iterations" : 1_500,
            "task_type" : "GPU",
            "objective" : "RMSE",
            "eval_metric" : "R2", 
            "cat_features" : ["symbol_id", "feature_09","feature_10","feature_11"],
            "use_best_model" : False,
            "learning_rate" : 0.01,
            "gpu_ram_part" : 0.9,
            "one_hot_max_size" : 64,
            "border_count" : 128,
            "max_depth" : 6}

exp_name = f"cb_run_{short_id}"

inference_data = Pool(train_df[cb_feature_cols], train_df["responder_6"], weight = train_df["weight"], cat_features = ["symbol_id", "feature_09","feature_10","feature_11"])
cb_model_inference = CatBoostRegressor(**cb_params)

wandb.init(
    project="js_catboost",
    name=exp_name,
    tags = ["inference", f"cb_{cb_version}"],
    config={
        "max_depth" : cb_params["max_depth"],
        "random_seed": cb_params["random_seed"],
        "iterations": cb_params["iterations"],
        "objective": cb_params["objective"],
        "one_hot_max_size" : cb_params["one_hot_max_size"],
        "learning_rate" : cb_params["learning_rate"],
        "border_count" : cb_params["border_count"],
        "cat_features" : cb_params["cat_features"],
        "feature_cols" : inference_data.get_feature_names(),
    },
)
wandb.run.notes = "new features, inference"

cb_model_inference.fit(inference_data, verbose = 100)

y_pred_cb = np.clip(cb_model_inference.predict(inference_data), -5, 5)

r_score_train = r2_score(inference_data.get_label(), y_pred_cb, sample_weight = inference_data.get_weight())

wandb.run.summary["w_train_r2"] = r_score_train

eval_metrics_train = cb_model_inference.eval_metrics(inference_data, metrics = ["R2", "RMSE"], eval_period = 100, thread_count = 16)


for iteration in range(len(eval_metrics_train['R2'])): 
    metrics_to_log = {
        "iteration": (iteration) * 100,
        "train_r2": eval_metrics_train['R2'][iteration],
        "train_rmse": eval_metrics_train['RMSE'][iteration],
    }
    
    wandb.log(metrics_to_log)

cb_model_inference.save_model(os.path.join(wandb.run.dir, f"cb_{short_id}.cbm"))
cb_model_inference.save_model(f"cb_model_results/cb_{cb_version}.cbm")
joblib.dump(cb_feature_cols, f"model_cols/cb_{cb_version}_model_cols.pkl")
wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mturkenm[0m. Use [1m`wandb login --relogin`[0m to force relogin


Default metric period is 5 because R2 is/are not implemented for GPU
Metric R2 is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.0001184	total: 346ms	remaining: 20m 10s
100:	learn: 0.0061118	total: 16.6s	remaining: 9m 19s
200:	learn: 0.0085705	total: 33.8s	remaining: 9m 15s
300:	learn: 0.0104072	total: 51.9s	remaining: 9m 11s
400:	learn: 0.0118879	total: 1m 9s	remaining: 8m 57s
500:	learn: 0.0131861	total: 1m 28s	remaining: 8m 47s
600:	learn: 0.0144383	total: 1m 46s	remaining: 8m 34s
700:	learn: 0.0156167	total: 2m 5s	remaining: 8m 20s
800:	learn: 0.0166911	total: 2m 23s	remaining: 8m 4s
900:	learn: 0.0178537	total: 2m 41s	remaining: 7m 46s
1000:	learn: 0.0189673	total: 3m	remaining: 7m 30s
1100:	learn: 0.0200166	total: 3m 18s	remaining: 7m 12s
1200:	learn: 0.0210992	total: 3m 36s	remaining: 6m 55s
1300:	learn: 0.0220069	total: 3m 55s	remaining: 6m 37s
1400:	learn: 0.0229765	total: 4m 13s	remaining: 6m 19s
1500:	learn: 0.0239747	total: 4m 31s	remaining: 6m 2s
1600:	learn: 0.0249125	total: 4m 50s	remaining: 5m 44s
1700:	learn: 0.0257455	total: 5m 8s	remaining: 5m 26s
1800:	learn: 0.0266035	total: 5m 2

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
iteration,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
train_r2,▁▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇██████
train_rmse,█▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁

0,1
iteration,3500.0
train_r2,0.03748
train_rmse,0.80743
w_train_r2,0.03748


CPU times: user 22min 36s, sys: 1min 29s, total: 24min 6s
Wall time: 11min 57s
