# CatBoost incremental
## 🪛 Tuning

Author: https://github.com/deburky

In [None]:
!uv pip install -e ../.

In [None]:
from pathlib import Path

import pyarrow.dataset as ds
from ray import tune

from catboost_incremental import DataLoader, CatBoostTrainer, CatBoostTuner

# Column handed to the loader and trainer as `label_col`.
label = "target"

# Absolute path of ../data as a string, with the trailing slash kept.
dataset_path = f"{Path('../data/').resolve()}/"

# Open the dataset at that path and materialise all of it as a pandas DataFrame.
dataset = ds.dataset(dataset_path)
full_df = dataset.to_table().to_pandas()

# Build the DataLoader once (10,000-row chunks, partitions identified by the
# "partition_id" column) and hand that single instance to the trainer.
data_loader = DataLoader(
    dataset_path,
    chunk_size=10_000,
    partition_id_col="partition_id",
    label_col=label,
)
trainer = CatBoostTrainer(data_loader=data_loader, label_col=label)

# Training data as produced by the trainer's loader.
train_data = trainer.data_loader.read_parquet()

# Evaluation frame, read from the dataset object that was already opened
# instead of discovering the same directory a second time.
# NOTE(review): this is the entire dataset (same source as `full_df`), not a
# held-out split -- confirm that evaluating on the training data is intended.
test_df = dataset.to_table().to_pandas()

# Tuner wrapping the trainer, with "accuracy" as its metric.
tuner = CatBoostTuner(trainer=trainer, metric="accuracy")

# Search space: sampled hyperparameters plus two fixed settings.
search_space = {
    "iterations": tune.choice([50, 100]),
    "learning_rate": tune.uniform(0.01, 0.3),
    "depth": tune.choice([4, 6, 8]),
    "verbose": 0,
    "allow_writing_files": False,
}

# Run the search with num_samples=5 and report the winning configuration.
result = tuner.tune(param_space=search_space, num_samples=5)

print(f"Best config: {result.config}")