## Install Dependencies

In [None]:
# Install required packages if not already available
!pip install optuna lightgbm xgboost catboost scikit-learn pandas numpy pyyaml -q

## Import Standard Libraries

In [None]:
import optuna
from optuna.samplers import TPESampler
import pandas as pd
import numpy as np
import lightgbm as lgb
import xgboost as xgb
import catboost as cat
from catboost import Pool
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from typing import List, Optional, Tuple, Callable, Union, Any, Dict
from dataclasses import dataclass
from abc import ABC, abstractmethod
from copy import deepcopy
from enum import Enum
import yaml

## Configuration Classes

In [None]:
@dataclass
class DatasetConfig:
    """Column-level description of a tabular dataset.

    The fields are plain data and nothing is validated here. The other
    classes in this notebook read them to decide which columns to scale,
    which to encode, which to feed to the model, and which one is the label.
    """
    # Feature columns selected from the frame at prediction time.
    training_col_names: List[str]
    # Name of the label column.
    target_col_name: str
    # Columns handed to the numerical scaler.
    numerical_col_names: List[str]
    # Columns handed to the categorical encoder.
    categorical_col_names: List[str]
    # Optional free-form dataset name; not read by the code visible in this notebook.
    name: Optional[str] = None

## Base Classes

In [None]:
class BaseKtoolsModel(ABC):
    """Abstract interface shared by every model wrapper.

    Concrete subclasses must implement ``fit`` and ``predict``. The base
    class only provides the ``model`` slot and the ``fitted`` flag.
    """

    def __init__(self) -> None:
        # Underlying library model; stays None until a subclass assigns it.
        self.model = None
        self._fitted = False

    @property
    def fitted(self) -> bool:
        """Return the current value of the internal ``_fitted`` flag."""
        return self._fitted

    @abstractmethod
    def fit(
        self,
        X,
        y,
        validation_set: Optional[Tuple] = None,
        weights: Optional[np.ndarray] = None,
    ) -> "BaseKtoolsModel":
        """Train the model on ``X``/``y``; implemented by subclasses."""

    @abstractmethod
    def predict(self, X) -> np.ndarray:
        """Return predictions for ``X``; implemented by subclasses."""


class BasePreprocessor(ABC):
    """Abstract fit/transform step that operates on DataFrames.

    Subclasses implement ``fit`` and ``transform``; ``fit_transform`` is
    provided here and simply chains the two on the same frame.
    """

    name = "base-preprocessor"

    def __init__(self, config: DatasetConfig):
        self.config = config
        self._fitted = False

    @property
    def fitted(self) -> bool:
        """Return the current value of the internal ``_fitted`` flag."""
        return self._fitted

    @abstractmethod
    def fit(self, data: pd.DataFrame) -> "BasePreprocessor":
        """Learn whatever state the step needs from ``data``."""

    @abstractmethod
    def transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """Apply the step to ``data`` and return the result."""

    def fit_transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """Fit on ``data`` and then transform that same frame."""
        fitted_step = self.fit(data)
        return fitted_step.transform(data)

## Utility Functions

In [None]:
def infer_task(y: Union[np.ndarray, pd.Series]) -> str:
    """Guess the learning task from the target values.

    Returns ``"regression"`` if any value has a fractional part, otherwise
    ``"binary_classification"`` for exactly two distinct values and
    ``"multiclass_classification"`` for more than two. The decision is
    also printed.

    Raises:
        ValueError: if none of the rules apply (fewer than two distinct,
            whole-valued targets).
    """
    values = (y.to_numpy() if isinstance(y, pd.Series) else y).flatten()
    n_distinct = np.unique(values).shape[0]

    # A non-zero remainder modulo 1 means at least one value is not a whole number.
    if np.any(values % 1 != 0):
        print("Target contains float values. Inferring regression task.")
        return "regression"

    if n_distinct == 2:
        print("Target contains two unique values. Inferring binary classification task.")
        return "binary_classification"

    if n_distinct > 2:
        print("Target contains more than two unique values. Inferring multiclass classification task.")
        return "multiclass_classification"

    raise ValueError(
        "Unable to infer task type from target values. Is there only one target value?"
    )


def load_optuna_grid(path: str, model_type: str) -> Callable:
    """Load a parameter grid from a YAML file and return a sampler function.

    The YAML document is expected to map model-type keys to mappings of
    ``param_name -> spec``, where each spec has a ``type`` of ``int``,
    ``float``, ``categorical`` or ``fixed``.

    Args:
        path: Location of the YAML file.
        model_type: Top-level key whose grid should be used.

    Returns:
        A function taking an ``optuna.Trial`` and returning a dict of
        parameter values (suggested through the trial, or copied verbatim
        for ``fixed`` entries).

    Raises:
        ValueError: if the file has no usable grid for ``model_type``, or
            (when the returned function is called) a parameter spec is not
            a mapping or has an unsupported ``type``.
    """
    with open(path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; treat it as "no grids".
        param_grid_all = yaml.safe_load(f) or {}
    if not isinstance(param_grid_all, dict):
        raise ValueError(f"No parameter grid found for model type: {model_type}")

    # A key with an empty body loads as None, so normalise that to an empty grid.
    param_grid = param_grid_all.get(model_type) or {}
    if not isinstance(param_grid, dict) or not param_grid:
        raise ValueError(f"No parameter grid found for model type: {model_type}")

    def param_grid_getter(trial: optuna.Trial) -> Dict:
        unpacked = {}
        for param_name, param_info in param_grid.items():
            if not isinstance(param_info, dict):
                raise ValueError(
                    f"Parameter '{param_name}' must be a mapping with a 'type' key"
                )
            dtype = param_info.get("type")
            if dtype == "int":
                unpacked[param_name] = trial.suggest_int(
                    param_name,
                    param_info["low"],
                    param_info["high"],
                    step=param_info.get("step", 1),
                    log=param_info.get("log", False),
                )
            elif dtype == "float":
                unpacked[param_name] = trial.suggest_float(
                    param_name,
                    param_info["low"],
                    param_info["high"],
                    step=param_info.get("step"),
                    log=param_info.get("log", False),
                )
            elif dtype == "categorical":
                unpacked[param_name] = trial.suggest_categorical(
                    param_name,
                    param_info["choices"],
                )
            elif dtype == "fixed":
                # Fixed values bypass the trial, so Optuna does not record them.
                unpacked[param_name] = param_info["value"]
            else:
                raise ValueError(f"Unsupported parameter type: {dtype}")
        return unpacked

    return param_grid_getter

## Preprocessing Classes

In [None]:
class CategoricalEncoder(BasePreprocessor):
    """Ordinal-encode the configured categorical columns.

    Wraps scikit-learn's ``OrdinalEncoder``. After encoding, the columns
    are cast to integers and then to pandas ``category`` dtype.
    """

    name = "categorical-encoder"

    def __init__(
        self,
        config: DatasetConfig,
        handle_unknown: str = "use_encoded_value",
        unknown_value: int = -2,
        encoded_missing_value: int = -1,
        **encoder_kwargs,
    ) -> None:
        super().__init__(config)
        # Kept separately so transform() can re-apply it to originally-missing cells.
        self.encode_missing_value = encoded_missing_value
        ordinal_kwargs = {
            "handle_unknown": handle_unknown,
            "unknown_value": unknown_value,
            "encoded_missing_value": encoded_missing_value,
        }
        self.encoder = OrdinalEncoder(**ordinal_kwargs, **encoder_kwargs)

    def fit(self, data: pd.DataFrame) -> "CategoricalEncoder":
        """Fit the encoder on the categorical columns of ``data``."""
        cat_cols = self.config.categorical_col_names
        self.encoder.fit(data[cat_cols])
        self._fitted = True
        return self

    def transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``data`` with the categorical columns encoded."""
        cat_cols = self.config.categorical_col_names
        encoded = data.copy()

        # Remember where values were missing before the encoder rewrites them.
        was_missing = encoded[cat_cols].isna()

        codes = self.encoder.transform(encoded[cat_cols])
        encoded[cat_cols] = codes.astype(int)

        with_missing_code = encoded[cat_cols].where(
            ~was_missing, self.encode_missing_value
        )
        encoded[cat_cols] = with_missing_code.astype("category")
        return encoded


class StandardScale(BasePreprocessor):
    """Apply scikit-learn's ``StandardScaler`` to the configured numerical columns."""

    name = "standard-scaler"

    def __init__(self, config: DatasetConfig) -> None:
        super().__init__(config)
        self.scaler = StandardScaler()

    def fit(self, data: pd.DataFrame) -> "StandardScale":
        """Fit the scaler on the numerical columns of ``data``."""
        num_cols = self.config.numerical_col_names
        self.scaler.fit(data[num_cols])
        self._fitted = True
        return self

    def transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``data`` with the numerical columns scaled."""
        num_cols = self.config.numerical_col_names
        scaled = data.copy()
        scaled[num_cols] = self.scaler.transform(scaled[num_cols])
        return scaled


class PreprocessingPipeline:
    """Run a list of preprocessors one after another.

    Each step receives the output of the previous one.
    """

    def __init__(self, preprocessors: List[BasePreprocessor]) -> None:
        self.preprocessors = preprocessors

    def train_pipe(self, data: pd.DataFrame) -> pd.DataFrame:
        """Fit every step in order on ``data`` and return the transformed frame."""
        current = data
        for step in self.preprocessors:
            current = step.fit_transform(current)
        return current

    def inference_pipe(self, data: pd.DataFrame) -> pd.DataFrame:
        """Apply every step in order without refitting and return the result."""
        current = data
        for step in self.preprocessors:
            current = step.transform(current)
        return current

## Model Classes

In [None]:
class DefaultObjective(Enum):
    """Default objectives for different tasks.

    Member names match the task identifiers returned by ``infer_task``.
    ``LGBMModel.fit`` looks a member up by that name and stores its value
    under the ``objective`` key when the caller did not supply one.
    """
    regression = "regression"
    binary_classification = "binary"
    multiclass_classification = "multiclass"


class LGBMModel(BaseKtoolsModel):
    """LightGBM model wrapper built on ``lgb.train``.

    Args:
        num_boost_round: Maximum number of boosting iterations.
        early_stopping_rounds: Value stored under the ``early_stopping_rounds``
            parameter when a validation set is passed to ``fit``.
        random_state: Seed forwarded to LightGBM.
        verbose: LightGBM verbosity level.
        n_jobs: Number of threads forwarded to LightGBM.
        callbacks: Optional list of LightGBM callbacks. ``None`` (the
            default) means no callbacks.
        **lgb_param_grid: Any further LightGBM parameters; these override
            the three defaults above when the keys collide.
    """

    def __init__(
        self,
        num_boost_round: int = 100,
        early_stopping_rounds: Union[int, None] = 20,
        random_state: int = 129,
        verbose: int = -1,
        n_jobs: int = 1,
        callbacks: Optional[List[Any]] = None,
        **lgb_param_grid,
    ) -> None:
        super().__init__()
        self._num_boost_round = num_boost_round
        self._verbose = verbose
        self._n_jobs = n_jobs
        # Copy into a fresh list so instances never share (or alias) a default.
        self._callbacks = list(callbacks) if callbacks is not None else []
        self.early_stopping_rounds = early_stopping_rounds

        self._lgb_param_grid = {
            "verbose": verbose,
            "random_state": random_state,
            "n_jobs": n_jobs,
            **lgb_param_grid,
        }

    def fit(
        self,
        X,
        y,
        validation_set: Optional[Tuple] = None,
        weights: Optional[np.ndarray] = None,
    ) -> "LGBMModel":
        """Train the booster.

        If no ``objective`` was supplied, it is inferred from ``y`` via
        ``infer_task`` (and ``num_class`` is set for multiclass targets).
        When ``validation_set`` is an ``(X_val, y_val)`` tuple it is added
        as a second evaluation set and early stopping is configured.

        Returns:
            ``self``, to allow chaining.
        """
        if "objective" not in self._lgb_param_grid:
            task_id = infer_task(y)
            self._lgb_param_grid["objective"] = DefaultObjective[task_id].value
            if task_id == "multiclass_classification":
                self._lgb_param_grid["num_class"] = np.unique(y).shape[0]

        train_data = lgb.Dataset(X, label=y, weight=weights)
        eval_sets = [train_data]
        eval_names = ["train"]
        if validation_set is not None:
            X_val, y_val = validation_set
            val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)
            eval_sets.append(val_data)
            eval_names.append("valid")
            self._lgb_param_grid["early_stopping_rounds"] = self.early_stopping_rounds

        self.model = lgb.train(
            params=self._lgb_param_grid,
            train_set=train_data,
            num_boost_round=self._num_boost_round,
            valid_sets=eval_sets,
            valid_names=eval_names,
            callbacks=self._callbacks,
        )
        self._fitted = True
        return self

    def predict(self, X) -> np.ndarray:
        """Return the booster's predictions for ``X``.

        Raises:
            ValueError: if called before ``fit``.
        """
        if self.model is None:
            raise ValueError("Model is not fitted yet. Please call 'fit' first.")
        return self.model.predict(X)

In [None]:
class XGBoostModel(BaseKtoolsModel):
    """XGBoost model wrapper built on ``xgb.train``.

    Args:
        eval_verbosity: Forwarded to ``xgb.train`` as ``verbose_eval``.
        num_boost_round: Maximum number of boosting iterations.
        early_stopping_rounds: Forwarded to ``xgb.train`` when a validation
            set is passed to ``fit``.
        random_state: Seed forwarded to XGBoost.
        verbosity: XGBoost verbosity level.
        n_jobs: Number of threads forwarded to XGBoost.
        **xgb_param_grid: Any further XGBoost parameters; these override
            the three defaults above when the keys collide.
    """

    class DefaultObjective(Enum):
        """Objective used per inferred task when none is supplied."""
        regression = "reg:squarederror"
        binary_classification = "binary:logistic"
        multiclass_classification = "multi:softprob"

    def __init__(
        self,
        eval_verbosity: bool = False,
        num_boost_round: int = 100,
        early_stopping_rounds: Union[int, None] = 20,
        random_state: int = 129,
        verbosity: int = 0,
        n_jobs: int = 1,
        **xgb_param_grid,
    ) -> None:
        super().__init__()
        self._eval_verbosity = eval_verbosity
        self._num_boost_round = num_boost_round
        self._verbosity = verbosity
        self._n_jobs = n_jobs
        self._early_stopping_rounds = early_stopping_rounds

        self._xgb_param_grid = {
            "verbosity": verbosity,
            "random_state": random_state,
            "n_jobs": n_jobs,
            **xgb_param_grid,
        }

    def fit(
        self,
        X,
        y,
        validation_set: Optional[Tuple] = None,
        weights: Optional[np.ndarray] = None,
    ) -> "XGBoostModel":
        """Train the booster.

        If no ``objective`` was supplied, it is inferred from ``y`` via
        ``infer_task`` (and ``num_class`` is set for multiclass targets).
        When ``validation_set`` is an ``(X_val, y_val)`` tuple it is added
        to the evaluation list and early stopping is enabled.

        Returns:
            ``self``, to allow chaining.
        """
        if "objective" not in self._xgb_param_grid:
            task_id = infer_task(y)
            self._xgb_param_grid["objective"] = self.DefaultObjective[task_id].value
            if task_id == "multiclass_classification":
                self._xgb_param_grid["num_class"] = np.unique(y).shape[0]

        train_data = xgb.DMatrix(X, label=y, enable_categorical=True, weight=weights)
        eval_data = [(train_data, "train")]

        # Only passed to xgb.train when there is a validation set to monitor.
        early_stopping_kwargs = {}
        if validation_set is not None:
            X_val, y_val = validation_set
            valid_data = xgb.DMatrix(X_val, label=y_val, enable_categorical=True)
            eval_data.append((valid_data, "eval"))
            early_stopping_kwargs["early_stopping_rounds"] = self._early_stopping_rounds

        self.model = xgb.train(
            params=self._xgb_param_grid,
            dtrain=train_data,
            evals=eval_data,
            num_boost_round=self._num_boost_round,
            verbose_eval=self._eval_verbosity,
            **early_stopping_kwargs,
        )
        self._fitted = True
        return self

    def predict(self, X) -> np.ndarray:
        """Return the booster's predictions for ``X``.

        Raises:
            ValueError: if called before ``fit``.
        """
        if self.model is None:
            raise ValueError("Model is not fitted yet. Please call 'fit' first.")
        test_data = xgb.DMatrix(X, enable_categorical=True)
        return self.model.predict(test_data)


class CatBoostModel(BaseKtoolsModel):
    """CatBoost model wrapper built on ``cat.train``.

    The task is inferred from the target on every ``fit`` call and later
    decides whether ``predict`` returns probabilities or raw predictions.
    """

    class DefaultObjective(Enum):
        """Loss function used per inferred task when none is supplied."""
        regression = "RMSE"
        binary_classification = "Logloss"
        multiclass_classification = "MultiClass"

    def __init__(
        self,
        num_boost_round: int = 100,
        early_stopping_rounds: Optional[int] = 20,
        random_state: int = 129,
        verbose: bool = False,
        allow_writing_files: bool = False,
        **catboost_params,
    ) -> None:
        super().__init__()
        self.model: Union[cat.CatBoost, None] = None
        # False until fit() stores the inferred task name (a string).
        self._task: Union[str, bool] = False
        self._num_boost_round = num_boost_round
        self._verbose = verbose
        self._allow_writing_files = allow_writing_files
        self._early_stopping_rounds = early_stopping_rounds

        base_params = {
            "random_seed": random_state,
            "verbose": verbose,
            "allow_writing_files": allow_writing_files,
        }
        # Caller-supplied parameters win over the defaults above.
        self._catboost_params = {**base_params, **catboost_params}

    def fit(
        self,
        X,
        y,
        validation_set: Optional[Tuple] = None,
        weights: Optional[np.ndarray] = None,
    ) -> "CatBoostModel":
        """Train the model.

        Columns of a DataFrame ``X`` with ``category`` dtype are passed to
        CatBoost as categorical features. When ``validation_set`` is an
        ``(X_val, y_val)`` tuple it becomes the eval set and early stopping
        is enabled.

        Returns:
            ``self``, to allow chaining.
        """
        task_id = infer_task(y)
        self._task = task_id
        self._catboost_params.setdefault(
            "loss_function", self.DefaultObjective[task_id].value
        )

        if isinstance(X, pd.DataFrame):
            self.cat_col_names = [
                col for col in X.columns if X[col].dtype == "category"
            ]
        else:
            self.cat_col_names = []

        train_pool = Pool(
            data=X, label=y, cat_features=self.cat_col_names, weight=weights
        )
        fit_kwargs: Dict[Any, Any] = {
            "params": self._catboost_params,
            "dtrain": train_pool,
            "num_boost_round": self._num_boost_round,
            "eval_set": None,
        }
        if validation_set is not None:
            X_val, y_val = validation_set
            fit_kwargs["eval_set"] = Pool(
                data=X_val, label=y_val, cat_features=self.cat_col_names
            )
            fit_kwargs["early_stopping_rounds"] = self._early_stopping_rounds

        self.model = cat.train(**fit_kwargs)
        self._fitted = True
        return self

    def predict(self, X) -> np.ndarray:
        """Return predictions for ``X``.

        Binary tasks yield the second probability column, multiclass tasks
        the full probability matrix, and anything else the model's default
        prediction.

        Raises:
            ValueError: if called before ``fit``.
        """
        if self.model is None:
            raise ValueError("Model is not fitted yet. Please call 'fit' first.")

        test_pool = Pool(data=X, cat_features=self.cat_col_names)
        if self._task == "binary_classification":
            return self.model.predict(test_pool, prediction_type="Probability")[:, 1]
        if self._task == "multiclass_classification":
            return self.model.predict(test_pool, prediction_type="Probability")
        return self.model.predict(test_pool)

## Model Pipeline

In [None]:
class ModelPipeline:
    """Pipeline for model training with preprocessing.

    Args:
        model: Model wrapper to train.
        config: Dataset description; ``training_col_names`` selects the
            features and ``target_col_name`` the label.
        preprocessor: Preprocessing steps applied before the model. When
            omitted, each pipeline gets its own empty
            ``PreprocessingPipeline``.
    """

    def __init__(
        self,
        model: BaseKtoolsModel,
        config: DatasetConfig,
        preprocessor: Optional[PreprocessingPipeline] = None,
    ) -> None:
        self.model = model
        self.config = config
        # Build the default per instance; a default created in the signature
        # would be a single object shared by every pipeline.
        self.preprocessor = (
            preprocessor if preprocessor is not None else PreprocessingPipeline([])
        )

    def fit(
        self,
        train_data: pd.DataFrame,
        validation_data: Optional[pd.DataFrame] = None,
        weights: Optional[Union[pd.Series, np.ndarray]] = None,
    ) -> "ModelPipeline":
        """Fit the preprocessors and the model.

        Args:
            train_data: Frame holding the feature and target columns.
            validation_data: Optional frame with the same columns, passed
                to the model as its validation set after preprocessing.
            weights: Optional per-row training weights.

        Returns:
            ``self``, to allow chaining.
        """
        feature_cols = self.config.training_col_names
        target_col = self.config.target_col_name

        train_data = self.preprocessor.train_pipe(train_data)
        # Select the same columns, in the same order, that predict() uses, so
        # the model never sees a different feature layout at inference time.
        X_train = train_data[feature_cols]
        y_train = train_data[target_col]

        validation_set = None
        if validation_data is not None:
            validation_data = self.preprocessor.inference_pipe(validation_data)
            validation_set = (validation_data[feature_cols], validation_data[target_col])

        self.model.fit(
            X=X_train, y=y_train, validation_set=validation_set, weights=weights
        )
        return self

    def predict(self, data: pd.DataFrame) -> np.ndarray:
        """Preprocess ``data`` and return the model's predictions."""
        data = self.preprocessor.inference_pipe(data)
        X_test = data[self.config.training_col_names]
        return self.model.predict(X_test)

## Cross-Validation Executor

In [None]:
class CrossValidationExecutor:
    """Execute cross-validation with a model pipeline.

    Args:
        config: Dataset description (the target column is read from it).
        model_pipeline: Template pipeline; a deep copy is trained per fold.
        evaluation_metric: Callable invoked as ``metric(y_true, y_pred)``.
        kfold_object: Splitter exposing ``split`` and ``get_n_splits``.
    """

    def __init__(
        self,
        config: DatasetConfig,
        model_pipeline: ModelPipeline,
        evaluation_metric: Callable,
        kfold_object,
    ) -> None:
        self.config = config
        self.model_pipeline = model_pipeline
        self._evaluation_metric = evaluation_metric
        self._splitter = kfold_object
        self._num_splits = kfold_object.get_n_splits()

    def run(
        self,
        train_data: pd.DataFrame,
        weights: Optional[np.ndarray] = None,
        val_data: Optional[pd.DataFrame] = None,
        test_data: Optional[pd.DataFrame] = None,
        groups=None,
        additional_data: Optional[pd.DataFrame] = None,
    ) -> Tuple[float, np.ndarray, List[ModelPipeline], Optional[np.ndarray]]:
        """Train one pipeline per fold and aggregate scores and predictions.

        Args:
            train_data: Frame with features and target; this is what gets split.
            weights: Optional per-row weights aligned with ``train_data``.
                Each fold receives only the weights of its training rows.
            val_data: Optional fixed validation frame used for early
                stopping instead of the held-out fold.
            test_data: Optional frame to predict with every fold model.
            groups: Optional group labels forwarded to the splitter.
            additional_data: Optional extra rows appended to every training
                fold. When ``weights`` is given these rows get weight 1.

        Returns:
            ``(mean_score, train_oof_preds, pipelist, test_oof_preds)``.
            ``test_oof_preds`` is the fold-averaged test prediction, or
            ``None`` when ``test_data`` is not supplied.

        Raises:
            ValueError: if ``weights`` does not have one entry per training row.
        """
        n_train = train_data.shape[0]
        target = train_data[self.config.target_col_name]

        # Work positionally: a pd.Series would otherwise be indexed by label.
        weights_arr = None if weights is None else np.asarray(weights)
        if weights_arr is not None and weights_arr.shape[0] != n_train:
            raise ValueError(
                "weights must have one entry per row of train_data "
                f"(got {weights_arr.shape[0]}, expected {n_train})"
            )
        n_additional = 0 if additional_data is None else additional_data.shape[0]

        # Only pass groups when given, so splitters without that argument still work.
        split_args = (
            (train_data, target) if groups is None else (train_data, target, groups)
        )

        train_oof_preds: Optional[np.ndarray] = None
        test_oof_preds: Optional[np.ndarray] = None
        mean_score: float = 0.0
        pipelist: List[ModelPipeline] = []

        for train_index, val_index in self._splitter.split(*split_args):
            train_fold = train_data.iloc[train_index]
            val_fold = train_data.iloc[val_index]

            pipe = deepcopy(self.model_pipeline)
            all_training_data = (
                pd.concat([train_fold, additional_data])
                if additional_data is not None
                else train_fold
            )

            fold_weights = None
            if weights_arr is not None:
                fold_weights = weights_arr[train_index]
                if n_additional:
                    # Appended rows have no caller-supplied weight; use 1.
                    fold_weights = np.concatenate(
                        [fold_weights, np.ones(n_additional, dtype=fold_weights.dtype)]
                    )

            validation_data = val_fold if val_data is None else val_data
            pipe.fit(
                all_training_data,
                validation_data=validation_data,
                weights=fold_weights,
            )
            pipelist.append(pipe)

            y_pred = np.asarray(pipe.predict(val_fold))
            if train_oof_preds is None:
                # Sized from the first prediction so 2-D (per-class) outputs fit too.
                train_oof_preds = np.empty((n_train,) + y_pred.shape[1:])
            train_oof_preds[val_index] = y_pred

            if test_data is not None:
                fold_share = np.asarray(pipe.predict(test_data)) / self._num_splits
                test_oof_preds = (
                    fold_share if test_oof_preds is None else test_oof_preds + fold_share
                )

            score = self._evaluation_metric(
                val_fold[self.config.target_col_name], y_pred
            )
            mean_score += score / self._num_splits

        # Keep the original shapes if the splitter produced no folds at all.
        if train_oof_preds is None:
            train_oof_preds = np.empty(n_train)
        if test_data is not None and test_oof_preds is None:
            test_oof_preds = np.zeros(test_data.shape[0])

        return mean_score, train_oof_preds, pipelist, test_oof_preds

## Optuna Hyperparameter Optimizer

In [None]:
class OptunaHyperparameterOptimizer:
    """Hyperparameter optimization using Optuna.

    Args:
        model: Model class (not instance); instantiated per trial with the
            sampled parameters.
        grid_yaml_path: Path of the YAML parameter grid.
        model_type: Top-level key of the grid to use.
        config: Dataset description passed to the pipelines.
        evaluation_metric: Callable invoked as ``metric(y_true, y_pred)``.
        kfold_object: Splitter used for cross-validation in every trial.
        preprocessor: Preprocessing pipeline used in every trial.
        timeout: Time budget in seconds passed to ``study.optimize``.
        direction: Optuna study direction.
        n_trials: Maximum number of trials.
        study_name: Optuna study name (and SQLite file stem when saving).
        explore_fraction: Fraction of ``n_trials`` used as TPE startup trials.
        save_study: Persist the study to ``sqlite:///<study_name>.db``.
        verbose: Print a banner when optimization starts.
        random_state: Seed for the TPE sampler.
    """

    def __init__(
        self,
        model,
        grid_yaml_path: str,
        model_type: str,
        config: DatasetConfig,
        evaluation_metric: Callable,
        kfold_object,
        preprocessor: PreprocessingPipeline,
        timeout: int = 3600,
        direction: str = "maximize",
        n_trials: int = 100,
        study_name: str = "ml_experiment",
        explore_fraction: float = 0.1,
        save_study: bool = False,
        verbose=False,
        random_state=42,
    ) -> None:
        super().__init__()
        self.model = model
        self._param_grid_getter = load_optuna_grid(grid_yaml_path, model_type)
        self.config = config
        self._evaluation_metric = evaluation_metric
        self._kfold_object = kfold_object
        self._preprocessor = preprocessor
        self._timeout = timeout
        self._direction = direction
        self._n_trials = n_trials
        self._study_name = study_name
        self._explore_fraction = explore_fraction
        self._save_study = save_study
        self._verbose = verbose
        self._random_state = random_state

    def optimize(
        self,
        *cv_args,
        **cv_kwargs,
    ):
        """Run the study and return the parameters of the best trial.

        ``cv_args``/``cv_kwargs`` are forwarded unchanged to
        ``CrossValidationExecutor.run`` in every trial. The study is also
        stored on ``self.study``.

        Returns:
            The full parameter dict used to build the best trial's model,
            including ``fixed`` grid entries. If the best trial was not
            evaluated in this call (e.g. it was loaded from a saved study),
            the trial's recorded ``params`` are returned instead.
        """
        if self._verbose:
            print("#" * 100)
            print("Starting Optuna Optimizer")
            print("#" * 100)

        sampler = TPESampler(
            n_startup_trials=int(self._n_trials * self._explore_fraction),
            seed=self._random_state,
        )

        storage_name = (
            "sqlite:///{}.db".format(self._study_name) if self._save_study else None
        )
        self.study = study = optuna.create_study(
            sampler=sampler,
            study_name=self._study_name,
            direction=self._direction,
            storage=storage_name,
            load_if_exists=True,
        )

        # Optuna records only values obtained through trial.suggest_*, so keep
        # the complete dicts here to be able to return "fixed" entries as well.
        parameters_by_trial: Dict[int, Dict] = {}

        def objective(trial: optuna.Trial):
            parameters = self._param_grid_getter(trial)
            parameters_by_trial[trial.number] = parameters
            model = self.model(**parameters)

            cv_executor = CrossValidationExecutor(
                config=self.config,
                model_pipeline=ModelPipeline(
                    model=model,
                    config=self.config,
                    preprocessor=self._preprocessor,
                ),
                evaluation_metric=self._evaluation_metric,
                kfold_object=self._kfold_object,
            )
            score, _, _, _ = cv_executor.run(
                *cv_args,
                **cv_kwargs,
            )
            return score

        study.optimize(
            objective,
            n_trials=self._n_trials,
            timeout=self._timeout,
        )

        best_trial = study.best_trial
        optimal_params = dict(
            parameters_by_trial.get(best_trial.number, best_trial.params)
        )
        return optimal_params

## Load Data

Load your training and test data. Adjust the paths according to your Kaggle competition.

In [None]:
# Update these paths to match your Kaggle dataset location.
# DATA_PATH is joined to the file names by plain string concatenation, so keep the trailing slash.
DATA_PATH = "/kaggle/input/your-competition-name/"
TARGET = "target_column_name"  # Update with your target column name

# Load data; the first CSV column is used as the row index
train_data = pd.read_csv(DATA_PATH + "train.csv", index_col=0)
test_data = pd.read_csv(DATA_PATH + "test.csv", index_col=0)

# Quick sanity check of what was loaded
print(f"Train shape: {train_data.shape}")
print(f"Test shape: {test_data.shape}")
print(f"\nTrain columns: {train_data.columns.tolist()}")

## Configure Dataset

In [None]:
# Identify column types.
# Every list is derived from the frame with TARGET removed, so the label can
# never be picked up as a feature to scale or encode (e.g. a string target
# would otherwise be listed among the categorical columns).
feature_frame = train_data.drop(columns=TARGET)
training_col_names = feature_frame.columns.tolist()
numerical_col_names = (
    feature_frame.select_dtypes(include=["number"]).columns.tolist()
)
categorical_col_names = (
    feature_frame.select_dtypes(include=["object"]).columns.tolist()
)

# Create dataset configuration
config = DatasetConfig(
    training_col_names=training_col_names,
    numerical_col_names=numerical_col_names,
    categorical_col_names=categorical_col_names,
    target_col_name=TARGET,
)

print(f"Numerical features ({len(numerical_col_names)}): {numerical_col_names}")
print(f"\nCategorical features ({len(categorical_col_names)}): {categorical_col_names}")

## Setup Preprocessing Pipeline

In [None]:
# Create preprocessors; they run in list order (scaling first, then categorical encoding)
preprocessors = [StandardScale(config), CategoricalEncoder(config)]
preprocessor = PreprocessingPipeline(preprocessors=preprocessors)

# List the configured steps by their `name` attribute
print("Preprocessing pipeline created with:")
for p in preprocessors:
    print(f"  - {p.name}")

## Load Parameter Grid from YAML

Upload your parameter grid YAML file to Kaggle and specify the path. The YAML file should follow this format:

```yaml
base:  # top-level key; must match the MODEL_TYPE you pass to the optimizer
  num_leaves:
    type: int
    low: 20
    high: 150
  max_depth:
    type: int
    low: 3
    high: 12
  learning_rate:
    type: float
    low: 0.01
    high: 0.3
    log: true
  # ... more parameters
```

Supported types: `int`, `float`, `categorical`, `fixed`

## Run Hyperparameter Optimization

In [None]:
# Specify the path to your parameter grid YAML file
GRID_YAML_PATH = "/kaggle/input/your-dataset/param_grid.yml"
MODEL_TYPE = "base"  # The top-level key in the YAML file (e.g., "base", "lightgbm", "xgboost", "catboost")
MODEL_CLASS = LGBMModel  # The class itself, not an instance; change to XGBoostModel or CatBoostModel as needed

# Create optimizer
optimizer = OptunaHyperparameterOptimizer(
    model=MODEL_CLASS,
    grid_yaml_path=GRID_YAML_PATH,
    model_type=MODEL_TYPE,
    config=config,
    evaluation_metric=roc_auc_score,  # Update with your metric; called as metric(y_true, y_pred)
    kfold_object=StratifiedKFold(n_splits=3, shuffle=True, random_state=42),
    preprocessor=preprocessor,
    timeout=3600,  # 3600 seconds = 1 hour, adjust as needed
    direction="maximize",  # Use "minimize" for loss metrics
    n_trials=100,  # Maximum number of trials; raise it (together with timeout) for a more thorough search
    study_name="kaggle_optuna_optimizer",
    explore_fraction=0.1,  # Fraction of n_trials used as TPE startup trials
    save_study=False,  # True stores the study in a SQLite file named after study_name
    verbose=True,
    random_state=42,
)

print("Starting hyperparameter optimization...")
print(f"Model: {MODEL_CLASS.__name__}")
print(f"This will run for max {optimizer._timeout} seconds or {optimizer._n_trials} trials")
print("="*100)

In [None]:
# Run optimization; the keyword arguments are forwarded to CrossValidationExecutor.run
optimal_params = optimizer.optimize(
    train_data=train_data,
    test_data=test_data,
)

# Report the best score and the parameters returned by the optimizer
print("\n" + "="*100)
print("OPTIMIZATION COMPLETE!")
print("="*100)
print(f"\nBest Score: {optimizer.study.best_value:.6f}")
print(f"\nBest Parameters:")
for key, value in optimal_params.items():
    print(f"  {key}: {value}")

## Visualization (Optional)

In [None]:
# Visualize optimization history
from optuna.visualization import plot_optimization_history, plot_param_importances

# Plotting is optional: any failure is reported and the notebook continues.
try:
    # Plot optimization history
    fig1 = plot_optimization_history(optimizer.study)
    fig1.show()

    # Plot parameter importances
    fig2 = plot_param_importances(optimizer.study)
    fig2.show()
except Exception as e:
    print(f"Visualization error: {e}")

## Train Final Model with Best Parameters

In [None]:
# # Create final model with best parameters (use the same class that was tuned)
# final_model = MODEL_CLASS(**optimal_params)

# # Create final pipeline
# final_pipeline = ModelPipeline(
#     model=final_model,
#     config=config,
#     preprocessor=PreprocessingPipeline([StandardScale(config), CategoricalEncoder(config)])
# )

# # Train on full training data
# print("Training final model on full training data...")
# final_pipeline.fit(train_data)

# # Generate predictions
# print("Generating predictions...")
# predictions = final_pipeline.predict(test_data)

# print(f"Predictions shape: {predictions.shape}")
# print(f"Sample predictions: {predictions[:5]}")

## Create Submission File

In [None]:
# # Create submission dataframe
# submission = pd.DataFrame({
#     'id': test_data.index,  # Update 'id' to match your competition's submission format
#     TARGET: predictions
# })

# # Save submission
# submission.to_csv('submission.csv', index=False)
# print("Submission file created: submission.csv")
# print(f"\nSubmission preview:")
# print(submission.head())

## Summary

This notebook provides a complete workflow for:
1. Loading and configuring your dataset
2. Setting up preprocessing pipelines
3. Running Optuna hyperparameter optimization
4. Training a final model with optimal parameters
5. Generating predictions and creating a submission file

All helper classes are defined inline in the notebook and the third-party packages are installed by the first cell, so no project-local package is needed in Kaggle's isolated environment.

**Supported Models:**
- LGBMModel (LightGBM)
- XGBoostModel (XGBoost)
- CatBoostModel (CatBoost)

**To use this notebook:**
1. Upload your parameter grid YAML file to Kaggle as a dataset
2. Update the `DATA_PATH`, `TARGET`, `GRID_YAML_PATH`, and `MODEL_TYPE` variables
3. Choose your model by setting `MODEL_CLASS` (LGBMModel, XGBoostModel, or CatBoostModel)
4. Modify the evaluation metric if needed
5. Increase `n_trials` and `timeout` for better optimization
6. Run all cells sequentially

**Parameter Grid YAML Format:**
```yaml
base:  # top-level key; must match your MODEL_TYPE
  param_name:
    type: int  # or float, categorical, fixed
    low: 1
    high: 10
    # For float, add: log: true (optional)
    # For categorical, use: choices: [val1, val2, ...]
    # For fixed, use: value: some_value
```