In [None]:
import os
import json
import time
import warnings
from datetime import datetime
from typing import Dict, Tuple, Any, List

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

# ML imports
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier

from catboost import CatBoostClassifier

# LightGBM
try:
    from lightgbm import LGBMClassifier, early_stopping, log_evaluation
    _HAS_LGBM = True
    _HAS_LGBM_CB = True
except Exception:
    try:
        from lightgbm import LGBMClassifier
        _HAS_LGBM = True
        _HAS_LGBM_CB = False
    except Exception:
        _HAS_LGBM = False
        _HAS_LGBM_CB = False
        LGBMClassifier = None

# XGBoost
try:
    import xgboost as xgb  
    from xgboost import XGBClassifier
    _HAS_XGB = True
except Exception:
    XGBClassifier = None
    _HAS_XGB = False

# SMOTE
try:
    from imblearn.over_sampling import SMOTE
    _HAS_SMOTE = True
except Exception:
    SMOTE = None
    _HAS_SMOTE = False

In [None]:
# Input CSV read by main(); the timestamped output folder is created in the same directory.
CSV_PATH = "dataset.csv"

# Columns treated as prediction targets; they are excluded from the feature matrix
# and discretized into integer class labels.
TARGET_COLS = ["vp", "vsc", "vdf", "dsh", "sa", "peak_irr"]

# Inclusive (lo, hi) integer label range per target. Continuous values are rounded and
# clipped into this range, and the range defines the 0..K-1 remapping used by the classifiers.
CLASS_BOUNDS = {
    "vp": (-1, 3),
    "vsc": (0, 3),
    "vdf": (0, 2),
    "dsh": (0, 3),
    "sa": (0, 3),
    "peak_irr": (0, 1),
}

# Targets for which maybe_smote() oversamples the training data (boosting models only).
SMOTE_TARGETS = {"vdf", "dsh", "vsc"}

# Hold-out fraction for the random train/test split, and the seed shared by the split,
# SMOTE and every model.
TEST_SIZE = 0.20
RANDOM_STATE = 42

# Boosting hyperparameters
# LR_BOOST is the learning rate of all three boosters; DEPTH_BOOST is used by CatBoost
# (depth) and XGBoost (max_depth); EARLY_STOP_PATIENCE is the early-stopping patience.
LR_BOOST = 0.05
DEPTH_BOOST = 10
EARLY_STOP_PATIENCE = 60

# Upper bound on boosting rounds per library.
ITER_CAT = 6000
N_EST_LGB = 3000
N_EST_XGB = 3000

# Optional boosters are trained only when enabled here AND importable.
ENABLE_LGBM = True
ENABLE_XGB = True

# When True the GPU is tried first and training falls back to CPU on failure.
USE_GPU_CAT = True
USE_GPU_XGB = True

# Per-target multiplier applied to the inverse-frequency class weights
# (see per_target_class_weight_dict; targets missing here use 1.0).
TARGET_ALPHA = {
    "vdf": 3.0,
    "dsh": 2.3,
    "vsc": 2.7,
    "vp": 2.5,
    "sa": 1.5,
    "peak_irr": 1.0,
}

# Clamp applied to every class weight after scaling by TARGET_ALPHA.
MAX_WEIGHT_MULT = 25.0
MIN_WEIGHT = 0.1

# Thread control (stability / reproducibility)
# NOTE(review): these variables are set after numpy / scikit-learn were imported in the
# first cell. BLAS/OpenMP runtimes usually read them when the library is loaded, so they
# may have no effect here -- consider moving them above the imports; confirm.
os.environ["OMP_NUM_THREADS"] = "4"
os.environ["MKL_NUM_THREADS"] = "4"
os.environ["OPENBLAS_NUM_THREADS"] = "4"
os.environ["NUMEXPR_NUM_THREADS"] = "4"


def log(msg: str) -> None:
    """Print ``msg`` prefixed with the current local time as ``[HH:MM:SS]`` and flush stdout."""
    stamp = time.strftime("%H:%M:%S")
    print("[{}] {}".format(stamp, msg), flush=True)

In [None]:
# Data utilities

def clean_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``df``; the input frame is not modified.

    Steps, in order:
    1. Replace +/-inf in numeric columns with NaN.
    2. Drop rows whose target (any column in TARGET_COLS) is missing.
    3. Fill the remaining numeric NaNs with the column median.

    Infinities are neutralised *before* the rows are dropped so that a non-finite
    target counts as missing. Doing it afterwards would let such rows survive and
    have their label replaced by the median, i.e. a fabricated label.

    NOTE(review): medians are computed on the full dataset, before the train/test
    split, so a small amount of test information reaches the training features.
    """
    cleaned = df.copy()
    num_cols = cleaned.select_dtypes(include=[np.number]).columns

    if len(num_cols):
        cleaned[num_cols] = cleaned[num_cols].replace([np.inf, -np.inf], np.nan)

    # .copy() keeps the following assignment from targeting a slice of ``cleaned``.
    cleaned = cleaned.dropna(subset=TARGET_COLS).copy()

    if len(num_cols):
        medians = cleaned[num_cols].median(numeric_only=True)
        cleaned[num_cols] = cleaned[num_cols].fillna(medians)
    return cleaned


def one_hot_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return the feature frame: every non-target column, with non-numeric columns one-hot encoded.

    The first level of each encoded column is dropped (``drop_first=True``). A frame
    with only numeric features is returned as a plain copy.
    """
    feature_cols = [col for col in df.columns if col not in TARGET_COLS]
    features = df[feature_cols].copy()

    categorical = features.select_dtypes(exclude=[np.number]).columns.tolist()
    if not categorical:
        return features
    return pd.get_dummies(features, columns=categorical, drop_first=True)


def class_labels_from_cont(Y_cont: np.ndarray) -> np.ndarray:
    """Turn continuous targets into integer labels.

    Column ``i`` of ``Y_cont`` belongs to ``TARGET_COLS[i]``; each value is rounded
    to the nearest integer (``np.rint``) and clipped to that target's inclusive
    ``CLASS_BOUNDS`` range. The result has the same shape as ``Y_cont``.
    """
    labels = np.zeros_like(Y_cont, dtype=int)
    for col, name in enumerate(TARGET_COLS):
        lower, upper = CLASS_BOUNDS[name]
        nearest = np.rint(Y_cont[:, col]).astype(int)
        labels[:, col] = np.clip(nearest, lower, upper)
    return labels


def build_XY(df: pd.DataFrame) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray]:
    """Split a cleaned frame into model inputs and targets.

    Returns ``(X, Y_cont, Y_cls)``: the float32 feature frame from
    ``one_hot_features``, the float32 array of raw target values (columns ordered as
    TARGET_COLS), and the integer class labels derived from it in the original
    label space.
    """
    features = one_hot_features(df).astype(np.float32)
    raw_targets = df[TARGET_COLS].astype(np.float32)
    Y_cont = raw_targets.values
    return features, Y_cont, class_labels_from_cont(Y_cont)


def split_indices_random(n: int, test_size: float, random_state: int) -> Tuple[np.ndarray, np.ndarray]:
    """Shuffle the positions ``0..n-1`` and return ``(train_idx, test_idx)`` as arrays.

    The split is delegated to ``train_test_split`` with shuffling enabled and the
    given seed (plain random split, as in the Methods description).
    """
    positions = np.arange(n)
    train_part, test_part = train_test_split(
        positions,
        test_size=test_size,
        random_state=random_state,
        shuffle=True,
    )
    return np.array(train_part), np.array(test_part)

In [None]:
# Label remapping (per target)

def make_label_maps() -> Dict[str, Dict[str, Any]]:
    """Build the per-target label lookup tables.

    For a target with inclusive bounds ``(lo, hi)`` the entry holds:
    - ``to0``:   original label -> index in ``0..K-1``
    - ``from0``: index in ``0..K-1`` -> original label
    - ``K``:     number of classes (``hi - lo + 1``)
    - ``lo`` / ``hi``: the bounds as configured
    """
    maps: Dict[str, Dict[str, Any]] = {}
    for name in TARGET_COLS:
        lower, upper = CLASS_BOUNDS[name]
        span = range(int(lower), int(upper) + 1)
        maps[name] = {
            "to0": {label: pos for pos, label in enumerate(span)},
            "from0": dict(enumerate(span)),
            "K": len(span),
            "lo": lower,
            "hi": upper,
        }
    return maps


# Lookup tables built once from TARGET_COLS / CLASS_BOUNDS when this cell runs; read by
# remap_y_to_0k, unmap_y_from_0k and the per-target trainers (for the class count K).
LABEL_MAPS = make_label_maps()


def remap_y_to_0k(y: np.ndarray, tname: str) -> np.ndarray:
    """Translate original labels of target ``tname`` into indices ``0..K-1``.

    Raises ``KeyError`` for a label outside the target's configured bounds.
    """
    forward = LABEL_MAPS[tname]["to0"]
    return np.fromiter((forward[int(value)] for value in y), dtype=int)


def unmap_y_from_0k(y0: np.ndarray, tname: str) -> np.ndarray:
    """Translate indices ``0..K-1`` of target ``tname`` back to the original labels.

    Raises ``KeyError`` for an index that is not a valid class index of the target.
    """
    backward = LABEL_MAPS[tname]["from0"]
    return np.fromiter((backward[int(index)] for index in y0), dtype=int)

In [None]:
# Class weights + SMOTE

def per_target_class_weight_dict(y_tr_0k: np.ndarray, tname: str, Kt: int) -> Dict[int, float]:
    """Compute clamped, alpha-scaled inverse-frequency class weights in 0..K-1 space.

    For every class observed in ``y_tr_0k`` the weight is
    ``TARGET_ALPHA[tname] * N / (n_observed_classes * count)`` (alpha defaults to
    1.0), clamped to ``[MIN_WEIGHT, MAX_WEIGHT_MULT]``. Classes in ``0..Kt-1`` that
    do not occur keep the neutral weight 1.0.
    """
    classes, counts = np.unique(y_tr_0k, return_counts=True)
    total = len(y_tr_0k)
    n_observed = len(classes)
    alpha = TARGET_ALPHA.get(tname, 1.0)

    weights = dict.fromkeys(range(Kt), 1.0)
    for cls, count in zip(classes, counts):
        scaled = alpha * (total / (n_observed * count))
        clamped = min(MAX_WEIGHT_MULT, max(MIN_WEIGHT, scaled))
        weights[int(cls)] = float(clamped)
    return weights


def maybe_smote(Xtr: pd.DataFrame, ytr_0k: np.ndarray, tname: str) -> Tuple[pd.DataFrame, np.ndarray]:
    """Oversample the training data with SMOTE when the target is configured for it.

    The inputs are returned untouched when ``tname`` is not in SMOTE_TARGETS, when
    imblearn is not installed, or when the rarest class has fewer than two samples.
    Otherwise ``k_neighbors`` is the rarest-class count minus one, capped at 5.
    """
    untouched = (Xtr, ytr_0k)

    if tname not in SMOTE_TARGETS:
        return untouched

    if not _HAS_SMOTE:
        log(f"[SMOTE-{tname}] imblearn not available -> skipping SMOTE.")
        return untouched

    _, counts = np.unique(ytr_0k, return_counts=True)
    min_cnt = int(counts.min())
    if min_cnt < 2:
        log(f"[SMOTE-{tname}] minority count too small ({min_cnt}) -> skipping SMOTE.")
        return untouched

    neighbours = max(1, min(5, min_cnt - 1))
    log(f"[SMOTE-{tname}] applying SMOTE (k_neighbors={neighbours}) ...")
    sampler = SMOTE(random_state=RANDOM_STATE, k_neighbors=neighbours)
    X_res, y_res = sampler.fit_resample(Xtr, ytr_0k)
    return X_res, y_res

In [None]:
# Metrics + saving

def cls_metrics(Y_true: np.ndarray, Y_pred: np.ndarray) -> pd.DataFrame:
    """Score predictions per target in the original label space.

    Returns one row per entry of TARGET_COLS with ``accuracy`` and macro-averaged
    ``f1_macro``, followed by a final ``MEAN`` row holding the column averages.
    """
    def _score(col: int, name: str) -> Dict[str, Any]:
        truth, guess = Y_true[:, col], Y_pred[:, col]
        return {
            "target": name,
            "accuracy": accuracy_score(truth, guess),
            "f1_macro": f1_score(truth, guess, average="macro", zero_division=0),
        }

    table = pd.DataFrame([_score(i, t) for i, t in enumerate(TARGET_COLS)])
    # Append the summary row; the means are taken before the row exists.
    table.loc[len(table)] = {
        "target": "MEAN",
        "accuracy": table["accuracy"].mean(),
        "f1_macro": table["f1_macro"].mean(),
    }
    return table


def save_preds(name: str, Yte: np.ndarray, Ypred: np.ndarray, out_dir: str) -> None:
    """Write ``preds_<name>.csv`` into ``out_dir``.

    The file has all ``<target>_true_cls`` columns first, then all
    ``<target>_pred_cls`` columns, each group ordered as TARGET_COLS.
    """
    columns = {}
    for i, t in enumerate(TARGET_COLS):
        columns[f"{t}_true_cls"] = Yte[:, i]
    for i, t in enumerate(TARGET_COLS):
        columns[f"{t}_pred_cls"] = Ypred[:, i]

    destination = os.path.join(out_dir, f"preds_{name}.csv")
    pd.DataFrame(columns).to_csv(destination, index=False)


def save_train_distribution(Ytr: np.ndarray, out_dir: str) -> None:
    """Write ``train_class_distribution.csv``: one row per (target, class) with its count.

    Classes are reported in the original label space, targets in TARGET_COLS order.
    """
    records = [
        {"target": t, "class": int(value), "count": int(freq)}
        for i, t in enumerate(TARGET_COLS)
        for value, freq in zip(*np.unique(Ytr[:, i], return_counts=True))
    ]
    destination = os.path.join(out_dir, "train_class_distribution.csv")
    pd.DataFrame(records).to_csv(destination, index=False)

In [None]:
# Model training

def train_linear_baseline(Xtr: pd.DataFrame, Ytr_cont: np.ndarray, Yte_true_cls: np.ndarray, Xte: pd.DataFrame) -> Dict[str, Any]:
    """Baseline: standardise features, fit one multi-output LinearRegression, discretise.

    Continuous predictions are rounded and clipped to each target's CLASS_BOUNDS and
    scored against ``Yte_true_cls``. Note the argument order: the test features come
    last. Returns a dict with "name", "y_cls" (predicted labels) and "cls" (metrics).
    """
    log(">>> [LinearRegression] baseline (regression -> discretization)")
    started = time.time()

    pipeline = Pipeline([("scaler", StandardScaler()), ("reg", LinearRegression())])
    pipeline.fit(Xtr, Ytr_cont)
    y_hat = pipeline.predict(Xte).astype(float)

    n_rows = y_hat.shape[0]
    preds_cls = np.zeros((n_rows, len(TARGET_COLS)), dtype=int)
    for col, name in enumerate(TARGET_COLS):
        lower, upper = CLASS_BOUNDS[name]
        # Clip while still floating point, then cast.
        bounded = np.clip(np.rint(y_hat[:, col]), lower, upper)
        preds_cls[:, col] = bounded.astype(int)

    met = cls_metrics(Yte_true_cls, preds_cls)
    mean_row = met.query("target=='MEAN'")
    f1_mean = float(mean_row["f1_macro"].values[0])
    elapsed = time.time() - started
    log(f"<<< [LinearRegression] {elapsed:.1f}s | F1(MEAN)={f1_mean:.3f}")
    return {"name": "LinearRegression", "y_cls": preds_cls, "cls": met}


def train_rf_per_target(Xtr, Ytr, Xte, Yte, out_dir: str) -> Dict[str, Any]:
    """Train one class-weighted RandomForestClassifier per target and score it on the test split.

    Parameters
    ----------
    Xtr, Xte : train / test feature frames (``Xtr.columns`` labels the importances).
    Ytr, Yte : integer label matrices in the original label space, one column per
        entry of TARGET_COLS.
    out_dir : directory receiving ``feature_importance_rf__<target>.csv``.

    Returns
    -------
    dict with "name", "y_cls" (predictions in the original label space) and "cls"
    (metrics table produced by ``cls_metrics``).
    """
    log(">>> [RandomForest] per-target")
    t0 = time.time()

    preds = np.zeros_like(Yte, dtype=int)

    # Half of the visible cores, at least one; 4 cores are assumed if the count is unknown.
    n_jobs = max(1, (os.cpu_count() or 4) // 2)

    for j, t in enumerate(TARGET_COLS):
        Kt = LABEL_MAPS[t]["K"]
        ytr0 = remap_y_to_0k(Ytr[:, j], t)

        cw0 = per_target_class_weight_dict(ytr0, t, Kt)

        rf = RandomForestClassifier(
            n_estimators=800,
            max_depth=None,
            min_samples_leaf=1,
            max_features="sqrt",
            bootstrap=True,
            max_samples=0.9,
            random_state=RANDOM_STATE,
            n_jobs=n_jobs,
            class_weight=cw0,
        )
        rf.fit(Xtr, ytr0)
        pred0 = rf.predict(Xte).astype(int)
        preds[:, j] = unmap_y_from_0k(pred0, t)

        # Feature importance is a by-product: report a failure instead of hiding it,
        # but do not abort the run because of it.
        try:
            fi = pd.DataFrame({"feature": Xtr.columns, "importance": rf.feature_importances_})
            fi = fi.sort_values("importance", ascending=False)
            fi.to_csv(os.path.join(out_dir, f"feature_importance_rf__{t}.csv"), index=False)
        except Exception as e:
            log(f"[RandomForest-{t}] could not save feature importance ({e})")

    met = cls_metrics(Yte, preds)
    f1_mean = float(met.query("target=='MEAN'")["f1_macro"].values[0])
    log(f"<<< [RandomForest] {time.time() - t0:.1f}s | F1(MEAN)={f1_mean:.3f}")
    return {"name": "RandomForest", "y_cls": preds, "cls": met}


def train_cat_per_target(Xtr, Ytr, Xte, Yte, out_dir: str, val_size: float = 0.15) -> Dict[str, Any]:
    """Train one CatBoostClassifier per target and score it on the test split.

    Early stopping and best-iteration selection use a validation subset carved out
    of the *training* data (``val_size`` fraction, seeded with RANDOM_STATE). The
    test split is only used for the final predictions, so the reported metrics are
    not tuned on the data they are computed on. The validation rows are held out
    before SMOTE, so they contain real samples only, and they are not used for
    fitting.

    Parameters
    ----------
    Xtr, Xte : train / test feature DataFrames.
    Ytr, Yte : integer label matrices in the original label space, one column per
        entry of TARGET_COLS.
    out_dir : directory receiving ``best_params_catboost__<target>.json`` and
        ``feature_importance_catboost__<target>.csv``.
    val_size : fraction (or absolute number) of training rows held out for early
        stopping; passed to ``train_test_split``.

    Returns
    -------
    dict with "name", "y_cls" (predictions in the original label space) and "cls"
    (metrics table produced by ``cls_metrics``).
    """
    log(">>> [CatBoost] per-target (GPU if available)")
    t0 = time.time()

    preds = np.zeros_like(Yte, dtype=int)

    # One validation split shared by all targets.
    fit_idx, val_idx = train_test_split(
        np.arange(len(Xtr)), test_size=val_size, random_state=RANDOM_STATE, shuffle=True
    )
    X_fit, X_val = Xtr.iloc[fit_idx], Xtr.iloc[val_idx]

    base_params = dict(
        learning_rate=LR_BOOST,
        depth=DEPTH_BOOST,
        iterations=ITER_CAT,
        random_seed=RANDOM_STATE,
        od_type="Iter",
        od_wait=EARLY_STOP_PATIENCE,
        verbose=False,
    )

    for j, t in enumerate(TARGET_COLS):
        Kt = LABEL_MAPS[t]["K"]
        ytr0 = remap_y_to_0k(Ytr[:, j], t)
        y_fit, y_val = ytr0[fit_idx], ytr0[val_idx]

        # Targeted SMOTE (fit portion only; validation and test stay untouched)
        X_bal, y_bal = maybe_smote(X_fit, y_fit, t)

        cw0 = per_target_class_weight_dict(y_bal, t, Kt)
        weight_list = [cw0.get(k, 1.0) for k in range(Kt)]

        params = dict(base_params)
        params["loss_function"] = "Logloss" if Kt == 2 else "MultiClass"

        # Try the GPU only when it is enabled; a CPU failure is a real error and is
        # re-raised instead of being reported as a GPU problem and retried.
        clf = None
        for use_gpu in ([True, False] if USE_GPU_CAT else [False]):
            run_params = dict(params)
            if use_gpu:
                run_params.update(dict(task_type="GPU", devices="0"))
            try:
                clf = CatBoostClassifier(**run_params, class_weights=weight_list)
                clf.fit(X_bal, y_bal, eval_set=(X_val, y_val), verbose=False, use_best_model=True)
            except Exception as e:
                if not use_gpu:
                    raise
                log(f"[CatBoost-{t}] GPU failed → CPU fallback. ({e})")
                continue
            params = run_params  # remember what was actually used, for the JSON below
            break

        # Multi-class predictions may come back as a column vector; flatten them.
        pred0 = clf.predict(Xte).astype(int).reshape(-1)
        preds[:, j] = unmap_y_from_0k(pred0, t)

        # Save params per target (best effort, but failures are reported)
        try:
            with open(os.path.join(out_dir, f"best_params_catboost__{t}.json"), "w") as f:
                json.dump(params, f, indent=2)
        except Exception as e:
            log(f"[CatBoost-{t}] could not save parameters ({e})")

        # Feature importance (best effort, but failures are reported)
        try:
            fi = clf.get_feature_importance()
            pd.DataFrame({"feature": Xtr.columns, "importance": fi}).sort_values("importance", ascending=False).to_csv(
                os.path.join(out_dir, f"feature_importance_catboost__{t}.csv"), index=False
            )
        except Exception as e:
            log(f"[CatBoost-{t}] could not save feature importance ({e})")

    met = cls_metrics(Yte, preds)
    f1_mean = float(met.query("target=='MEAN'")["f1_macro"].values[0])
    log(f"<<< [CatBoost] {time.time() - t0:.1f}s | F1(MEAN)={f1_mean:.3f}")
    return {"name": "CatBoost", "y_cls": preds, "cls": met}


def train_lgb_per_target(Xtr, Ytr, Xte, Yte, out_dir: str, val_size: float = 0.15) -> Dict[str, Any]:
    """Train one LGBMClassifier per target and score it on the test split.

    Returns ``None`` (after logging) when LightGBM is disabled or not installed.

    When the early-stopping callbacks are available, a validation subset is carved
    out of the *training* data (``val_size`` fraction, seeded with RANDOM_STATE) and
    used as ``eval_set``. The test split is only used for the final predictions, so
    the reported metrics are not tuned on the data they are computed on. Without the
    callbacks there is no early stopping and the full training split is used.

    Parameters
    ----------
    Xtr, Xte : train / test feature DataFrames.
    Ytr, Yte : integer label matrices in the original label space, one column per
        entry of TARGET_COLS.
    out_dir : directory receiving ``best_params_lgbm__<target>.json``.
    val_size : fraction (or absolute number) of training rows held out for early
        stopping; passed to ``train_test_split``.

    Returns
    -------
    dict with "name", "y_cls" (predictions in the original label space) and "cls"
    (metrics table produced by ``cls_metrics``), or ``None`` when skipped.
    """
    if not (_HAS_LGBM and ENABLE_LGBM):
        log("[LightGBM] disabled or unavailable; skipping.")
        return None

    log(">>> [LightGBM] per-target")
    t0 = time.time()

    preds = np.zeros_like(Yte, dtype=int)

    # A validation split is only needed when early stopping can actually run.
    if _HAS_LGBM_CB:
        fit_idx, val_idx = train_test_split(
            np.arange(len(Xtr)), test_size=val_size, random_state=RANDOM_STATE, shuffle=True
        )
        X_fit, X_val = Xtr.iloc[fit_idx], Xtr.iloc[val_idx]
    else:
        fit_idx = val_idx = None
        X_fit, X_val = Xtr, None

    for j, t in enumerate(TARGET_COLS):
        Kt = LABEL_MAPS[t]["K"]
        ytr0 = remap_y_to_0k(Ytr[:, j], t)
        y_fit = ytr0 if fit_idx is None else ytr0[fit_idx]

        # SMOTE touches the fit portion only; validation and test stay untouched.
        X_bal, y_bal = maybe_smote(X_fit, y_fit, t)

        cw0 = per_target_class_weight_dict(y_bal, t, Kt)
        objective = "binary" if Kt == 2 else "multiclass"

        clf = LGBMClassifier(
            n_estimators=N_EST_LGB,
            learning_rate=LR_BOOST,
            num_leaves=127,
            min_data_in_leaf=30,
            subsample=0.9,
            colsample_bytree=0.9,
            reg_lambda=1.0,
            random_state=RANDOM_STATE,
            n_jobs=-1,
            objective=objective,
            class_weight=cw0,
            verbose=-1,
        )
        if Kt > 2:
            clf.set_params(num_class=Kt)

        if _HAS_LGBM_CB:
            metric = "binary_logloss" if Kt == 2 else "multi_logloss"
            clf.fit(
                X_bal, y_bal,
                eval_set=[(X_val, ytr0[val_idx])],
                eval_metric=metric,
                callbacks=[early_stopping(EARLY_STOP_PATIENCE), log_evaluation(50)],
            )
        else:
            clf.fit(X_bal, y_bal)

        pred0 = clf.predict(Xte).astype(int)
        preds[:, j] = unmap_y_from_0k(pred0, t)

        # Best effort, but failures are reported. default=str keeps a value that JSON
        # cannot encode from aborting the dump and leaving a truncated file.
        try:
            with open(os.path.join(out_dir, f"best_params_lgbm__{t}.json"), "w") as f:
                json.dump(clf.get_params(), f, indent=2, default=str)
        except Exception as e:
            log(f"[LightGBM-{t}] could not save parameters ({e})")

    met = cls_metrics(Yte, preds)
    f1_mean = float(met.query("target=='MEAN'")["f1_macro"].values[0])
    log(f"<<< [LightGBM] {time.time() - t0:.1f}s | F1(MEAN)={f1_mean:.3f}")
    return {"name": "LightGBM", "y_cls": preds, "cls": met}


def train_xgb_per_target(Xtr, Ytr, Xte, Yte, out_dir: str, val_size: float = 0.15) -> Dict[str, Any]:
    """Train one XGBClassifier per target and score it on the test split.

    Returns ``None`` (after logging) when XGBoost is disabled or not installed.

    Early stopping uses a validation subset carved out of the *training* data
    (``val_size`` fraction, seeded with RANDOM_STATE). The test split is only used
    for the final predictions, so the reported metrics are not tuned on the data
    they are computed on. The validation rows are held out before SMOTE and are not
    used for fitting.

    Parameters
    ----------
    Xtr, Xte : train / test feature DataFrames.
    Ytr, Yte : integer label matrices in the original label space, one column per
        entry of TARGET_COLS.
    out_dir : directory receiving ``best_params_xgb__<target>.json``.
    val_size : fraction (or absolute number) of training rows held out for early
        stopping; passed to ``train_test_split``.

    Returns
    -------
    dict with "name", "y_cls" (predictions in the original label space) and "cls"
    (metrics table produced by ``cls_metrics``), or ``None`` when skipped.
    """
    if not (_HAS_XGB and ENABLE_XGB):
        log("[XGBoost] disabled or unavailable; skipping.")
        return None

    log(">>> [XGBoost] per-target")
    t0 = time.time()

    preds = np.zeros_like(Yte, dtype=int)

    # One validation split shared by all targets.
    fit_idx, val_idx = train_test_split(
        np.arange(len(Xtr)), test_size=val_size, random_state=RANDOM_STATE, shuffle=True
    )
    X_fit, X_val = Xtr.iloc[fit_idx], Xtr.iloc[val_idx]

    # Resolve the callback class once; a fresh instance is created per fit attempt.
    try:
        from xgboost.callback import EarlyStopping
    except Exception:
        EarlyStopping = None

    for j, t in enumerate(TARGET_COLS):
        Kt = LABEL_MAPS[t]["K"]
        ytr0 = remap_y_to_0k(Ytr[:, j], t)
        y_fit, y_val = ytr0[fit_idx], ytr0[val_idx]

        # SMOTE touches the fit portion only; validation and test stay untouched.
        X_bal, y_bal = maybe_smote(X_fit, y_fit, t)

        is_bin = (Kt == 2)
        cw0 = per_target_class_weight_dict(y_bal, t, Kt)
        # XGBoost takes per-sample weights, so expand the class weights row by row.
        sw = np.array([cw0.get(int(c), 1.0) for c in y_bal], dtype=float)

        def _mk(use_gpu: bool) -> XGBClassifier:
            # NOTE(review): recent XGBoost releases deprecate "gpu_hist" in favour of
            # tree_method="hist" with device="cuda" -- confirm against the installed version.
            params = dict(
                n_estimators=N_EST_XGB,
                learning_rate=LR_BOOST,
                max_depth=DEPTH_BOOST,
                subsample=0.9,
                colsample_bytree=0.9,
                reg_lambda=1.0,
                min_child_weight=2.0,
                gamma=0.0,
                random_state=RANDOM_STATE,
                n_jobs=-1,
                tree_method=("gpu_hist" if use_gpu else "hist"),
            )
            if is_bin:
                params.update(dict(objective="binary:logistic", eval_metric="logloss"))
            else:
                params.update(dict(objective="multi:softprob", num_class=Kt, eval_metric="mlogloss"))
            return XGBClassifier(**params)

        model = None

        for g in ([True, False] if USE_GPU_XGB else [False]):
            try:
                model = _mk(g)

                fit_kw = dict(sample_weight=sw, eval_set=[(X_val, y_val)])
                if EarlyStopping is not None:
                    fit_kw["callbacks"] = [
                        EarlyStopping(rounds=EARLY_STOP_PATIENCE, save_best=True, maximize=False)
                    ]

                try:
                    model.fit(X_bal, y_bal, **fit_kw)
                except TypeError:
                    # Presumably this XGBoost build no longer accepts `callbacks` in
                    # fit(). Request early stopping through the estimator parameter
                    # instead of silently training without it.
                    had_callbacks = fit_kw.pop("callbacks", None) is not None
                    if had_callbacks:
                        model.set_params(early_stopping_rounds=EARLY_STOP_PATIENCE)
                    model.fit(X_bal, y_bal, **fit_kw)

                break
            except Exception as e:
                if g:
                    log(f"[XGBoost-{t}] GPU failed → CPU fallback. ({e})")
                else:
                    log(f"[XGBoost-{t}] fit with validation set failed; retrying plain CPU fit. ({e})")
                model = None

        # Last resort: plain CPU fit without an eval set; an error here propagates.
        if model is None:
            model = _mk(False)
            model.fit(X_bal, y_bal, sample_weight=sw)

        pred0 = model.predict(Xte).astype(int)
        preds[:, j] = unmap_y_from_0k(pred0, t)

        # Best effort, but failures are reported. default=str keeps a value that JSON
        # cannot encode from aborting the dump and leaving a truncated file.
        try:
            with open(os.path.join(out_dir, f"best_params_xgb__{t}.json"), "w") as f:
                json.dump(model.get_params(), f, indent=2, default=str)
        except Exception as e:
            log(f"[XGBoost-{t}] could not save parameters ({e})")

    met = cls_metrics(Yte, preds)
    f1_mean = float(met.query("target=='MEAN'")["f1_macro"].values[0])
    log(f"<<< [XGBoost] {time.time() - t0:.1f}s | F1(MEAN)={f1_mean:.3f}")
    return {"name": "XGBoost", "y_cls": preds, "cls": met}

In [None]:
# Main

def main() -> None:
    """Run the whole experiment: load, split, train every model, and write the reports.

    All artefacts go into a timestamped ``models_classification_clean__<stamp>``
    folder next to CSV_PATH:
    - ``train_class_distribution.csv``, ``preds_<model>.csv``, ``metrics_<model>.csv``
    - ``classification_all_split.csv`` (all metrics) and, when an Excel writer engine
      is installed, ``results_summary_split.xlsx``
    - ``run_metadata.json`` with the configuration used
    plus whatever the individual trainers save (parameters, feature importances).
    """
    base = os.path.dirname(CSV_PATH) or "."
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    out_dir = os.path.join(base, f"models_classification_clean__{stamp}")
    os.makedirs(out_dir, exist_ok=True)

    log(f"Outputs -> {out_dir}")

    log("Loading dataset ...")
    df = clean_df(pd.read_csv(CSV_PATH))

    X, Y_cont, Y_cls = build_XY(df)

    # Random split
    tr_idx, te_idx = split_indices_random(len(X), TEST_SIZE, RANDOM_STATE)

    Xtr, Xte = X.iloc[tr_idx], X.iloc[te_idx]
    Ytr_cls, Yte_cls = Y_cls[tr_idx], Y_cls[te_idx]
    Ytr_cont = Y_cont[tr_idx]  # for baseline only

    # Describe the split from the configuration so the logs and the "split" column
    # stay correct when TEST_SIZE / RANDOM_STATE change (80/20, seed 42 by default).
    test_pct = int(round(TEST_SIZE * 100))
    train_pct = 100 - test_pct
    split_tag = f"{train_pct}_{test_pct}"

    log(f"Shapes: X={X.shape} | train={Xtr.shape} | test={Xte.shape} | targets={len(TARGET_COLS)}")
    log(f"Split strategy: random {train_pct}/{test_pct} (seed={RANDOM_STATE})")

    save_train_distribution(Ytr_cls, out_dir)

    results: List[Dict[str, Any]] = []

    def _record(result) -> None:
        # Keep a finished model's results and write its predictions right away;
        # trainers return None when their library is disabled or missing.
        if result is None:
            return
        results.append(result)
        save_preds(result["name"], Yte_cls, result["y_cls"], out_dir)

    # The baseline regresses on the continuous targets; all others classify.
    _record(train_linear_baseline(Xtr, Ytr_cont, Yte_cls, Xte))
    for trainer in (train_rf_per_target, train_cat_per_target, train_lgb_per_target, train_xgb_per_target):
        _record(trainer(Xtr, Ytr_cls, Xte, Yte_cls, out_dir))

    cls_tables = []
    for r in results:
        dfm = r["cls"].copy()
        dfm["model"] = r["name"]
        dfm["split"] = split_tag
        cls_tables.append(dfm)
        dfm.to_csv(os.path.join(out_dir, f"metrics_{r['name']}.csv"), index=False)

    cls_all = pd.concat(cls_tables, ignore_index=True)
    cls_all.to_csv(os.path.join(out_dir, "classification_all_split.csv"), index=False)

    mean_rows = cls_all[cls_all["target"] == "MEAN"].sort_values("f1_macro", ascending=False)

    # The workbook duplicates the CSV above, so a missing Excel engine must not abort
    # the run before the summary is printed and the metadata is written.
    try:
        with pd.ExcelWriter(os.path.join(out_dir, "results_summary_split.xlsx")) as xl:
            # sheet_name is passed by keyword: positional use is deprecated in pandas.
            mean_rows[["model", "accuracy", "f1_macro", "split"]].to_excel(
                xl, sheet_name="cls_MEAN", index=False
            )
            cls_all.to_excel(xl, sheet_name="cls_all", index=False)
    except ImportError as e:
        log(f"Excel summary skipped (no Excel writer engine available): {e}")

    log(f"=== Split {train_pct}/{test_pct} — MEAN Classification (sorted by Macro-F1) ===")
    print(mean_rows[["model", "accuracy", "f1_macro"]].round(4).to_string(index=False))

    meta = dict(
        csv_path=CSV_PATH,
        timestamp=stamp,
        test_size=TEST_SIZE,
        random_state=RANDOM_STATE,
        targets=TARGET_COLS,
        class_bounds=CLASS_BOUNDS,
        smote_targets=sorted(list(SMOTE_TARGETS)),
        target_alpha=TARGET_ALPHA,
        booster_params=dict(
            learning_rate=LR_BOOST,
            depth=DEPTH_BOOST,
            early_stop_patience=EARLY_STOP_PATIENCE,
            cat_iterations=ITER_CAT,
            lgb_estimators=N_EST_LGB,
            xgb_estimators=N_EST_XGB,
        ),
        toggles=dict(
            enable_lgbm=ENABLE_LGBM and _HAS_LGBM,
            enable_xgb=ENABLE_XGB and _HAS_XGB,
            use_gpu_cat=USE_GPU_CAT,
            use_gpu_xgb=USE_GPU_XGB,
            has_smote=_HAS_SMOTE,
        ),
        outputs_dir=out_dir,
    )
    with open(os.path.join(out_dir, "run_metadata.json"), "w") as f:
        json.dump(meta, f, indent=2)

    log(f"Saved outputs to: {out_dir}")


# Entry point: run the full pipeline when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()