In [None]:
from __future__ import annotations

from datetime import datetime
from pathlib import Path

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

from src.config import DEFAULT_CONFIG_PATH, load_config
from src.models.pso_lightgbm import PSOLightGBMTuner
from src.utils.logger import setup_logger
from src.utils.paths import data_path

In [None]:
# Name of the target column and location of the raw CSV this notebook reads.
RAW_LABEL_COL = "label"
DATASET_PATH = data_path("raw", "Dataset(Over Sampled).csv")

# Load the default configuration, then override the training options for this
# run: GPU on platform 0 / device 0, with progress reporting enabled.
config = load_config(DEFAULT_CONFIG_PATH)
training_cfg = config.training
training_cfg.use_gpu = True
training_cfg.gpu_platform_id = 0
training_cfg.gpu_device_id = 0
training_cfg.show_progress = True

logger = setup_logger(log_dir=config.paths.logs_dir)

# Read the dataset and store the label column as integers.
df = pd.read_csv(DATASET_PATH)
df[RAW_LABEL_COL] = df[RAW_LABEL_COL].astype(int)

print(f"Loaded {len(df):,} rows from {DATASET_PATH}")
# Last expression: per-class row counts, ordered by label value.
df[RAW_LABEL_COL].value_counts().sort_index()

In [None]:
# Smoke-test GPU training before the expensive tuning run: fit one boosting
# round on a two-row toy dataset with device_type="gpu".
temp_dataset = lgb.Dataset(
    data=np.array([[0.0, 1.0], [1.0, 0.0]], dtype=float),
    label=np.array([0, 1]),
)

# Probe the same platform/device that the config cell selected, so a passing
# probe actually says something about the device the tuner will be asked to use.
gpu_probe_params = {
    "objective": "binary",
    "device_type": "gpu",
    "gpu_platform_id": config.training.gpu_platform_id,
    "gpu_device_id": config.training.gpu_device_id,
    "verbosity": -1,
}

try:
    lgb.train(gpu_probe_params, temp_dataset, num_boost_round=1)
except lgb.basic.LightGBMError as exc:
    # Surface the underlying LightGBM message: the failure can come from a
    # build without GPU support or from an unavailable platform/device.
    raise RuntimeError(
        "LightGBM failed to train with device_type='gpu' on platform "
        f"{gpu_probe_params['gpu_platform_id']} / device "
        f"{gpu_probe_params['gpu_device_id']}: {exc}. "
        "Check that the installed LightGBM build has GPU (OpenCL) support "
        "and that the requested device is available."
    ) from exc
else:
    print("GPU-enabled LightGBM detected (device_type='gpu').")

In [None]:
# Separate the target column from the feature columns.
y = df[RAW_LABEL_COL]
X = df.drop(columns=[RAW_LABEL_COL])

# First cut: carve off the held-out test partition, stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    stratify=y,
    test_size=config.training.test_size,
    random_state=config.training.random_state,
)

# Second cut: split the remaining training rows into a training subset and a
# validation subset, again stratified and with the same random state.
X_train_sub, X_val, y_train_sub, y_val = train_test_split(
    X_train, y_train,
    stratify=y_train,
    test_size=config.training.val_size,
    random_state=config.training.random_state,
)

print(
    f"Splits -> train: {len(X_train_sub):,}, val: {len(X_val):,}, test: {len(X_test):,}"
)

In [5]:
# Run the tuner on the training subset only.
tuner = PSOLightGBMTuner(config)
best_params = tuner.fit(X_train_sub, y_train_sub)

# Stack the training subset and the validation rows, then hand the combined
# data to train_best_model (presumably the final refit with the tuned
# parameters - confirm against PSOLightGBMTuner).
X_refit = pd.concat([X_train_sub, X_val])
y_refit = pd.concat([y_train_sub, y_val])
model = tuner.train_best_model(X_refit, y_refit)

print("Best hyperparameters")
# Last expression: display the value returned by tuner.fit.
best_params

2025-11-20 12:42:13 | INFO | psolgbm | Iteration 4/25 | best f1_macro = 0.9901
PSO tuning:  16%|█▌        | 4/25 [21:33:39<122:41:04, 21031.64s/iter]

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 2215
[LightGBM] [Info] Number of data points in the train set: 150746, number of used features: 14
[LightGBM] [Info] Using requested OpenCL platform 0 device 0
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3090, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3090, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 10 dense feature groups (1.73 MB) transferred to GPU in 0.012966 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score -2.397895
[LightGBM] [Info] Start training from score -2.397895
[LightGBM] [Info] Start training from score -2.397895
[LightGBM] [Info] Start training from score -2.397895
[LightGBM] [Info] Start training from score -2.3

Exception ignored on calling ctypes callback function: <function _log_callback at 0x000001F269AB3CE0>
Traceback (most recent call last):
  File "c:\Users\z-pc\AppData\Local\miniconda3\envs\ddl\Lib\site-packages\lightgbm\basic.py", line 203, in _log_callback
    def _log_callback(msg: bytes) -> None:
    
KeyboardInterrupt: 


No further splits with positive gain, best gain: -inf
No further splits with positive gain, best gain: -inf
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 2209
[LightGBM] [Info] Number of data points in the train set: 150746, number of used features: 14
[LightGBM] [Info] Using requested OpenCL platform 0 device 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 2209
[LightGBM] [Info] Number of data points in the train set: 150746, number of used features: 14
[LightGBM] [Info] Using requested OpenCL platform 0 device 0
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3090, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 10 dense feature groups (1.73 MB) transferred to GPU in 0.002940 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score -2.397895
[LightGBM] [Info

: 

: 

In [1]:
# Report which device the trained booster was configured for.
# Booster.attr() only returns values previously stored with set_attr();
# training parameters such as device_type are not attributes, so the lookup
# normally yields None. Fall back to the parameters kept on the booster.
booster = model.booster_
device_attr = booster.attr("device_type")
if device_attr is None:
    booster_params = getattr(booster, "params", None) or {}
    # "device" is an accepted alias of "device_type" in LightGBM parameters.
    device_attr = booster_params.get("device_type", booster_params.get("device"))

print(f"Booster reports device_type={device_attr}")
print("Model parameters (subset)")
# Last expression: the GPU/threading-related subset of the estimator parameters.
{
    key: value
    for key, value in model.get_params().items()
    if key in {"device_type", "gpu_platform_id", "gpu_device_id", "n_jobs"}
}


NameError: name 'model' is not defined

In [None]:
# Predict on the held-out test partition and build a per-class metrics report
# as a nested dict.
test_preds = model.predict(X_test)
report = classification_report(
    y_true=y_test,
    y_pred=test_preds,
    output_dict=True,
)

print(f"Test macro F1: {report['macro avg']['f1-score']:.4f}")
# Last expression: the report as a table, one row per report entry.
pd.DataFrame(report).transpose()

In [None]:
# Save the trained booster under a timestamped file name so repeated runs do
# not overwrite each other. `datetime` and `Path` come from the notebook's
# top import cell.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Coerce to Path so a plain-string config value also works, and create the
# directory up front: saving into a missing directory would otherwise fail.
models_dir = Path(config.paths.models_path)
models_dir.mkdir(parents=True, exist_ok=True)

model_save_path = models_dir / f"multiclass_pso_lightgbm_{timestamp}.txt"
model.booster_.save_model(str(model_save_path))

print(f"Model weights saved to {model_save_path}")
# Last expression: display the saved path.
model_save_path