In [1]:
pip install lightgbm optuna

Collecting optuna
  Downloading optuna-4.7.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.7.0-py3-none-any.whl (413 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.9/413.9 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.7.0


In [2]:
import pandas as pd

df = pd.read_csv("train_features.csv")

target = "failure_24h"
X = df.drop(columns=[target, "RUL"])
y = df[target]

from sklearn.model_selection import train_test_split

units = df["unit"].unique()

train_units, val_units = train_test_split(
    units,
    test_size=0.2,
    random_state=42
)

train_idx = df["unit"].isin(train_units)
val_idx   = df["unit"].isin(val_units)

X_train, X_val = X.loc[train_idx], X.loc[val_idx]
y_train, y_val = y.loc[train_idx], y.loc[val_idx]

In [3]:
from lightgbm import LGBMClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

lgbm_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("model", LGBMClassifier(
        n_estimators=500,
        learning_rate=0.05,
        num_leaves=31,
        class_weight="balanced",
        random_state=42,
        n_jobs=-1
    ))
])

lgbm_pipe.fit(X_train, y_train)

 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044095 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


In [4]:
from sklearn.metrics import average_precision_score, precision_score, recall_score

y_probs = lgbm_pipe.predict_proba(X_val)[:, 1]

prauc = average_precision_score(y_val, y_probs)

y_pred = (y_probs >= 0.5).astype(int)

precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)

prauc, precision, recall

 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


(np.float64(1.0), 1.0, 0.64)

In [5]:
import optuna
from sklearn.metrics import average_precision_score

def objective(trial):

    params = {
        "n_estimators": trial.suggest_int("n_estimators", 300, 800),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2),
        "num_leaves": trial.suggest_int("num_leaves", 16, 128),
        "max_depth": trial.suggest_int("max_depth", 4, 16),
        "min_child_samples": trial.suggest_int("min_child_samples", 20, 100),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
    }

    model = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("model", LGBMClassifier(
            **params,
            class_weight="balanced",
            random_state=42,
            n_jobs=-1
        ))
    ])

    model.fit(X_train, y_train)

    y_probs = model.predict_proba(X_val)[:, 1]

    prauc = average_precision_score(y_val, y_probs)

    return prauc


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=25)

study.best_value, study.best_params

[I 2026-01-31 10:23:32,007] A new study created in memory with name: no-name-6af645d6-4411-446f-9e78-55c778686212
 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001106 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:32,405] Trial 0 finished with value: 0.9220666252918824 and parameters: {'n_estimators': 505, 'learning_rate': 0.1783965133269841, 'num_leaves': 90, 'max_depth': 4, 'min_child_samples': 52, 'subsample': 0.9099083072818266, 'colsample_bytree': 0.7606867303661996}. Best is trial 0 with value: 0.9220666252918824.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:32,901] Trial 1 finished with value: 0.9468177502579979 and parameters: {'n_estimators': 347, 'learning_rate': 0.08699466698789352, 'num_leaves': 60, 'max_depth': 13, 'min_child_samples': 61, 'subsample': 0.7638758631115639, 'colsample_bytree': 0.9981302721713371}. Best is trial 1 with value: 0.9468177502579979.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005550 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:33,335] Trial 2 finished with value: 0.7448799519347205 and parameters: {'n_estimators': 406, 'learning_rate': 0.19155965122099414, 'num_leaves': 32, 'max_depth': 16, 'min_child_samples': 70, 'subsample': 0.6803648187277318, 'colsample_bytree': 0.8962383328781469}. Best is trial 1 with value: 0.9468177502579979.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012387 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:33,772] Trial 3 finished with value: 0.9508427128427129 and parameters: {'n_estimators': 599, 'learning_rate': 0.13162353513904948, 'num_leaves': 126, 'max_depth': 8, 'min_child_samples': 79, 'subsample': 0.9304452640533276, 'colsample_bytree': 0.8284110510310524}. Best is trial 3 with value: 0.9508427128427129.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019984 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:35,404] Trial 4 finished with value: 0.9414473063049913 and parameters: {'n_estimators': 574, 'learning_rate': 0.13225334393009353, 'num_leaves': 37, 'max_depth': 8, 'min_child_samples': 68, 'subsample': 0.8520574790191182, 'colsample_bytree': 0.6557831033859577}. Best is trial 3 with value: 0.9508427128427129.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008329 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:38,911] Trial 5 finished with value: 0.9212553907863238 and parameters: {'n_estimators': 424, 'learning_rate': 0.14041036293429282, 'num_leaves': 113, 'max_depth': 12, 'min_child_samples': 35, 'subsample': 0.9452334164382148, 'colsample_bytree': 0.6371485357722199}. Best is trial 3 with value: 0.9508427128427129.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003208 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:39,495] Trial 6 finished with value: 0.9319631325037004 and parameters: {'n_estimators': 506, 'learning_rate': 0.15341724688734398, 'num_leaves': 44, 'max_depth': 13, 'min_child_samples': 42, 'subsample': 0.707122879586041, 'colsample_bytree': 0.665808126135784}. Best is trial 3 with value: 0.9508427128427129.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:40,042] Trial 7 finished with value: 0.9503937457067893 and parameters: {'n_estimators': 429, 'learning_rate': 0.11898645052552113, 'num_leaves': 36, 'max_depth': 14, 'min_child_samples': 78, 'subsample': 0.8542812709973142, 'colsample_bytree': 0.6491690662066514}. Best is trial 3 with value: 0.9508427128427129.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001620 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:41,052] Trial 8 finished with value: 0.8892636536747427 and parameters: {'n_estimators': 343, 'learning_rate': 0.16929728002805636, 'num_leaves': 127, 'max_depth': 16, 'min_child_samples': 35, 'subsample': 0.7579639764441255, 'colsample_bytree': 0.9611215758833286}. Best is trial 3 with value: 0.9508427128427129.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:42,170] Trial 9 finished with value: 0.9311126767497736 and parameters: {'n_estimators': 653, 'learning_rate': 0.10594617771657557, 'num_leaves': 70, 'max_depth': 14, 'min_child_samples': 52, 'subsample': 0.7708636055253545, 'colsample_bytree': 0.6299007881682713}. Best is trial 3 with value: 0.9508427128427129.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001083 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12930
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 118
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:43,509] Trial 10 finished with value: 0.958182549064902 and parameters: {'n_estimators': 749, 'learning_rate': 0.04941935268001383, 'num_leaves': 95, 'max_depth': 8, 'min_child_samples': 98, 'subsample': 0.6055599872095002, 'colsample_bytree': 0.8223296702561372}. Best is trial 10 with value: 0.958182549064902.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017641 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12930
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 118
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:44,860] Trial 11 finished with value: 0.9357817182817183 and parameters: {'n_estimators': 773, 'learning_rate': 0.02982227023099132, 'num_leaves': 98, 'max_depth': 8, 'min_child_samples': 99, 'subsample': 0.6217469858254923, 'colsample_bytree': 0.81496517594271}. Best is trial 10 with value: 0.958182549064902.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001371 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12930
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 118
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:45,219] Trial 12 finished with value: 0.9552209140593896 and parameters: {'n_estimators': 755, 'learning_rate': 0.06050385037570663, 'num_leaves': 94, 'max_depth': 7, 'min_child_samples': 94, 'subsample': 0.999941808126059, 'colsample_bytree': 0.8067795452896742}. Best is trial 10 with value: 0.958182549064902.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12930
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 118
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-



 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001087 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:46,138] Trial 14 finished with value: 0.9484859366096688 and parameters: {'n_estimators': 705, 'learning_rate': 0.011304822349377089, 'num_leaves': 98, 'max_depth': 6, 'min_child_samples': 88, 'subsample': 0.6202952446839237, 'colsample_bytree': 0.8715717962888307}. Best is trial 10 with value: 0.958182549064902.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:46,451] Trial 15 finished with value: 0.9588332386165727 and parameters: {'n_estimators': 690, 'learning_rate': 0.07001874713173067, 'num_leaves': 77, 'max_depth': 9, 'min_child_samples': 88, 'subsample': 0.8390090562982081, 'colsample_bytree': 0.7254335297112673}. Best is trial 15 with value: 0.9588332386165727.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001431 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-



 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:47,228] Trial 17 finished with value: 0.9615358094937043 and parameters: {'n_estimators': 636, 'learning_rate': 0.03833784207930478, 'num_leaves': 56, 'max_depth': 10, 'min_child_samples': 85, 'subsample': 0.6817744167285987, 'colsample_bytree': 0.7139173042493796}. Best is trial 17 with value: 0.9615358094937043.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001102 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:47,649] Trial 18 finished with value: 0.9662083015024191 and parameters: {'n_estimators': 633, 'learning_rate': 0.031084837152205155, 'num_leaves': 53, 'max_depth': 10, 'min_child_samples': 85, 'subsample': 0.699899736759117, 'colsample_bytree': 0.7198131058104018}. Best is trial 18 with value: 0.9662083015024191.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001565 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:48,075] Trial 19 finished with value: 0.935440065681445 and parameters: {'n_estimators': 623, 'learning_rate': 0.03460250297759665, 'num_leaves': 17, 'max_depth': 10, 'min_child_samples': 79, 'subsample': 0.678079712855292, 'colsample_bytree': 0.7082914400577233}. Best is trial 18 with value: 0.9662083015024191.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002286 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:48,540] Trial 20 finished with value: 0.9561663025663026 and parameters: {'n_estimators': 518, 'learning_rate': 0.01748003093415809, 'num_leaves': 54, 'max_depth': 11, 'min_child_samples': 86, 'subsample': 0.7214978947333018, 'colsample_bytree': 0.6040403361145927}. Best is trial 18 with value: 0.9662083015024191.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:48,939] Trial 21 finished with value: 0.9560967878633783 and parameters: {'n_estimators': 705, 'learning_rate': 0.07612623420154899, 'num_leaves': 72, 'max_depth': 10, 'min_child_samples': 88, 'subsample': 0.663310792879685, 'colsample_bytree': 0.6982383158844946}. Best is trial 18 with value: 0.9662083015024191.


[LightGBM] [Info] Number of positive: 25, number of negative: 391


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001252 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:50,190] Trial 22 finished with value: 0.97997335997336 and parameters: {'n_estimators': 645, 'learning_rate': 0.040157865064727, 'num_leaves': 57, 'max_depth': 9, 'min_child_samples': 70, 'subsample': 0.8111116983451957, 'colsample_bytree': 0.7652835045788573}. Best is trial 22 with value: 0.97997335997336.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:51,092] Trial 23 finished with value: 0.9746455026455026 and parameters: {'n_estimators': 638, 'learning_rate': 0.03751748839087452, 'num_leaves': 55, 'max_depth': 11, 'min_child_samples': 73, 'subsample': 0.7302931937815202, 'colsample_bytree': 0.7599649812762488}. Best is trial 22 with value: 0.97997335997336.




 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.035450 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.
[I 2026-01-31 10:23:52,429] Trial 24 finished with value: 0.9383366302899119 and parameters: {'n_estimators': 563, 'learning_rate': 0.025413187160172226, 'num_leaves': 46, 'max_depth': 11, 'min_child_samples': 69, 'subsample': 0.7893420944026871, 'colsample_bytree': 0.7744743487276211}. Best is trial 22 with value: 0.97997335997336.




(0.97997335997336,
 {'n_estimators': 645,
  'learning_rate': 0.040157865064727,
  'num_leaves': 57,
  'max_depth': 9,
  'min_child_samples': 70,
  'subsample': 0.8111116983451957,
  'colsample_bytree': 0.7652835045788573})

In [6]:
best_params = study.best_params

final_lgbm = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("model", LGBMClassifier(
        **best_params,
        class_weight="balanced",
        random_state=42,
        n_jobs=-1
    ))
])

final_lgbm.fit(X_train, y_train)

 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


[LightGBM] [Info] Number of positive: 25, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12938
[LightGBM] [Info] Number of data points in the train set: 416, number of used features: 120
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000


In [7]:
import numpy as np

y_probs = final_lgbm.predict_proba(X_val)[:, 1]

thresholds = np.linspace(0.1, 0.9, 50)

results = []

from sklearn.metrics import precision_score, recall_score

for t in thresholds:
    y_pred = (y_probs >= t).astype(int)
    results.append({
        "threshold": t,
        "precision": precision_score(y_val, y_pred),
        "recall": recall_score(y_val, y_pred),
    })

results_df = pd.DataFrame(results)
results_df.sort_values(by="precision", ascending=False).head(10)

 'sensor_4_roll_std_1' 'sensor_5_roll_std_1' 'sensor_6_roll_std_1'
 'sensor_7_roll_std_1' 'sensor_8_roll_std_1' 'sensor_9_roll_std_1'
 'sensor_10_roll_std_1' 'sensor_11_roll_std_1' 'sensor_12_roll_std_1'
 'sensor_13_roll_std_1' 'sensor_14_roll_std_1' 'sensor_15_roll_std_1'
 'sensor_16_roll_std_1' 'sensor_17_roll_std_1' 'sensor_18_roll_std_1'
 'sensor_19_roll_std_1' 'sensor_20_roll_std_1' 'sensor_21_roll_std_1']. At least one non-missing value is needed for imputation with strategy='median'.


Unnamed: 0,threshold,precision,recall
0,0.1,1.0,0.64
1,0.116327,1.0,0.64
2,0.132653,1.0,0.64
3,0.14898,1.0,0.64
4,0.165306,1.0,0.64
5,0.181633,1.0,0.64
6,0.197959,1.0,0.64
7,0.214286,1.0,0.64
8,0.230612,1.0,0.64
9,0.246939,1.0,0.64


In [8]:
import joblib

joblib.dump(final_lgbm, "factoryguard_final_model.joblib")


['factoryguard_final_model.joblib']