In [1]:
import numpy as np

import lightgbm as lgb

from sklearn.datasets import make_classification

from sklearn.model_selection import train_test_split

from sklearn.metrics import log_loss

# from bayes_opt import BayesianOptimization

In [2]:
# Build a reproducible synthetic binary-classification problem:
# 5000 samples, 20 features, fixed seed.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    random_state=42,
)

In [3]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Wrap the arrays in LightGBM Dataset objects.
train_data = lgb.Dataset(X_train, label=y_train)

# Tie the validation Dataset to the training Dataset via `reference`, as the
# LightGBM Dataset documentation asks for validation data, so both are
# discretised with the same feature bins.
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

1. Custom Loss Functions in LightGBM

By default, LightGBM uses predefined loss functions like binary log loss or MSE.
But we can define our own custom loss function to guide optimization.


Below we define a custom log-loss objective for LightGBM that returns the gradient and Hessian the booster needs.

In [5]:
# Custom Log Loss function (for binary classification)

def custom_log_loss(y_true, y_pred):
    """Gradient and Hessian of binary log loss with respect to the raw score.

    LightGBM passes a custom objective the raw (pre-sigmoid) scores, not
    probabilities, so the scores are mapped through a sigmoid before the
    derivatives are computed.

    Parameters
    ----------
    y_true : array-like
        Binary target labels (0 or 1).
    y_pred : array-like
        Raw model scores (log-odds), same length as ``y_true``.

    Returns
    -------
    grad : numpy.ndarray
        First derivative of the loss w.r.t. the raw score: ``p - y``.
    hess : numpy.ndarray
        Second derivative of the loss w.r.t. the raw score: ``p * (1 - p)``.
    """
    eps = 1e-15

    labels = np.asarray(y_true, dtype=np.float64)
    raw_score = np.asarray(y_pred, dtype=np.float64)

    # sigmoid(x) = exp(-log(1 + exp(-x))); logaddexp keeps this stable for
    # large |x| instead of overflowing inside exp().
    prob = np.exp(-np.logaddexp(0.0, -raw_score))

    # Keep probabilities strictly inside (0, 1) so the Hessian never hits zero.
    prob = np.clip(prob, eps, 1 - eps)

    # Gradient (first derivative)
    grad = prob - labels

    # Hessian (second derivative)
    hess = prob * (1 - prob)

    return grad, hess

In [6]:
# Booster configuration: built-in binary objective, evaluated with binary log loss.
params = dict(
    objective='binary',
    metric='binary_logloss',
    learning_rate=0.05,
    num_leaves=31,
)

In [7]:
# Cast both feature matrices to float32.
# NOTE(review): train_data / test_data were already built from the arrays
# before this cast, so only later uses of X_train / X_test see float32 — confirm intended.
X_train, X_test = (
    features.astype(np.float32) for features in (X_train, X_test)
)

In [8]:
# Train with the objective named in `params`; the custom objective defined
# earlier is not passed to the booster here.
# Stop when the validation metric has not improved for 30 rounds, and log
# the metric every 50 rounds.
model = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=200,
    valid_sets=[test_data],
    callbacks=[
        lgb.early_stopping(30),
        lgb.log_evaluation(50),
    ],
)

[LightGBM] [Info] Number of positive: 2008, number of negative: 1992
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000592 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5100
[LightGBM] [Info] Number of data points in the train set: 4000, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.502000 -> initscore=0.008000
[LightGBM] [Info] Start training from score 0.008000
Training until validation scores don't improve for 30 rounds
[50]	valid_0's binary_logloss: 0.217242
[100]	valid_0's binary_logloss: 0.200936
Early stopping, best iteration is:
[76]	valid_0's binary_logloss: 0.20056


In [9]:
# Predict on the held-out features, explicitly using the iteration chosen
# by early stopping (the same iteration predict() falls back to by default).
y_pred = model.predict(X_test, num_iteration=model.best_iteration)

In [10]:
# Score the predictions against the held-out labels with scikit-learn's log_loss.
loss = log_loss(y_true=y_test, y_pred=y_pred)
print(f"Custom Log Loss: {loss:.4f}")

Custom Log Loss: 0.2006


 2. Bayesian Optimization for Hyperparameter Tuning
 
 
 Instead of grid search or random search, we use Bayesian Optimization for smarter tuning.
It learns from past trials and selects better hyperparameters efficiently.

In [11]:
# Define objective function

def lgb_evaluate(num_leaves, learning_rate, max_depth):
    """Train LightGBM with the given hyperparameters and return its accuracy.

    Reads the module-level ``train_data``, ``test_data``, ``X_test`` and
    ``y_test``.

    Parameters
    ----------
    num_leaves : float or int
        Maximum number of leaves per tree; truncated to ``int``.
    learning_rate : float
        Boosting learning rate.
    max_depth : float or int
        Maximum tree depth; truncated to ``int``.

    Returns
    -------
    float
        Fraction of ``X_test`` rows whose prediction, thresholded at 0.5,
        matches ``y_test``.
    """
    hyper_params = {
        'objective': 'binary',
        'metric': 'binary_error',
        'boosting_type': 'gbdt',
        # The arguments may arrive as floats, but LightGBM needs integers here.
        'num_leaves': int(num_leaves),
        'learning_rate': learning_rate,
        'max_depth': int(max_depth),
        'verbose': -1,
    }

    # NOTE(review): test_data drives both early stopping and the returned
    # score, so tuning on this value leaks test information — consider a
    # separate validation split.
    hyper_model = lgb.train(
        # Use the candidate hyperparameters, not the global `params`;
        # otherwise every trial trains the same model.
        hyper_params,
        train_data,
        valid_sets=[test_data],
        num_boost_round=100,
        callbacks=[
            lgb.early_stopping(stopping_rounds=10),
            # period=0 turns off per-iteration evaluation logging.
            lgb.log_evaluation(period=0),
        ],
    )

    hyper_preds = hyper_model.predict(X_test)

    # Threshold the predictions at 0.5 and compare with the true labels.
    hyper_accuracy = np.mean((hyper_preds > 0.5) == y_test)

    return hyper_accuracy


In [12]:
# Define search space

