In [1]:
#necessary imports
import optuna
from optuna.samplers import TPESampler
from joblib import dump, load
from lightgbm import LGBMClassifier
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import make_pipeline as imblearn_make_pipeline
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer,precision_score,recall_score


In [2]:
#resampling method
rs=44
rus=RandomUnderSampler(random_state=rs)


In [3]:
#load saved pickled train set and columntransformer object for hyperparameter tunning
X_train=load('/content/drive/MyDrive/Datascience projects/Bank_term_deposit_subscription_prediction_project/X_train')
y_train=load('/content/drive/MyDrive/Datascience projects/Bank_term_deposit_subscription_prediction_project/y_train')
preprocessor=load('/content/drive/MyDrive/Datascience projects/Bank_term_deposit_subscription_prediction_project/preprocessor')


In [4]:
#make function to compute recall score
second_class_recall_function = {'2nd_class_recall_score': make_scorer(recall_score,average='binary',pos_label=1)}

In [5]:
# Set the random seed
sampler = TPESampler(seed=10)

# Define the objective function to minimize
def objective(trial):
    # Sample hyperparameters from the search space
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 75, 500),
        'max_depth': trial.suggest_int('max_depth', 1, 20),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1),
        'subsample': trial.suggest_float('subsample', 0.1, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 10),
    }
    
    # Define the model to use
    model = LGBMClassifier(class_weight={0:0.4,1:0.6}, **params, random_state=rs, n_jobs=1)
    lgbm_model=imblearn_make_pipeline(preprocessor,rus,model)
    # Evaluate the model using cross-validation
    score = cross_validate(lgbm_model, X_train, y_train, cv=5, scoring=second_class_recall_function)
    
    # Return the negative score (since Optuna tries to minimize the objective function)
    return score['test_2nd_class_recall_score'].mean()

# Run the optimization
lgbm_study = optuna.create_study(direction='maximize', sampler=sampler)
lgbm_study.optimize(objective, n_trials=50)

# Print best score and parameters
print('\n')
best_score = lgbm_study.best_value
best_params = lgbm_study.best_params
print("Best score: ", best_score)
print('Best parameters:', best_params)

[32m[I 2023-05-25 19:01:22,534][0m A new study created in memory with name: no-name-c2757946-c087-40df-8a70-1e1fe66e7450[0m
[32m[I 2023-05-25 19:01:23,968][0m Trial 0 finished with value: 0.7132819160321128 and parameters: {'n_estimators': 403, 'max_depth': 1, 'learning_rate': 0.6373117525770127, 'subsample': 0.7739234942847507, 'colsample_bytree': 0.5486563110723314, 'reg_alpha': 2.2479664553084766, 'reg_lambda': 1.9806286475962398}. Best is trial 0 with value: 0.7132819160321128.[0m
[32m[I 2023-05-25 19:01:26,056][0m Trial 1 finished with value: 0.7343032725927162 and parameters: {'n_estimators': 398, 'max_depth': 4, 'learning_rate': 0.09745641603227016, 'subsample': 0.7168238365310176, 'colsample_bytree': 0.9580540115754429, 'reg_alpha': 0.039482663279144514, 'reg_lambda': 5.121922633857766}. Best is trial 1 with value: 0.7343032725927162.[0m
[32m[I 2023-05-25 19:01:27,053][0m Trial 2 finished with value: 0.7224430715428456 and parameters: {'n_estimators': 421, 'max_depth



Best score:  0.7413091821964571
Best parameters: {'n_estimators': 421, 'max_depth': 16, 'learning_rate': 0.9103669836149376, 'subsample': 0.6919498571754856, 'colsample_bytree': 0.8737560492190305, 'reg_alpha': 1.9589492800297248, 'reg_lambda': 6.012285238177178}
