# Tuning Logistic Regression

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import pandas as pd 
import numpy as np 
import optuna

In [16]:
# Read the four pre-scaled splits in one pass; every CSV stores its index
# in the first column, so it is restored with index_col=0.
xtrain, xtest, ytrain, ytest = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'scaled_data/xtrain.csv',
        'scaled_data/xtest.csv',
        'scaled_data/ytrain.csv',
        'scaled_data/ytest.csv',
    )
)

In [17]:
# Collapse the test labels to a 1-D array.
# np.asarray accepts both the DataFrame loaded from CSV and an already
# flattened ndarray, so re-running this cell no longer fails with
# AttributeError (an ndarray has no .to_numpy()).
ytest = np.asarray(ytest).flatten()
ytest.shape

(172572,)

In [18]:
# Collapse the training labels to a 1-D array.
# np.asarray accepts both the DataFrame loaded from CSV and an already
# flattened ndarray, so re-running this cell no longer fails with
# AttributeError (an ndarray has no .to_numpy()).
ytrain = np.asarray(ytrain).flatten()
ytrain.shape

(402668,)

In [25]:
# define the objective function
def objective(trial: optuna.Trial, 
              xtrain: pd.DataFrame, 
              ytrain: np.ndarray, 
              xtest: pd.DataFrame, 
              ytest: np.ndarray) -> float:
    """Fit one logistic-regression candidate and return its ROC AUC.

    Parameters
    ----------
    trial : optuna.Trial
        Trial used to sample ``penalty``, ``C``, ``solver``, ``max_iter``
        and ``class_weight``.
    xtrain, ytrain
        Features and labels the model is fitted on.
    xtest, ytest
        Features and labels the ROC AUC is computed on.

    Returns
    -------
    float
        ROC AUC of the second ``predict_proba`` column against ``ytest``.

    Notes
    -----
    NOTE(review): the score is computed on ``xtest``/``ytest``, so the
    hyperparameters chosen by the study are tuned against that split.
    Consider scoring on a separate validation split or with
    cross-validation and keeping the test split for a final check.
    """
    # Suggest hyperparameters to tune
    penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
    C = trial.suggest_float('C', 1e-5, 1e5, log=True)
    # `solver` used to be referenced without being defined (NameError).
    # Only liblinear is offered because it supports both the 'l1' and 'l2'
    # penalties sampled above; newton-cg and the default lbfgs reject 'l1'.
    # Sampling it (rather than hard-coding) keeps 'solver' in best_params.
    solver = trial.suggest_categorical('solver', ['liblinear'])
    max_iter = trial.suggest_int('max_iter', 50, 500)
    class_weight = trial.suggest_categorical('class_weight', [None, 'balanced'])

    # Create the model with the suggested hyperparameters
    model = LogisticRegression(
        penalty=penalty,
        C=C,
        solver=solver,
        max_iter=max_iter,
        class_weight=class_weight
    )

    # Train the model
    model.fit(xtrain, ytrain)

    # Column 1 holds the probability of the second class in model.classes_
    y_pred_proba = model.predict_proba(xtest)

    roc_auc = roc_auc_score(ytest, y_pred_proba[:, 1])

    return float(roc_auc)

In [None]:
# Create a study object and specify the optimization direction.
# TPE is Optuna's default sampler; seeding it makes the sequence of
# suggested hyperparameters repeatable across kernel restarts.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)

# Optimize the objective function; the lambda binds the data splits so
# Optuna only has to supply the trial.
study.optimize(lambda trial: objective(trial, 
                                       xtrain = xtrain, 
                                       xtest = xtest,
                                       ytrain = ytrain,
                                       ytest = ytest), n_trials=100)

# Print the best hyperparameters
print('Best hyperparameters:', study.best_params)
print('Best score:', study.best_value)

In [27]:
# Fixed logistic-regression settings, written out as literals.
# NOTE(review): presumably copied from a finished tuning run — confirm.
hyperparams = dict(
    penalty='l2',
    C=0.3255323004350203,
    solver='liblinear',
    max_iter=486,
    class_weight='balanced',
)