# Logistic Regression parameter search

Duncan Tulimieri

In [1]:
# import libraries 
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 16}) 
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
import warnings 
warnings.filterwarnings('ignore')
import time 
import seaborn as sns
# personal classes
from ProcessData import ProcessForestData
import savingfigR as sf

In [2]:
class pLogisticRegression(ProcessForestData):
    
    penalty_options = ['l1', 'l2', 'elasticnet', 'none']
    C_options = np.linspace(0.01, 1, 5)
    intercept_options = [True, False]
    l1_ratio_options = np.linspace(0, 1, 5)
    
    def __init__(self):
        # method calls 
        self.X_train, self.X_test, self.y_train, self.y_test = self.load_data(perform_scale=True, sub_data_section='')
        self.un_classifiers = np.unique(self.y_train)
        # Raw data 
        start = time.time()
        LR_trained_opt = self.optimize_LogisticRegression_params(self.X_train, self.y_train, self.penalty_options, self.C_options, self.intercept_options, self.l1_ratio_options)
        LR_score = self.score_LogisticRegression(LR_trained_opt, self.X_test, self.y_test)
        end = time.time()
        print(f'Raw data LogisticRegression optimal score = {LR_score}')
        print(f'Time taken = {end-start}')

    # model
    def train_LogisticRegression(self, X, y, penalty, C, fit_B0, l1_ratio):
        if penalty == 'elasticnet':
            return LogisticRegression(penalty=penalty, C=C, fit_intercept=fit_B0, l1_ratio=l1_ratio, n_jobs=4, solver='saga').fit(X, y)
        else: 
            return LogisticRegression(penalty=penalty, C=C, fit_intercept=fit_B0, n_jobs=4, solver='saga').fit(X, y)

    def score_LogisticRegression(self, trained_LogisticRegression_model, X_test, y_test):
        return trained_LogisticRegression_model.score(X_test, y_test)

    def predict_LogisticRegression(self, trained_LogisticRegression_model, X_test):
        return trained_LogisticRegression_model.predict(X_test)

    def optimize_LogisticRegression_params(self, X_train, y_train, penalty_options=penalty_options, C_options=C_options, intercept_options=intercept_options, l1_ratio_options=l1_ratio_options, cv=10, scoring='accuracy'):
        LogisticRegression_raw = LogisticRegression()
        cv_train_model = GridSearchCV(LogisticRegression_raw, param_grid={'penalty':penalty_options, 'C': C_options, 'fit_intercept':intercept_options, 'l1_ratio':l1_ratio_options}, cv=cv, scoring=scoring).fit(X_train, y_train)
        print(f'Best LogisticRegression parameters: penalty = {cv_train_model.best_params_["penalty"]}, C = {cv_train_model.best_params_["C"]}, fit_intercept = {cv_train_model.best_params_["fit_intercept"]}, l1_ratio = {cv_train_model.best_params_["l1_ratio"]}')
        best_model = self.train_LogisticRegression(X_train, y_train, penalty=cv_train_model.best_params_["penalty"], C=cv_train_model.best_params_["C"], fit_B0=cv_train_model.best_params_["fit_intercept"], l1_ratio=cv_train_model.best_params_["l1_ratio"])
        return best_model    

In [3]:
testLR = pLogisticRegression()

Best LogisticRegression parameters: penalty = l2, C = 0.7525, fit_intercept = True, l1_ratio = 0.0
Raw data LogisticRegression optimal score = 0.7166304078429213
Time taken = 14484.176457881927
