# Import Statements

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# Load Data

In [2]:
# Read the pickled object stored at ../../processed_data.pkl into `data`.
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so this file must come from a trusted source.
# The `with` block guarantees the handle is closed even if unpickling raises.
with open('../../processed_data.pkl', 'rb') as file:
    data = pickle.load(file)

In [3]:
# Pull the four splits out of the loaded object by key, in the same
# train/test, x/y order as the names on the left-hand side.
train_x, train_y, test_x, test_y = (
    data[split_key] for split_key in ('train_x', 'train_y', 'test_x', 'test_y')
)

# Logistic Regression

In [4]:
from sklearn.linear_model import LogisticRegression

## Logistic Regression with No Regularization

In [5]:
# Unregularized baseline model.
# The string 'none' was deprecated in scikit-learn 1.2 and is rejected from
# 1.4 onwards; `penalty=None` is the supported way to disable regularization.
# NOTE(review): scikit-learn < 1.2 only accepts the old 'none' string — confirm
# the installed version before running.
logistic_regression_model = LogisticRegression(
    penalty=None,
    fit_intercept=True,
    random_state=0,
)

In [6]:
# fit() returns the fitted estimator itself, so training and test-set
# prediction are expressed here as one chained call.
logistic_regression_model_predictions = (
    logistic_regression_model
    .fit(train_x, train_y)
    .predict(test_x)
)

In [7]:
# score() gives mean accuracy as a fraction; scale it to a percentage and
# report the train and test figures on consecutive lines.
print(
    'Train Accuracy: {} %'.format(100*logistic_regression_model.score(train_x, train_y)),
    'Test Accuracy: {} %'.format(100*logistic_regression_model.score(test_x, test_y)),
    sep='\n',
)

Train Accuracy: 88.54724745710071 %
Test Accuracy: 88.1055900621118 %


## Logistic Regression with L1 Regularization

In [8]:
# L1-penalized logistic regression. The liblinear solver is named explicitly
# because scikit-learn's default solver (lbfgs) does not support an L1 penalty.
logistic_regression_l1_model = LogisticRegression(
    solver='liblinear',
    penalty='l1',
    random_state=0,
    fit_intercept=True,
)

In [9]:
# Train the L1 model and predict on the test features in one chained call
# (fit() returns the fitted estimator).
logistic_regression_l1_model_predictions = (
    logistic_regression_l1_model
    .fit(train_x, train_y)
    .predict(test_x)
)

In [10]:
# Report mean accuracy of the L1 model, as a percentage, on both splits.
print(
    'Train Accuracy: {} %'.format(100*logistic_regression_l1_model.score(train_x, train_y)),
    'Test Accuracy: {} %'.format(100*logistic_regression_l1_model.score(test_x, test_y)),
    sep='\n',
)

Train Accuracy: 88.56277661309107 %
Test Accuracy: 88.1055900621118 %


## Logistic Regression with L2 Regularization

In [11]:
# L2-penalized logistic regression; solver and regularization strength are
# left at the library defaults.
logistic_regression_l2_model = LogisticRegression(
    random_state=0,
    fit_intercept=True,
    penalty='l2',
)

In [12]:
# Train the L2 model and predict on the test features in one chained call
# (fit() returns the fitted estimator).
logistic_regression_l2_model_predictions = (
    logistic_regression_l2_model
    .fit(train_x, train_y)
    .predict(test_x)
)

In [13]:
# Report mean accuracy of the L2 model, as a percentage, on both splits.
print(
    'Train Accuracy: {} %'.format(100*logistic_regression_l2_model.score(train_x, train_y)),
    'Test Accuracy: {} %'.format(100*logistic_regression_l2_model.score(test_x, test_y)),
    sep='\n',
)

Train Accuracy: 88.56277661309107 %
Test Accuracy: 88.1055900621118 %


## Logistic Regression with Both L1 and L2 Regularization (Elastic Net)

In [14]:
# Elastic-net mixing values to sweep; each is passed as `l1_ratio` to
# LogisticRegression (0 corresponds to pure L2, 1 to pure L1).
l1_ratio_list = [
    0.1,
    0.25,
    0.5,
    0.75,
    0.9,
]

In [15]:
# Fit one elastic-net model per mixing value and report its accuracy.
# saga is named explicitly because it is the scikit-learn LogisticRegression
# solver that supports penalty='elasticnet'.
for l1_ratio in l1_ratio_list:
    logistic_regression_elasticnet_model = LogisticRegression(
        solver='saga',
        penalty='elasticnet',
        l1_ratio=l1_ratio,
        random_state=0,
        fit_intercept=True,
    )

    # fit() returns the estimator, so fitting and predicting chain together.
    logistic_regression_elasticnet_model_predictions = (
        logistic_regression_elasticnet_model
        .fit(train_x, train_y)
        .predict(test_x)
    )

    # Mean accuracy as a percentage on the train and test splits.
    print(
        'Train Accuracy: {} %'.format(100*logistic_regression_elasticnet_model.score(train_x, train_y)),
        'Test Accuracy: {} %'.format(100*logistic_regression_elasticnet_model.score(test_x, test_y)),
        sep='\n',
    )
    # print('\n') emits two newlines, leaving two blank lines between results.
    print('\n')

Train Accuracy: 88.56277661309107 %
Test Accuracy: 88.1055900621118 %


Train Accuracy: 88.57054119108626 %
Test Accuracy: 88.1055900621118 %


Train Accuracy: 88.58607034707664 %
Test Accuracy: 88.1055900621118 %


Train Accuracy: 88.58607034707664 %
Test Accuracy: 88.1055900621118 %


Train Accuracy: 88.58607034707664 %
Test Accuracy: 88.1055900621118 %


