In [4]:
# load packages
import numpy as np
import pandas as pd
import random
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Part 1: Logistic Regression Classifier

In this section, you will get some practice working with regularisation and hyperparameter tuning in prediction models. You will use a python DIGITS dataset to complete this homework. 

In [7]:
#loading the DIGITS dataset
digits = load_digits()

#training the dataset –– I choose to build the array of c_values from 10^-5 to 10^5 
x_train, x_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.20, random_state=0)
c_vals = np.array([0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0])
skf = StratifiedKFold(n_splits=10)
split_indices = skf.split(x_train, y_train)
test_l1 = []
test_l2 = []
i = 0

In [8]:
#textual representation of the improvements on the test data
for train_index, test_index in split_indices:
    print("~~~~~~~~~~~~C=" + str(c_vals[i]) + '~~~~~~~~~~~~')
    LogRegL1 = LogisticRegression(penalty='l1', C=c_vals[i])
    LogRegL1.fit(x_train[train_index], y_train[train_index])
    train_acc_l1 = LogRegL1.score(x_train[train_index], y_train[train_index])
    test_acc_l1 = LogRegL1.score(x_train[test_index], y_train[test_index])
    test_l1.append(test_acc_l1)
    print("Logistic Regression L1 Train Accuracy: ", train_acc_l1)
    print("Logistic Regression L1 Test Accuracy: ", test_acc_l1)
    print("Confusion Matrix for Train: \n", confusion_matrix(y_train, LogRegL1.predict(x_train)))
    print("Confusion Matrix for Test: \n", confusion_matrix(y_test, LogRegL1.predict(x_test)))

    LogRegL2 = LogisticRegression(penalty='l2', C=c_vals[i])
    LogRegL2.fit(x_train[train_index], y_train[train_index])
    train_acc_l2 = LogRegL2.score(x_train[train_index], y_train[train_index])
    test_acc_l2 = LogRegL2.score(x_train[test_index], y_train[test_index])
    test_l2.append(test_acc_l2)
    print("Logistic Regression L2 Train Accuracy: ", train_acc_l2)
    print("Logistic Regression L2 Test Accuracy: ", test_acc_l2)
    print("Confusion Matrix for Train: \n", confusion_matrix(y_train, LogRegL2.predict(x_train)))
    print("Confusion Matrix for Test: \n", confusion_matrix(y_test, LogRegL2.predict(x_test)))
    
    i += 1

~~~~~~~~~~~~C=1e-05~~~~~~~~~~~~
Logistic Regression L1 Train Accuracy:  0.1048136645962733
Logistic Regression L1 Test Accuracy:  0.10738255033557047
Confusion Matrix for Train: 
 [[151   0   0   0   0   0   0   0   0   0]
 [147   0   0   0   0   0   0   0   0   0]
 [141   0   0   0   0   0   0   0   0   0]
 [154   0   0   0   0   0   0   0   0   0]
 [151   0   0   0   0   0   0   0   0   0]
 [142   0   0   0   0   0   0   0   0   0]
 [137   0   0   0   0   0   0   0   0   0]
 [140   0   0   0   0   0   0   0   0   0]
 [135   0   0   0   0   0   0   0   0   0]
 [139   0   0   0   0   0   0   0   0   0]]
Confusion Matrix for Test: 
 [[27  0  0  0  0  0  0  0  0  0]
 [35  0  0  0  0  0  0  0  0  0]
 [36  0  0  0  0  0  0  0  0  0]
 [29  0  0  0  0  0  0  0  0  0]
 [30  0  0  0  0  0  0  0  0  0]
 [40  0  0  0  0  0  0  0  0  0]
 [44  0  0  0  0  0  0  0  0  0]
 [39  0  0  0  0  0  0  0  0  0]
 [39  0  0  0  0  0  0  0  0  0]
 [41  0  0  0  0  0  0  0  0  0]]
Logistic Regression L2 Train 

Logistic Regression L1 Train Accuracy:  0.9976816074188563
Logistic Regression L1 Test Accuracy:  0.951048951048951
Confusion Matrix for Train: 
 [[151   0   0   0   0   0   0   0   0   0]
 [  0 146   0   1   0   0   0   0   0   0]
 [  0   0 141   0   0   0   0   0   0   0]
 [  0   0   0 154   0   0   0   0   0   0]
 [  0   0   0   0 150   0   0   0   0   1]
 [  0   0   0   1   0 141   0   0   0   0]
 [  0   0   0   0   0   0 137   0   0   0]
 [  0   0   0   0   0   0   0 140   0   0]
 [  0   2   0   1   0   0   0   0 132   0]
 [  0   1   0   1   0   0   0   1   1 135]]
Confusion Matrix for Test: 
 [[27  0  0  0  0  0  0  0  0  0]
 [ 0 32  0  0  0  0  1  0  1  1]
 [ 0  0 34  2  0  0  0  0  0  0]
 [ 0  0  0 29  0  0  0  0  0  0]
 [ 0  0  0  0 30  0  0  0  0  0]
 [ 0  0  0  1  0 39  0  0  0  0]
 [ 0  0  0  0  0  0 44  0  0  0]
 [ 0  1  0  0  1  0  0 37  0  0]
 [ 0  3  1  0  0  0  0  0 35  0]
 [ 0  0  0  1  0  1  0  1  1 37]]
Logistic Regression L2 Train Accuracy:  0.9984544049459042
Logi

Logistic Regression L2 Train Accuracy:  0.9992289899768697
Logistic Regression L2 Test Accuracy:  0.9285714285714286
Confusion Matrix for Train: 
 [[151   0   0   0   0   0   0   0   0   0]
 [  0 145   0   0   0   0   0   0   2   0]
 [  0   0 141   0   0   0   0   0   0   0]
 [  0   0   0 153   0   0   0   1   0   0]
 [  0   1   0   0 149   0   0   0   0   1]
 [  0   0   0   1   0 140   0   0   0   1]
 [  0   0   0   0   0   0 137   0   0   0]
 [  0   0   0   0   0   0   0 139   0   1]
 [  0   2   0   0   0   0   0   0 132   1]
 [  0   0   0   0   0   0   0   0   0 139]]
Confusion Matrix for Test: 
 [[27  0  0  0  0  0  0  0  0  0]
 [ 0 29  0  0  0  0  1  0  5  0]
 [ 0  1 33  2  0  0  0  0  0  0]
 [ 0  0  0 28  0  1  0  0  0  0]
 [ 0  0  0  0 30  0  0  0  0  0]
 [ 0  1  0  0  0 38  0  0  0  1]
 [ 0  1  0  0  0  0 43  0  0  0]
 [ 0  2  0  0  0  0  0 37  0  0]
 [ 0  2  1  2  0  0  0  0 31  3]
 [ 0  0  0  2  0  1  0  0  2 36]]


In [None]:
#visual representation on the 
plt.figure(1)
plt.plot(c_vals, test_l1)
plt.figure(2)
plt.plot(c_vals, test_l2)
plt.show()