In [20]:
#Imports 
import numpy as np #Represent ndarrays a.k.a. tensors
import matplotlib.pyplot as plt #For plotting
np.random.seed(0) #For repeatability of the experiment
import pickle #To read data for this experiment
import _pickle as cPickle
import gzip
from sklearn.cross_validation import StratifiedKFold


# Third-party libraries
import numpy as np

#Setup
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

**Data**

In [16]:

#Read data
def load_data():
    with gzip.open('mnist.pkl.gz', 'rb') as f:
        training_data, validation_data, test_data = cPickle.load(f, encoding="latin-1")
    f.close()
    return (training_data, validation_data, test_data)

def load_data_wrapper():
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = np.array(list(zip(training_inputs, training_results)))
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = np.array(list(zip(validation_inputs, va_d[1])))
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = np.array(list(zip(test_inputs, te_d[1])))
    return (training_data, validation_data, test_data)

def vectorized_result(j):
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e

training_data, validation_data, test_data = load_data_wrapper()

training_data_nolab = np.delete(training_data,[1],axis = 1)
training_labels = np.array(np.delete(training_data,[0],axis=1))
training_labels_list = []
training_data_nolab_list = np.zeros(shape=(50000,784))

for i in range(len(training_labels)):
    list.append(training_labels_list, np.argmax(training_labels[i,0]))

for i in range(len(training_data_nolab)):
    training_data_nolab_list[i] = np.array(np.ravel(training_data_nolab[i,0]))

Xtr = training_data_nolab_list
ytr = np.squeeze(training_labels_list)

test_data_nolab = np.delete(test_data,[1],axis = 1)
test_labels = np.array(np.delete(test_data,[0],axis=1))
test_labels_list = []
test_data_nolab_list = np.zeros(shape=(10000,784))

for i in range(len(test_labels)):
    list.append(test_labels_list, np.argmax(test_labels[i,0]))

for i in range(len(test_data_nolab)):
    test_data_nolab_list[i] = np.array(np.ravel(test_data_nolab[i,0]))

Xtst = test_data_nolab_list
ytst = np.squeeze(test_labels_list)


#### Model

In [17]:
# cross validation scheme
def cross_validate_classifier(Xtr, ytr, num_of_folds):
    # create stratified k folds of dataset for cross validation
    skf = StratifiedKFold(ytr, n_folds=num_of_folds,random_state=0)
    # store predicted accuracies of each fold
    CV_pred_accuracies = []

    for train_index, valid_index in skf:
        X_train, X_valid = Xtr[train_index], Xtr[valid_index]
        y_train, y_valid = ytr[train_index], ytr[valid_index]
        W, b = train_linear_classifier(X_train, y_train, 8e-1, 2e-3, 100)
        test_accuracy = test_linear_classifier(X_valid, y_valid, W, b)
        list.append(CV_pred_accuracies,test_accuracy)
        print("cumulative CV accuracy: ", np.mean(CV_pred_accuracies),"\n")
    return W, b

# Linear model    
def train_linear_classifier(X_train, y_train, step_size, reg, gd_iters):    
    #Define some local varaibles
    D = X.shape[1] #Number of features 28x28 = 784 for mnist
    K = 10 #Number of classes assuming class index starts from 0

    # Start with an initialize parameters randomly
    W = 0.01 * np.random.randn(D,K)
    b = np.zeros((1,K))

    #For simplicity we will take the batch size to be the same as number of examples
    num_examples = X.shape[0]
    
    print("reg param:",reg)
    print("step size:",step_size)
    print("iterations:",gd_iters)
    
    # gradient descent loop
    for i in range(gd_iters):

        # evaluate class scores, [N x K]
        scores = np.dot(X, W) + b
        #print("score values")
        #print(scores[:10])
        #print("length:",len(scores))

        # compute the class probabilities
        exp_scores = np.exp(scores)
        # print("score values")
        # print(exp_scores[:10])
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True) # [N x K]


        # compute the loss: average cross-entropy loss and regularization
        corect_logprobs = -np.log(probs[range(num_examples),y])
        data_loss = np.sum(corect_logprobs)/num_examples
        reg_loss = 0.5*reg*np.sum(W*W)
        loss = data_loss + reg_loss
        if i % 50 == 0:
            print("iteration:",i, " loss:",loss)

        # compute the gradient on scores
        dscores = probs
        dscores[range(num_examples),y] -= 1
        dscores /= num_examples

        # backpropate the gradient to the parameters (W,b)
        dW = np.dot(X.T, dscores)
        db = np.sum(dscores, axis=0, keepdims=True)

        dW += reg*W # regularization gradient

        # perform a parameter update
        W += -step_size * dW
        b += -step_size * db
    return W, b

In [18]:
def test_linear_classifier(X_data, y_data, W, b):
    # Post-training: evaluate validation set accuracy
    scores = np.dot(X_data, W) + b
    predicted_class = np.argmax(scores, axis=1)
    test_accuracy = (np.mean(predicted_class == y_data))
    return test_accuracy

**Post Training**

In [22]:
# classifier trained using 5-fold cross validation
W_tr, b_tr = cross_validate_classifier(Xtr, ytr, 5)

# testing the performance on hold out set
test_accuracy = test_linear_classifier(Xtst, ytst, W_tr, b_tr)
print("test accuracy: ",test_accuracy,"\n")

#plotting model result
#plot_result(Xtst, ytst, W_tr, b_tr)

#using several different values for the hyper parameter:
step_size_values = list(np.arange(0,1,0.1))  #Also called learning rate
step_size_values = np.append(step_size_values, list(np.arange(1,10,1)))

test_results = [0] * len(step_size_values)

for i, step_size in enumerate(step_size_values):
    W_tr, b_tr = train_linear_classifier(Xtr, ytr, step_size, 1e-3, 100)
    test_result = test_linear_classifier(Xtst, ytst, W_tr, b_tr)
    print("test accuracy: ",test_result,"\n")
    test_results[i] = test_result

plt.plot(step_size_values,test_results,'-')
plt.axis([0, max(step_size_values)+1, 0, 1])
plt.ylabel('test accuracy')
plt.xlabel('learning rate')
plt.show()

# best value for regularization
# we keep the learning rate to be constant
# and alter the reg parameter
reg_values = list([0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1])

test_results = [0] * len(reg_values)

for i, reg_value in enumerate(reg_values):
    W_tr, b_tr = train_linear_classifier(Xtr, ytr, 1, reg_value, 100)
    test_result = test_linear_classifier(Xtst, ytst, W_tr, b_tr)
    print("test accuracy: ",test_result,"\n")
    test_results[i] = test_result

plt.plot(reg_values,test_results,'-')
plt.axis([0, max(reg_values), 0, 1])
plt.ylabel('test accuracy')
plt.xlabel('regularization value')
plt.show()

# modifying the number of gradient descent iterations
# reg = 0.4, step_size = 1
gd_iters_vals = list([100, 500, 1000, 2000, 5000, 10000])

test_results = [0] * len(gd_iters_vals)

for i, gd_iters in enumerate(gd_iters_vals):
    W_tr, b_tr = train_linear_classifier(Xtr, ytr, 1, 0.4, gd_iters)
    test_result = test_linear_classifier(Xtst, ytst, W_tr, b_tr)
    print("test accuracy: ",test_result,"\n")
    test_results[i] = test_result

plt.plot(gd_iters_vals,test_results,'-')
plt.axis([0, max(gd_iters_vals), 0, 1])
plt.ylabel('test accuracy')
plt.xlabel('no of iterations')
plt.show()


reg param: 0.002
step size: 0.8
iterations: 100
iteration: 0  loss: 2.3212349842
iteration: 50  loss: 0.452181966512
cumulative CV accuracy:  0.905347326337 

reg param: 0.002
step size: 0.8
iterations: 100
iteration: 0  loss: 2.31259563997
iteration: 50  loss: 0.452227981408
cumulative CV accuracy:  0.901983801141 

reg param: 0.002
step size: 0.8
iterations: 100
iteration: 0  loss: 2.3074130784
iteration: 50  loss: 0.452242550683
cumulative CV accuracy:  0.901689200761 

reg param: 0.002
step size: 0.8
iterations: 100
iteration: 0  loss: 2.31483938144
iteration: 50  loss: 0.452032424269
cumulative CV accuracy:  0.899533880664 

reg param: 0.002
step size: 0.8
iterations: 100
iteration: 0  loss: 2.31109355176
iteration: 50  loss: 0.452306275297
cumulative CV accuracy:  0.898958837225 

test accuracy:  0.1028 

reg param: 0.001
step size: 0.0
iterations: 100
iteration: 0  loss: 2.30630667265
iteration: 50  loss: 2.30630667265
test accuracy:  0.4162 

reg param: 0.001
step size: 0.1
ite



iteration: 50  loss: 10.1716859914
test accuracy:  0.0926 

reg param: 0.001
step size: 25.0
iterations: 100
iteration: 0  loss: 2.28446734615


KeyboardInterrupt: 

In [None]:
#using several different values for the hyper parameter:
step_size_values = list(np.arange(0,1,0.1))  #Also called learning rate
step_size_values = np.append(step_size_values, list(np.arange(1,10,1)))

test_results = [0] * len(step_size_values)

for i, step_size in enumerate(step_size_values):
    W_tr, b_tr = train_linear_classifier(Xtr, ytr, step_size, 1e-3, 100)
    test_result = test_linear_classifier(Xtst, ytst, W_tr, b_tr)
    print("test accuracy: ",test_result,"\n")
    test_results[i] = test_result

plt.plot(step_size_values,test_results,'-')
plt.axis([0, max(step_size_values)+1, 0, 1])
plt.ylabel('test accuracy')
plt.xlabel('learning rate')
plt.show()

# best value for regularization
# we keep the learning rate to be constant
# and alter the reg parameter
reg_values = list([0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1])

test_results = [0] * len(reg_values)

for i, reg_value in enumerate(reg_values):
    W_tr, b_tr = train_linear_classifier(Xtr, ytr, 1, reg_value, 100)
    test_result = test_linear_classifier(Xtst, ytst, W_tr, b_tr)
    print("test accuracy: ",test_result,"\n")
    test_results[i] = test_result

plt.plot(reg_values,test_results,'-')
plt.axis([0, max(reg_values), 0, 1])
plt.ylabel('test accuracy')
plt.xlabel('regularization value')
plt.show()

# modifying the number of gradient descent iterations
# reg = 0.4, step_size = 1
gd_iters_vals = list([100, 500, 1000, 2000, 5000, 10000])

test_results = [0] * len(gd_iters_vals)

for i, gd_iters in enumerate(gd_iters_vals):
    W_tr, b_tr = train_linear_classifier(Xtr, ytr, 1, 0.4, gd_iters)
    test_result = test_linear_classifier(Xtst, ytst, W_tr, b_tr)
    print("test accuracy: ",test_result,"\n")
    test_results[i] = test_result

plt.plot(gd_iters_vals,test_results,'-')
plt.axis([0, max(gd_iters_vals), 0, 1])
plt.ylabel('test accuracy')
plt.xlabel('no of iterations')
plt.show()

reg param: 0.001
step size: 0.0
iterations: 100
iteration: 0  loss: 2.31133163909
iteration: 50  loss: 2.31133163909
test accuracy:  0.0263 

reg param: 0.001
step size: 0.1
iterations: 100
iteration: 0  loss: 2.32093909799
iteration: 50  loss: 0.815655393825
test accuracy:  0.1043 

reg param: 0.001
step size: 0.2
iterations: 100
iteration: 0  loss: 2.30834819475
iteration: 50  loss: 0.62486140807
test accuracy:  0.1034 

reg param: 0.001
step size: 0.3
iterations: 100
iteration: 0  loss: 2.31290299124
iteration: 50  loss: 0.549981015521
test accuracy:  0.1034 

reg param: 0.001
step size: 0.4
iterations: 100
iteration: 0  loss: 2.29644878086
iteration: 50  loss: 0.508139334811
test accuracy:  0.1037 

reg param: 0.001
step size: 0.5
iterations: 100
iteration: 0  loss: 2.30281638713
