In [1]:
import numpy as np
from scipy.optimize import minimize
from scipy.io import loadmat
from math import sqrt
import pandas as pd
import time
import pickle

In [2]:
def initializeWeights(n_in, n_out):
    """
    # initializeWeights return the random weights for Neural Network given the
    # number of node in the input layer and output layer

    # Input:
    # n_in: number of nodes of the input layer
    # n_out: number of nodes of the output layer
       
    # Output: 
    # W: matrix of random initial weights with size (n_out x (n_in + 1))"""

    epsilon = sqrt(6) / sqrt(n_in + n_out + 1)
    W = (np.random.rand(n_out, n_in + 1) * 2 * epsilon) - epsilon
    return W

In [3]:
def sigmoid(z):
    """# Notice that z can be a scalar, a vector or a matrix
    # return the sigmoid of input z"""
    sig = 1/(1+np.exp(-z))

    return  sig

In [6]:
def preprocess():
    """ Input:
     Although this function doesn't have any input, you are required to load
     the MNIST data set from file 'mnist_all.mat'.

     Output:
     train_data: matrix of training set. Each row of train_data contains 
       feature vector of a image
     train_label: vector of label corresponding to each image in the training
       set
     validation_data: matrix of training set. Each row of validation_data 
       contains feature vector of a image
     validation_label: vector of label corresponding to each image in the 
       training set
     test_data: matrix of training set. Each row of test_data contains 
       feature vector of a image
     test_label: vector of label corresponding to each image in the testing
       set

     Some suggestions for preprocessing step:
     - feature selection"""

    mat = loadmat('mnist_all.mat')  # loads the MAT object as a Dictionary

    # Split the training sets into two sets of 50000 randomly sampled training examples and 10000 validation examples. 
    # Your code here.
    #set up the required arrays
    train = np.zeros(shape=(50000,784)) #28*28=784
    validation = np.zeros(shape=(10000,784))
    test = np.zeros(shape=(10000,784))
    label_train = np.zeros(shape=(50000,))
    label_validation = np.zeros(shape=(10000,))
    label_test = np.zeros(shape=(10000,))
    #initializing to zero
    train_len = 0
    validation_len = 0
    test_len = 0
    label_train_len = 0
    label_validation_len = 0
    #in order to divide data into train,validation and test
    
    for key in mat:     #since it's a dictionary
        if "train" in key:    #for training data
            label = key[-1]    #storing the label e.g "train1"=1
            arr = mat.get(key)
            r = range(arr.shape[0])
            arrP = np.random.permutation(r)
            length = len(arr) - 1000 #leaving 1000 datapoints per label for validation
            
            train[train_len:train_len+length]=arr[arrP[1000:],:]
            train_len = train_len + length
            label_train[label_train_len:label_train_len+length]=label
            label_train_len = label_train_len + length
            #similarly for validation data
            validation[validation_len:validation_len+1000]=arr[arrP[0:1000],:]
            validation_len = validation_len+1000
            label_validation[label_validation_len:label_validation_len+1000]=label
            label_validation_len = label_validation_len+1000
        #for testing data    
        elif "test" in key:
            label = key[-1]    #storing the label e.g "train1"=1
            arr = mat.get(key)
            arr_len = len(arr)
            r = range(arr.shape[0])
            arrP = np.random.permutation(r)
            label_test[test_len:test_len+arr_len]=label
            test[test_len:test_len+arr_len]=arr[arrP]
            test_len = test_len+arr_len
           
            
            
    trainP = np.random.permutation(range(train.shape[0]))
    train_data = train[trainP]
    train_data = (np.double(train_data))/255
    train_label = label_train[trainP]    

    validationP = np.random.permutation(range(validation.shape[0]))
    validation_data = validation[validationP]
    validation_data = (np.double(validation_data))/255
    validation_label = label_validation[validationP]
    
    testP = np.random.permutation(range(test.shape[0]))
    test_data = test[testP]
    test_data = (np.double(test_data))/255
    test_label = label_test[testP]
    
        
    # Feature selection
    # Your code here.
    variance = np.var(train_data,axis=0)
    remCol = list(*np.where(variance == 0))
        
    train_data = np.delete(train_data, remCol, axis = 1)
    validation_data = np.delete(validation_data, remCol, axis = 1)  
    test_data = np.delete(test_data, remCol, axis = 1)
    #features = list(*np.where(variance != 0))
    print('preprocess done')

    return train_data, train_label, validation_data, validation_label, test_data, test_label#,features


In [7]:
def nnObjFunction(params, *args):
    """% nnObjFunction computes the value of objective function (negative log 
    %   likelihood error function with regularization) given the parameters 
    %   of Neural Networks, thetraining data, their corresponding training 
    %   labels and lambda - regularization hyper-parameter.

    % Input:
    % params: vector of weights of 2 matrices w1 (weights of connections from
    %     input layer to hidden layer) and w2 (weights of connections from
    %     hidden layer to output layer) where all of the weights are contained
    %     in a single vector.
    % n_input: number of node in input layer (not include the bias node)
    % n_hidden: number of node in hidden layer (not include the bias node)
    % n_class: number of node in output layer (number of classes in
    %     classification problem
    % training_data: matrix of training data. Each row of this matrix
    %     represents the feature vector of a particular image
    % training_label: the vector of truth label of training images. Each entry
    %     in the vector represents the truth label of its corresponding image.
    % lambda: regularization hyper-parameter. This value is used for fixing the
    %     overfitting problem.
       
    % Output: 
    % obj_val: a scalar value representing value of error function
    % obj_grad: a SINGLE vector of gradient value of error function
    % NOTE: how to compute obj_grad
    % Use backpropagation algorithm to compute the gradient of error function
    % for each weights in weight matrices.

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % reshape 'params' vector into 2 matrices of weight w1 and w2
    % w1: matrix of weights of connections from input layer to hidden layers.
    %     w1(i, j) represents the weight of connection from unit j in input 
    %     layer to unit i in hidden layer.
    % w2: matrix of weights of connections from hidden layer to output layers.
    %     w2(i, j) represents the weight of connection from unit j in hidden 
    %     layer to unit i in output layer."""

    n_input, n_hidden, n_class, training_data, training_label, lambdaval = args

    w1 = params[0:n_hidden * (n_input + 1)].reshape((n_hidden, (n_input + 1)))
    w2 = params[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))
    obj_val = 0

    # Your code here
    bias = np.c_[np.ones(len(training_data)),training_data]
    o = np.c_[np.ones(len(sigmoid(np.dot(bias,np.transpose(w1))))),sigmoid(np.dot(bias,np.transpose(w1)))]

    # activation function
    op = sigmoid(np.dot(o,np.transpose(w2)))

    n =len(bias)
    # 1 to k coding
    y = training_label.astype(int) 
    y=np.eye(len(op[0]))[y]
    
     # for eq 8,9
    d= op-y
    err2 = np.dot(np.transpose(d),o)
    
    # for eq 10,11,12 

    err1 = np.dot(np.transpose(np.dot(d,w2)*(o*(1.0- o))),bias)
    
    # for eq 15
    obj_val = -np.sum(np.sum(y*np.log(op)+(1-y)*np.log(1-op),1))/n + lambdaval/2/n*(np.sum(w1**2)+np.sum(w2**2))
    
    # for eq 16,17
    grad_w1 = (err1[1:,:]+lambdaval*w1)/n
    grad_w2 = (err2+lambdaval*w2)/n
    # Make sure you reshape the gradient matrices to a 1D array. for instance if your gradient matrices are grad_w1 and grad_w2
    # you would use code similar to the one below to create a flat array
    #obj_grad = np.concatenate((grad_w1.flatten(), grad_w2.flatten()),0)
    obj_grad = np.array([])
    obj_grad = np.concatenate((grad_w1.flatten(), grad_w2.flatten()),0)
    return (obj_val, obj_grad)

In [8]:
def nnPredict(w1, w2, data):
    """% nnPredict predicts the label of data given the parameter w1, w2 of Neural
    % Network.

    % Input:
    % w1: matrix of weights of connections from input layer to hidden layers.
    %     w1(i, j) represents the weight of connection from unit i in input 
    %     layer to unit j in hidden layer.
    % w2: matrix of weights of connections from hidden layer to output layers.
    %     w2(i, j) represents the weight of connection from unit i in input 
    %     layer to unit j in hidden layer.
    % data: matrix of data. Each row of this matrix represents the feature 
    %       vector of a particular image
       
    % Output: 
    % label: a column vector of predicted labels"""

    #fwdfeed hl1
    n = data.shape[0]
    d = np.c_[np.ones(n),data]
    
    func = sigmoid(np.dot(d,np.transpose(w1)))  #activation func
    #at output layer
    func = np.c_[np.ones(func.shape[0]),func] 
    out= np.dot(func,np.transpose(w2)) 
    sig = sigmoid(out) #activation func o/p

    labels = np.array([])
    labels = np.argmax(sig,axis=1)

    return labels


In [11]:
"""**************Neural Network Script Starts here********************************"""

train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()

#  Train Neural Network

#iterirated using a for loop for different values of lambda and neurons,
#reults reported in report        
# set the number of nodes in input unit (not including bias unit)
n_input = train_data.shape[1]

# set the number of nodes in hidden unit (not including bias unit)
n_hidden = 20

# set the number of nodes in output unit
n_class = 20
# initialize the weights into some random matrices
initial_w1 = initializeWeights(n_input, n_hidden)
initial_w2 = initializeWeights(n_hidden, n_class)
# unroll 2 weight matrices into single column vector
initialWeights = np.concatenate((initial_w1.flatten(), initial_w2.flatten()), 0)
# set the regularization hyper-parameter
lambdaval = 0
#print("Neurons and lambda value ",n,",",l)
# Train Neural Network using fmin_cg or minimize from scipy,optimize module. Check documentation for a working example
args = (n_input, n_hidden, n_class, train_data, train_label, lambdaval)
opts = {'maxiter': 50}  # Preferred value.
st = time.time()
nn_params = minimize(nnObjFunction, initialWeights, jac=True, args=args, method='CG', options=opts)
en = time.time()
t = (en-st)
print("Training Time :",t)

        
# In Case you want to use fmin_cg, you may have to split the nnObjectFunction to two functions nnObjFunctionVal
# and nnObjGradient. Check documentation for this function before you proceed.
# nn_params, cost = fmin_cg(nnObjFunctionVal, initialWeights, nnObjGradient,args = args, maxiter = 50)


# Reshape nnParams from 1D vector into w1 and w2 matrices
w1 = nn_params.x[0:n_hidden * (n_input + 1)].reshape((n_hidden, (n_input + 1)))
w2 = nn_params.x[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))

# Test the computed parameters

predicted_label = nnPredict(w1, w2, train_data)

# find the accuracy on Training Dataset
trainA = 100 * np.mean((predicted_label == train_label))
print('\n Training set Accuracy:' + str(trainA) + '%')

#print('\n Training set Accuracy:' + str(100 * np.mean((predicted_label == train_label).astype(float))) + '%')

predicted_label = nnPredict(w1, w2, validation_data)

# find the accuracy on Validation Dataset
validationA = 100 * np.mean((predicted_label == validation_label))
print('\n Validation set Accuracy:' + str(validationA) + '%')

#print('\n Validation set Accuracy:' + str(100 * np.mean((predicted_label == validation_label).astype(float))) + '%')

predicted_label = nnPredict(w1, w2, test_data)

# find the accuracy on Validation Dataset
testA = 100 * np.mean((predicted_label == test_label))
print('\n Test set Accuracy:' + str(testA) + '%')


preprocess done
Training Time : 72.65696477890015

 Training set Accuracy:92.808%

 Validation set Accuracy:91.69%

 Test set Accuracy:92.54%
