In [4]:
# Importing the required libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pickle

# Sigmoid is used as the activation function
def sigmoid(x):
    x = np.array(x, dtype=np.float128())
    return 1/(1 + np.exp(-x))

# Derivative of the sigmoid function
def sigmoid_prime(x):
    return sigmoid(x)*(1.0 - sigmoid(x))

# Setting up Neural Network
class NeuralNetwork(object):
    
    def __init__(self, architecture):
        #architecture - numpy array with ith element representing the number of neurons in the ith layer.
        
        #Initialize the network architecture
        self.L = architecture.size - 1 #L corresponds to the last layer of the network.
        self.n = architecture #n stores the number of neurons in each layer
        #input_size is the number of neurons in the first layer i.e. n[0]
        #output_size is the number of neurons in the last layer i.e. n[L]
        
        #Parameters will store the network parameters, i.e. the weights and biases
        self.parameters = {}
        
        #Initialize the network weights and biases:
        for i in range (1, self.L + 1): 
            #Initialize weights to small random values
            self.parameters['W' + str(i)] = np.random.randn(self.n[i], self.n[i - 1]) * 0.01
            
            #Initialize rest of the parameters to 1
            self.parameters['b' + str(i)] = np.ones((self.n[i], 1))
            self.parameters['z' + str(i)] = np.ones((self.n[i], 1))
            self.parameters['a' + str(i)] = np.ones((self.n[i], 1))
        
        #As we started the loop from 1, we haven't initialized a[0]:
        self.parameters['a0'] = np.ones((self.n[i], 1))
        
        #Initialize the cost:
        self.parameters['C'] = 1
        
        #Create a dictionary for storing the derivatives:
        self.derivatives = {}
                    
    def forward_propagate(self, X):
        #Note that X here, is just one training example
        self.parameters['a0'] = X
        
        #Calculate the activations for every layer l
        for l in range(1, self.L + 1):
            self.parameters['z' + str(l)] = np.add(np.dot(self.parameters['W' + str(l)], self.parameters['a' + str(l - 1)]), self.parameters['b' + str(l)])
            self.parameters['a' + str(l)] = sigmoid(self.parameters['z' + str(l)])
        
    def compute_cost(self, y):
        self.parameters['C'] = -(y*np.log(self.parameters['a' + str(self.L)]) + (1-y)*np.log( 1 - self.parameters['a' + str(self.L)]))
    
    def compute_derivatives(self, y):
        #Partial derivatives of the cost function with respect to z[L], W[L] and b[L]:        
        #dzL
        self.derivatives['dz' + str(self.L)] = self.parameters['a' + str(self.L)] - y
        #dWL
        self.derivatives['dW' + str(self.L)] = np.dot(self.derivatives['dz' + str(self.L)], np.transpose(self.parameters['a' + str(self.L - 1)]))
        #dbL
        self.derivatives['db' + str(self.L)] = self.derivatives['dz' + str(self.L)]

        #Partial derivatives of the cost function with respect to z[l], W[l] and b[l]
        for l in range(self.L-1, 0, -1):
            self.derivatives['dz' + str(l)] = np.dot(np.transpose(self.parameters['W' + str(l + 1)]), self.derivatives['dz' + str(l + 1)])*sigmoid_prime(self.parameters['z' + str(l)])
            self.derivatives['dW' + str(l)] = np.dot(self.derivatives['dz' + str(l)], np.transpose(self.parameters['a' + str(l - 1)]))
            self.derivatives['db' + str(l)] = self.derivatives['dz' + str(l)]
            
    def update_parameters(self, alpha):
        for l in range(1, self.L+1):
            self.parameters['W' + str(l)] -= alpha*self.derivatives['dW' + str(l)]
            self.parameters['b' + str(l)] -= alpha*self.derivatives['db' + str(l)]
        
    def predict(self, x):
        self.forward_propagate(x)
        return self.parameters['a' + str(self.L)]
        
    def fit(self, X, Y, num_iter, alpha = 0.01):
        for iter in range(0, num_iter):
            c = 0 #Stores the cost
            n_c = 0 #Stores the number of correct predictions
            
            for i in range(0, X.shape[0]):
                x = X[i].reshape((X[i].size, 1))
                y = Y[i].reshape(-1, 1) # convert shape from (10, ) to (10, 1)

                self.forward_propagate(x)
                self.compute_cost(y)
                self.compute_derivatives(y)
                self.update_parameters(alpha)

                c += self.parameters['C']

                y_pred = self.predict(x)
                #y_pred is the probability, so to convert it into a class value:
                #y_pred = (y_pred > 0.5) 

                max_prob = max(y_pred)
                max_prob_index = np.argmax(y_pred)
                y_pred = np.zeros(y_pred.shape[0])
                y_pred[max_prob_index] = 1
                if np.array_equal(y_pred,y):
                    n_c += 1
            c = c/X.shape[0]
            print('Iteration: ', iter)
#             print("Accuracy:", (n_c/X.shape[0])*100)

In [17]:
# Reading in CIFAR-10 images (5 batches)
def unpickle(fileName):
    '''
    Description: retrieve data from CIFAR-10 Pickles
    Params: fileName = filename to unpickle
    Outputs: Unpickled Dict
    '''
    infile = open(fileName,'rb')
    dict = pickle.load(infile,encoding='bytes')
    return dict



def merge_batches(num_to_load=5):
    '''
    Description: Merge batches of CIFAR-10 data pickles
    Params: num_to_load = number of batches of CIFAR-10 to load and merge
    Outputs: merged features and labels from specified no. of batches of CIFAR-10
    '''
    for i in range(num_to_load):
        fileName = "data_batch_" + str(i + 1)
        data = unpickle(fileName)
        if i == 0:
            features = data[b'data']
            labels = np.array(data[b'labels'])
        else:
            features = np.append(features, data[b'data'], axis=0)
            labels = np.append(labels, data[b'labels'], axis=0)
    
    return features, labels

In [24]:
# Function to transform image class to one hot vector
def one_hot_encode(data):
    '''
    Description: Encode Target Label IDs to one hot vector of size L where L is the
    number of unique labels
    Params: data = list of label IDs
    Outputs: List of One Hot Vectors
    '''
    one_hot = np.zeros((len(data), 10))
    one_hot[np.arange(len(data)), data] = 1
    return one_hot

In [25]:
# Setting up X & y
images, class_index = merge_batches(num_to_load=5)
X = images
y = one_hot_encode(class_index)

In [29]:
# Feature Scaling
sc_X = StandardScaler()
X = sc_X.fit_transform(X)

# Splitting the data into train set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

#Defining the model architecture
architecture = np.array([3072, 200, 200, 100, 10])

# Creating the classifier
classifier = NeuralNetwork(architecture)

print(X_train.shape)

# Training the classifier
classifier.fit(X_train, y_train, 50)

# Predicting the test set results:
n_c = 0
for i in range(0, X_test.shape[0]):
    x = X_test[i].reshape((X_test[i].size, 1))
    y = y_test[i]
    y_pred = classifier.predict(x)
    max_prob = max(y_pred)
    max_prob_index = np.argmax(y_pred)
    y_pred = np.zeros(y_pred.shape[0])
    y_pred[max_prob_index] = 1
    if np.array_equal(y_pred, y):
        n_c += 1

#print("Accuracy:", (n_c/X.shape[0])*100)
print("Accuracy:", (n_c/X.shape[0])*100 )

(35000, 3072)
Iteration:  0
Iteration:  1
Iteration:  2
Iteration:  3
Iteration:  4
Iteration:  5
Iteration:  6
Iteration:  7
Iteration:  8
Iteration:  9
Iteration:  10
Iteration:  11
Iteration:  12
Iteration:  13
Iteration:  14
Iteration:  15
Iteration:  16
Iteration:  17
Iteration:  18
Iteration:  19
Iteration:  20
Iteration:  21
Iteration:  22
Iteration:  23
Iteration:  24
Iteration:  25
Iteration:  26
Iteration:  27
Iteration:  28
Iteration:  29
Iteration:  30
Iteration:  31
Iteration:  32
Iteration:  33
Iteration:  34
Iteration:  35
Iteration:  36
Iteration:  37
Iteration:  38
Iteration:  39
Iteration:  40
Iteration:  41
Iteration:  42
Iteration:  43
Iteration:  44
Iteration:  45
Iteration:  46
Iteration:  47
Iteration:  48
Iteration:  49
Accuracy: 13.388
