In [1]:
import numpy as np

'''Layer Class to Abstract representation of weights '''
class Layer():
    '''Weights are randomly intialized to floats between [-1 1)'''
    def __init__(self,input_layer_width, hidden_layer_width):
        self.w = 2*np.random.random((input_layer_width, hidden_layer_width)) - 1


In [36]:
import math
'''The Main Class that implements various helper methods and the training of the network'''
class OilSpill():
    '''l1 , l2 , l3 are layers representing weights between Input-Hidden1, Hidden1-Hidden2 and Hidden2 - Output
    b2 and b3 are bias unit weights for hidden1 and hidden2 respectively. Neural Net is trained on TrainData and TrainLabelData
    And verified on ValidationData - ValidationLabel'''
    def __init__(self, N_input, N_hidden_1, N_hidden_2, TrainData, TrainLabelData, ValidationData, ValidationLabel):
        self.l1 = Layer(N_input,N_hidden_1)
        self.l2 = Layer(N_hidden_1, N_hidden_2)
        self.l3 = Layer(N_hidden_2,10)
        self.b2 = 1 #bias for hidden1
        self.b3 = 1 # bias for hidden2
        self.train = TrainData
        self.labels = TrainLabelData
        self.val = ValidationData
        self.valLabels = ValidationLabel
    '''helper relu method to find max(0,x)'''   
    def _relu(self,x):
        if x <0 :
            return 0
        else :
            return x
    '''helper method to compute derivative of relu function'''
    def _relu_der(self,x):
        if x > 0:
            return 1;
        else :
            return 0;
    '''helper method to compute sofmax outputs for elements in the array X'''
    def SOFTMAX(self, X):
        expMat = []
        den = 0
        for x in X:
            t = np.exp(x)
            #print("x is " + str(x))
            #print("t is " + str(t))
            expMat.append(t)
            den += t
        expMat[:] = [y/den for y in expMat]
        return expMat
    '''helper method to compute relu activations of array X'''
    def RELU(self, X):
        temp = []
        #print(len(X))
        for x in X:
            temp.append(self._relu(x))
        return temp
    '''helper method to compute derivation of relu of array X'''
    def RELU_DER(self, X):
        temp = []
        for x in X:
            temp.append(self._relu_der(x))
        return temp
    ''' method to validate the results by finding misses and hits '''
    def _validate(self):
        hits = 0
        misses = 0
        for data,label in zip(self.val,self.valLabels):
            
            H1_ = (np.dot(data,self.l1.w))
            #print(type(H1_))
            H1 = self.RELU(H1_)
            #print(H1.shape)
            
            H2_ = (np.dot(H1, self.l2.w))
            H2_ = [self.b2 + po for po in H2_]
            H2 = self.RELU(H2_)
            
            O_ = (np.dot(H2,self.l3.w))
            O_ = [self.b3 + gp for gp in O_]
            O = self.SOFTMAX((O_))
            
            maxIndex = O.index(max(O))
            maxValIndex = label.index(max(label))
            if maxIndex == maxValIndex :
                hits+=1
            else:
                misses+=1
        print("accuracy is " + str(hits/len(self.val)))
        print("hits " + str(hits))
    ''' method to perform batch training . Niter = number of iterations, batch_size = size of batch, 
    lamda = regularization parameter. Calls _epoch that performs batch training over one epoch .'''
    def _train(self,eta, Niter, batch_size, lamda):
        counter = 0
        for i in range(Niter):
            self._epoch(eta ,batch_size, lamda)
            counter = (counter+1)%10
            if(counter == 9) :
                self._validate()
    ''' trains the neural net for one epoch'''
    def _epoch(self,eta, batch_size,lamda):
        size = len(self.train)
        num_batches = size//batch_size
        bs = 0
        for i in range(num_batches):
            data = self.train[bs:bs+batch_size]
            label = self.labels[bs:bs+batch_size]
            
            a = len(data) #batch size
            ActivationMatrix = np.matmul(data,self.l1.w)
            
            #Hidden1 and Hidden 2 forward pass 
            H1 = [self.RELU(row_) for row_ in ActivationMatrix]
            H2_ = np.matmul(H1, self.l2.w)
            H2_ = [[self.b2 + po for po in row_] for row_ in H2_]
            H2 = [self.RELU(row_) for row_ in H2_]
            
            #Forward Pass To Output O 
            O_ = np.matmul(H2,self.l3.w)
            O_ = [[self.b3 + gp for gp in row_] for row_ in O_]
            O = [self.SOFTMAX(row_) for row_ in O_]
            
            # Backward Delta computations - BACKWARD PASS
            Del_O = [[l-o for o,l in zip(o_row,l_row)] for o_row,l_row in zip(O,label)] # label is one hot
            # Batch size X N(O)
            #Weights update for Layer 3
            self.l3.w = (1-2*lamda)*self.l3.w + eta*(np.matmul(np.transpose(H2),Del_O))
            
            H2_prime = [self.RELU_DER(row_) for row_ in H2]
            
            # Second Last layer weights delta
            Del_H2 = np.multiply(np.matmul(self.l3.w,np.transpose(Del_O)),np.transpose(H2_prime))
            # N(H2) X Batch Size
            E_1 = np.matmul(Del_H2,H1)
            # N(H2) X N(H1)
            # Weights Update for layer 2
            self.l2.w = (1-2*lamda)*self.l2.w + eta*np.transpose(E_1)
            
            H1_prime = [self.RELU_DER(row_) for row_ in H1]
            
            #Hidden1 Delta Computations
            Del_H1 = np.multiply(np.matmul(self.l2.w,Del_H2),np.transpose(H1_prime))
            #N(H1) X Batch
            E_2 = np.matmul(Del_H1,data)
            # Weights update for layer 1
            self.l1.w = (1-2*lamda)*self.l1.w + eta*np.transpose(E_2)
            
            loss = sum(sum(np.multiply(label,O)))
           
            # Bias Update for layer 2
                        
            (p,b) = self.l2.w.shape
            for j in range(b):
                self.b2 += eta*sum([Del_H2[j][k] for k in range(len(data))])
            
            # Bias update last layer
            (p,b) = self.l3.w.shape
            for j in range(b):
                self.b3 += eta*sum([Del_O[k][j] for k in range(len(data))]) 
            
            #batch index update    
            bs = bs + batch_size
        
    
        

    

In [41]:
import unittest
'''Unit testing '''
class TestOilSpill(unittest.TestCase):
    '''Build up OilSpill Object'''
    def setUp(self):
        self.train = [[1,2,3,4,5],[6,7,8,9,10]]
        self.labels = [[0,0,1,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,1]]
        self.val = [[11,1,1,14,5],[1,2,5,6,2]]
        self.valLab = [[0,0,0,0,0,0,0,0,0,1],[0,0,0,0,0,0,1,0,0,0]]
        self.testObj = OilSpill(5,2,1,self.train,self.labels,self.val,self.valLab)
        pass
    '''Tests whether the size of the layers are correct'''
    def testLayerSize(self):
        (x1,y1) = self.testObj.l1.w.shape
        (x2,y2) = self.testObj.l2.w.shape
        (x3,y3) = self.testObj.l3.w.shape
        self.assertEqual(x1,5)
        self.assertEqual(y1,2)
        self.assertEqual(x2,2)
        self.assertEqual(y2,1)
        self.assertEqual(x3,1)
        self.assertEqual(y3,10)
    '''Tests the relu helper function'''
    def testRelu(self):
        self.assertEqual(self.testObj._relu(-5),max(0,-5))
        self.assertEqual(self.testObj._relu(15),max(0,15))
    '''tests Relu Der function'''
    def testReluDer(self):
        self.assertEqual(self.testObj._relu_der(-5),0)
        self.assertEqual(self.testObj._relu_der(15),1)
    ''' Tests the function SOFTMAX'''
    def testSOFTMAX(self):
        self.assertEqual(self.testObj.SOFTMAX([2,0]),[np.exp(2)/(1 + np.exp(2)) , 1 / (1+np.exp(2))])
    ''' Tests helper method RELU'''
    def testRELU(self):
        self.assertEqual(self.testObj.RELU([-5,4,120,-90]),[0,4,120,0])
    ''' Tests helper method RELU_DER'''
    def testRELU_DER(self):
        self.assertEqual(self.testObj.RELU_DER([-4.1,4.9,95,-0]),[0,1,1,0])

if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

......
----------------------------------------------------------------------
Ran 6 tests in 0.013s

OK


In [46]:
'''Script to easily replicate results '''
class Executor():
    def __init__(self):
        self.trainData = []
        self.labels = []
        self.val = []
        self.valLabels = []
        self.csvTrainData = self._readCsv_('C:\\Users\\tanej\\Downloads\\cerebras_\\mnist_train.csv')
        self.csvTestData = self._readCsv_('C:\\Users\\tanej\\Downloads\\cerebras_\\mnist_test.csv')
        self.buildData()
    '''Preprocess csvData to prepare TrainData and TestData'''
    def buildData(self):
        for data in self.csvTrainData:
            trow = [1.0] + [float(r) for r in data[1:]]
            mean = sum(trow)/len(trow)
            # SUBTRACTING MEAN AND NORMALIZING INPUT
            row = [(tr-mean) for tr in trow]
            self.trainData.append(self._normalize(row))
            temp = [0]*10
            #ONE HOT ENCODING OF LABELS
            temp[int(data[0])] = 1
            self.labels.append(temp)
        for data in self.csvTestData:
            trow = [1.0] + [float(r) for r in data[1:]]
            mean = sum(trow)/len(trow)
            # SUBTRACTING MEAN AND NORMALIZING INPUT
            row = [(tr-mean) for tr in trow]
            self.val.append(self._normalize(row))
            temp = [0]*10
            #ONE HOT ENCODING OF LABELS
            temp[int(data[0])] = 1
            self.valLabels.append(temp)
    '''reads CSV file and shuffles the input'''
    def _readCsv_(self, fileName):
        import csv
        trainFile = open(fileName,'r')
        csvReader = csv.reader(trainFile)
        csvData = []
        for row in csvReader:
            csvData.append(row)
        from random import shuffle
        shuffle(csvData)
        return csvData
    ''' normalizes the input '''
    def _normalize(self,v):
        norm = np.linalg.norm(v)
        if norm == 0: 
            return v
        return v / norm
    '''Preprocess csvData to separate training data and validation data'''
    def _preprocess_(self):
        for data in self.csvTrainData:
            trow = [1.0] + [float(r) for r in data[1:]]
            mean = sum(trow)/len(trow)
            # SUBTRACTING MEAN AND NORMALIZING INPUT
            row = [(tr-mean) for tr in trow]
            self.trainData.append(self._normalize(row))
            temp = [0]*10
            #ONE HOT ENCODING OF LABELS
            temp[int(data[0])] = 1
            self.labels.append(temp)
    def __main__(self):
        oilSpill= OilSpill(785,100,110,self.trainData,self.labels,self.val,self.valLabels)
        oilSpill._train(0.001,123, 14,0.00001)
    def __validate__(self):
        self._preprocess_()
        #Reduce DataSet for checking
        train = self.trainData[:20101]
        trainLab = self.labels[:20101]
        valid = self.trainData[50000:]
        validLab = self.labels[50000:]
        
        oilSpill= OilSpill(785,100,110,train,trainLab,valid[:500],validLab[:500])
        oilSpill._train(0.001,123, 14,0.00001)

In [47]:
executorObj = Executor()


In [None]:
executorObj.__main__()

accuracy is 0.9481
hits 9481
accuracy is 0.9556
hits 9556
accuracy is 0.9595
hits 9595
accuracy is 0.9608
hits 9608
accuracy is 0.9624
hits 9624
accuracy is 0.9636
hits 9636
accuracy is 0.9643
hits 9643
accuracy is 0.9648
hits 9648
accuracy is 0.9647
hits 9647
accuracy is 0.9652
hits 9652
accuracy is 0.9658
hits 9658
accuracy is 0.9655
hits 9655


In [44]:
executorObj.__validate__()

accuracy is 0.91
hits 455
accuracy is 0.936
hits 468
accuracy is 0.946
hits 473
accuracy is 0.952
hits 476
accuracy is 0.956
hits 478
accuracy is 0.954
hits 477
accuracy is 0.954
hits 477
accuracy is 0.954
hits 477
accuracy is 0.954
hits 477
accuracy is 0.956
hits 478
accuracy is 0.958
hits 479
accuracy is 0.958
hits 479


In [45]:
#oilSpill._train(0.001,12300, 14,0.00001)