In [None]:
# https://www.youtube.com/watch?v=7q7E91pHoW4
# https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65
# 1.1, 1.2, 1.3, 1.4
from sklearn import datasets
import numpy as np
import time
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import random
from operator import add
from scipy.special import expit
import math
from tqdm import tqdm
import pickle
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from copy import deepcopy
import pandas as pd
import sys

class MyNeuralNetwork():
    """
    My implementation of a Neural Network Classifier.

    A fully connected feed-forward network trained with mini-batch gradient
    descent on the cross-entropy loss. Hidden layers use the configured
    activation; the output layer always uses softmax.

    Layers are numbered 1 (first hidden layer) to ``n_layers`` (output layer).
    Index 0 of ``W``, ``B`` and ``activ`` is a placeholder for the input layer.
    """

    acti_fns = ['relu', 'sigmoid', 'linear', 'tanh', 'softmax']
    weight_inits = ['zero', 'random', 'normal']

    def __init__(self, n_layers, layer_sizes, activation, learning_rate, weight_init, batch_size, num_epochs):
        """
        Initializing a new MyNeuralNetwork object

        Parameters
        ----------
        n_layers : int value specifying the number of layers. The input layer
                   is not counted, the output layer is.
                   layer 0 = input layer, layer n_layers = output layer

        layer_sizes : integer array with at least n_layers + 1 entries giving
                      the number of nodes in each layer, input layer first

        activation : string specifying the activation function of the hidden layers
                     possible inputs: relu, sigmoid, linear, tanh

        learning_rate : float value specifying the learning rate to be used

        weight_init : string specifying the weight initialization function to be used
                      possible inputs: zero, random, normal

        batch_size : int value specifying the batch size to be used

        num_epochs : int value specifying the number of epochs to be used

        Raises
        ------
        Exception : if activation or weight_init is not a known name
        ValueError : if layer_sizes has fewer than n_layers + 1 entries
        """
        self.n_layers = n_layers
        self.layer_sizes = layer_sizes
        self.activation = activation
        self.learning_rate = learning_rate
        self.weight_init = weight_init
        self.batch_size = batch_size
        self.num_epochs = num_epochs

        if activation not in self.acti_fns:
            raise Exception('Incorrect Activation Function')

        if weight_init not in self.weight_inits:
            raise Exception('Incorrect Weight Initialization Function')

        # Fail with a clear message instead of an IndexError further down.
        if len(layer_sizes) < n_layers + 1:
            raise ValueError('layer_sizes needs at least n_layers + 1 entries')

        make_weights = {
            'zero': self.zero_init,
            'normal': self.normal_init,
            'random': self.random_init,
        }[weight_init]

        # W[i] has shape (layer_sizes[i], layer_sizes[i-1]); B[i] is a column
        # vector of shape (layer_sizes[i], 1). Biases always start at zero.
        self.W = [[None]]
        self.B = [[None]]
        for i in range(n_layers):
            self.W.append(make_weights(shape=(layer_sizes[i+1], layer_sizes[i])))
            self.B.append(self.zero_init(shape=(layer_sizes[i+1], 1)))

        # Hidden layers share one activation; the output layer is always softmax.
        self.activ = [None] + [activation] * (n_layers - 1) + ['softmax']
        self.tloss, self.vloss = [], []

    def selectactiv(self, X, activ):
        """
        Apply the activation function named `activ` to X.

        Raises
        ------
        Exception : if `activ` is not a known activation name
        """
        functions = {
            'relu': self.relu,
            'linear': self.linear,
            'sigmoid': self.sigmoid,
            'tanh': self.tanh,
            'softmax': self.softmax,
        }
        if activ not in functions:
            raise Exception('selectactiv:Incorrect Activation Function')
        return functions[activ](X)

    def selectgrad(self, X, activ, yid = 0):
        """
        Evaluate the gradient function belonging to the activation `activ`.

        For relu/linear/sigmoid/tanh, X is the pre-activation input of the
        layer. For softmax, X is the softmax output and `yid` holds the true
        class index of every row (see softmax_grad).

        Raises
        ------
        Exception : if `activ` is not a known activation name
        """
        if activ == 'softmax':
            return self.softmax_grad(X, yid)
        gradients = {
            'relu': self.relu_grad,
            'linear': self.linear_grad,
            'sigmoid': self.sigmoid_grad,
            'tanh': self.tanh_grad,
        }
        if activ not in gradients:
            raise Exception('selectgrad:Incorrect Activation Function')
        return gradients[activ](X)

    def zero_init(self, shape):
        """
        Calculating all-zero initial weights for a particular layer

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        # Seeding is kept so the global RNG state after construction is the
        # same whichever initializer ran last.
        np.random.seed(0)
        weight = np.zeros(shape)
        return weight

    def random_init(self, shape):
        """
        Calculating initial weights drawn uniformly from [-1, 1) and scaled by 0.01

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        # Fixed seed: every call returns the same values for a given shape.
        np.random.seed(0)
        weight = np.random.uniform(-1, 1, shape)
        return weight*0.01

    def normal_init(self, shape):
        """
        Calculating initial weights drawn from Normal(0,1) and scaled by 0.01

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        # Fixed seed: every call returns the same values for a given shape.
        np.random.seed(0)
        weight = np.random.normal(0, 1, shape)
        return weight*0.01

    def compare_with_mlp(self, loss_from_your_model, X, y):
        """
        Train sklearn's MLPClassifier as a reference and plot both loss curves.

        The reference model is fitted twice with warm_start=True (50 iterations
        each) and pickled after each fit to '50mlp<activation>.sav' and
        '100mlp<activation>.sav'. The comparison plot is written to 'result.png'.

        Parameters
        ----------
        loss_from_your_model : sequence of per-epoch training losses of this network

        X : all data, 2-dimensional numpy array of shape (n_samples, n_features)

        y : all labels, 1-dimensional numpy array of shape (n_samples,)
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

        # Translate our activation names into sklearn's. An activation with no
        # counterpart maps to '' and is passed on to sklearn unchanged.
        sklearn_names = {'relu': 'relu', 'tanh': 'tanh', 'sigmoid': 'logistic', 'linear': 'identity'}
        st = sklearn_names.get(self.activation, '')
        print('activation:'+st)

        # NOTE: architecture, batch size and learning rate of the reference
        # model are fixed here and not derived from this instance.
        mlp = MLPClassifier(verbose = True, hidden_layer_sizes=(256, 128, 64), activation =st ,solver = 'sgd' , alpha = 0, batch_size = 32,max_iter=50, learning_rate_init = 0.02, learning_rate = 'constant',shuffle=True,momentum = 0,nesterovs_momentum=False,validation_fraction = 0.11111, warm_start = True)

        mlp.fit(X_train, y_train)
        with open('50mlp'+self.activation+'.sav', 'wb') as snapshot:
            pickle.dump(mlp, snapshot)

        # warm_start=True: the second fit continues from the first one.
        mlp.fit(X_train, y_train)
        with open('100mlp'+self.activation+'.sav', 'wb') as snapshot:
            pickle.dump(mlp, snapshot)

        loss_from_sklearn = mlp.loss_curve_
        plt.plot(loss_from_sklearn,label="sklearn")
        plt.plot(loss_from_your_model,label="your NN")
        plt.legend(loc="upper left")
        plt.savefig("result.png")
        plt.close()

    def _forward(self, X):
        """
        Run a forward pass and keep every intermediate result.

        Returns
        -------
        z : list where z[l] is the pre-activation of layer l, shape (n_samples, layer size)
        output : list where output[l] is the activation of layer l; output[0] is X
        """
        z = [None]*(self.n_layers+1)
        output = [None]*(self.n_layers+1)
        output[0] = X
        for layern in range(1, self.n_layers+1):
            # B[layern] is a column vector, so it broadcasts over the samples
            # while they are still laid out along the columns.
            z[layern] = (np.dot(self.W[layern], output[layern-1].T) + self.B[layern]).T
            output[layern] = self.selectactiv(z[layern], self.activ[layern])
        return z, output

    def _backward(self, z, output, y_batch):
        """
        Back-propagate one batch and return batch-averaged gradients.

        Returns
        -------
        grad_w, grad_b : lists shaped like self.W and self.B (index 0 is a placeholder)
        """
        last = self.n_layers
        bsize = output[0].shape[0]

        # gz[l]: gradient of the loss with respect to the pre-activation of layer l.
        gz = [None]*(last+1)
        gz[last] = self.selectgrad(output[last], self.activ[last], y_batch)
        for layern in range(last - 1, 0, -1):
            gout = np.dot(gz[layern+1], self.W[layern+1])
            gz[layern] = self.selectgrad(z[layern], self.activ[layern]) * gout

        grad_w = [[None]]
        grad_b = [[None]]
        for layern in range(1, last+1):
            grad_w.append(np.dot(gz[layern].T, output[layern-1]) / bsize)
            # Reshape to a column vector so it matches the shape of B[layern].
            grad_b.append((np.sum(gz[layern], axis=0) / bsize).reshape(-1, 1))
        return grad_w, grad_b

    def _train_batch(self, X_batch, y_batch):
        """Perform one gradient-descent step on a single mini-batch."""
        z, output = self._forward(X_batch)
        # The gradients of the most recent batch stay available on the instance.
        self.sumgw, self.sumgb = self._backward(z, output, y_batch)
        for layern in range(1, self.n_layers+1):
            self.W[layern] -= self.learning_rate * self.sumgw[layern]
            # Biases are trained together with the weights.
            self.B[layern] -= self.learning_rate * self.sumgb[layern]

    def _mean_cross_entropy(self, X, y):
        """Return the mean of -log(p_true + 1e-8) over the samples as a float."""
        probs = self.predict_proba(X)
        labels = np.asarray(y)
        true_class_probs = probs[np.arange(len(labels)), labels]
        # The small constant keeps log() finite when a probability is 0.
        return float(np.mean(-np.log(true_class_probs + 1e-8)))

    def predict_proba(self, X):
        """
        Predicting probabilities using the trained network.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        Returns
        -------
        y : 2-dimensional numpy array of shape (n_samples, n_classes) which contains the
            class wise prediction probabilities.
        """
        _, output = self._forward(X)
        return output[self.n_layers]

    def predict(self, X):
        """
        Predicting class indices using the trained network.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        Returns
        -------
        y : 1-dimensional float numpy array of shape (n_samples,) which contains, for
            every sample, the index of the most probable class (first index on ties).
        """
        proba = self.predict_proba(X)
        # argmax returns the first maximum; the float dtype is kept for
        # callers that relied on the float result array.
        return np.argmax(proba, axis=1).astype(float)

    def score(self, X, y):
        """
        Computing the accuracy of the trained network on a labelled set.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        y : 1-dimensional numpy array of shape (n_samples,) which acts as testing labels.

        Returns
        -------
        acc : float value specifying the accuracy of the model on the provided testing set
        """
        ypred = self.predict(X)
        correct = int(np.count_nonzero(np.asarray(y) == ypred))
        return correct / len(y)

    def fit(self, X, y):
        """
        Fitting (training) the network with mini-batch gradient descent.

        11.111% of the data is held out as a validation set. After every epoch
        the mean cross-entropy on the training and validation parts is appended
        to self.tloss and self.vloss and printed. Copies of the parameters
        after the 50th and the 100th epoch are stored as W50/B50 and W100/B100;
        they only exist if training runs that long.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as training data.

        y : 1-dimensional numpy array of shape (n_samples,) of integer class indices.

        Returns
        -------
        self : an instance of self
        """
        # fit function has to return an instance of itself or else it won't work with test.py
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.11111, random_state=1)

        # A dedicated, seeded generator keeps runs reproducible, gives every
        # epoch its own permutation and leaves the global NumPy RNG untouched.
        shuffler = np.random.RandomState(0)

        for epoc in range(self.num_epochs):
            print('epoc:'+str(epoc))
            go = shuffler.permutation(len(X_train))
            X_train, y_train = X_train[go], y_train[go]

            # The last batch may be smaller than batch_size.
            for start in range(0, len(X_train), self.batch_size):
                stop = start + self.batch_size
                self._train_batch(X_train[start:stop], y_train[start:stop])

            loss = self._mean_cross_entropy(X_train, y_train)
            self.tloss.append(loss)
            print('tloss:'+str(loss))

            loss = self._mean_cross_entropy(X_val, y_val)
            self.vloss.append(loss)
            print('vloss:'+str(loss))

            # Snapshots after the 50th and 100th epoch (epoc is zero-based).
            if epoc == 49:
                self.W50 = deepcopy(self.W)
                self.B50 = deepcopy(self.B)
            if epoc == 99:
                self.W100 = deepcopy(self.W)
                self.B100 = deepcopy(self.B)
        return self

    def softmax(self, X):
        """
        Calculating the Softmax activation along the last axis

        Parameters
        ----------
        X : numpy array whose last axis holds the class scores

        Returns
        -------
        x_calc : numpy array of the same shape as X; entries along the last axis sum to 1
        """
        # Subtracting the row maximum leaves the result unchanged but keeps
        # exp() from overflowing.
        shifted = np.exp(X - np.amax(X, axis=-1, keepdims = True))
        x_calc = shifted / np.sum(shifted, axis=-1, keepdims = True)
        return x_calc

    def relu(self, X):
        """
        Calculating the ReLU activation for a particular layer

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, features)

        Returns
        -------
        x_calc : 2-dimensional numpy array after calculating the necessary function over X
        """
        x_calc = np.maximum(X, 0)
        return x_calc

    def relu_grad(self, X):
        """
        Calculating the gradient of ReLU activation for a particular layer

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, features)

        Returns
        -------
        x_calc : new float numpy array of the same shape as X, 1 where X > 0 and 0 elsewhere
        """
        # A new array is built so the caller's pre-activations stay intact.
        x_calc = (np.asarray(X) > 0).astype(float)
        return x_calc

    def sigmoid(self, X):
        """
        Calculating the Sigmoid activation for a particular layer

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, features)

        Returns
        -------
        x_calc : 2-dimensional numpy array after calculating the necessary function over X
        """
        # expit evaluates 1/(1+exp(-X)) without overflowing for large |X|.
        x_calc = expit(X)
        return x_calc

    def sigmoid_grad(self, X):
        """
        Calculating the gradient of Sigmoid activation for a particular layer

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, features)

        Returns
        -------
        x_calc : numpy array of the same shape as X holding sigmoid(X) * (1 - sigmoid(X))
        """
        activated = self.sigmoid(X)
        x_calc = np.multiply(activated, 1 - activated)
        return x_calc

    def linear(self, X):
        """
        Calculating the Linear activation for a particular layer

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, features)

        Returns
        -------
        x_calc : X itself (identity function)
        """
        x_calc = X
        return x_calc

    def linear_grad(self, X):
        """
        Calculating the gradient of Linear activation for a particular layer

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, features)

        Returns
        -------
        x_calc : numpy array of ones with the same shape as X
        """
        x_calc = np.ones(X.shape)
        return x_calc

    def tanh(self, X):
        """
        Calculating the Tanh activation for a particular layer

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, features)

        Returns
        -------
        x_calc : 2-dimensional numpy array after calculating the necessary function over X
        """
        x_calc = np.tanh(X)
        return x_calc

    def tanh_grad(self, X):
        """
        Calculating the gradient of Tanh activation for a particular layer

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, features)

        Returns
        -------
        x_calc : numpy array of the same shape as X holding 1 - tanh(X)**2
        """
        x_calc = 1 - np.square(np.tanh(X))
        return x_calc

    def softmax_grad(self, X, yid):
        """
        Calculating the output-layer error term for softmax with cross-entropy loss

        Parameters
        ----------
        X : 2-dimensional numpy array (samples, classes) holding softmax outputs

        yid : sequence with the integer true-class index of every row of X

        Returns
        -------
        x_calc : new float numpy array equal to X with 1 subtracted at each row's true class
        """
        # Work on a copy so the caller's probabilities are not modified.
        x_calc = np.array(X, dtype=float)
        labels = np.asarray(yid)
        x_calc[np.arange(len(labels)), labels] -= 1
        return x_calc

In [None]:
#1.5
# Read the training CSV, split off the 'label' column, divide the remaining
# values by 255 and hold out 10% of the rows as a test set.
notebookpath = "/content/drive/MyDrive/Colab Notebooks/fashion-mnist_train.csv"
train = pd.read_csv(notebookpath)

trainy = train['label'].to_numpy()
train = train.drop(columns=['label'])
trainX = train.to_numpy() / 255.0
X_train, X_test, y_train, y_test = train_test_split(
    trainX, trainy, test_size=0.1, random_state=1
)
y_test

array([8, 5, 8, ..., 0, 6, 4])

In [None]:
# Train a sigmoid network on the training split and print its accuracy on
# the held-out test split; trained networks are collected in `model`.
model = {}
nn = MyNeuralNetwork(
    n_layers=4,
    layer_sizes=[784, 256, 128, 64, 10],
    activation='sigmoid',
    learning_rate=0.1,
    weight_init='normal',
    batch_size=32,
    num_epochs=100,
)
# nn.compare_with_mlp(None,trainX,trainy)
model['sigmoid'] = nn.fit(X_train, y_train)
print(model['sigmoid'].score(X_test, y_test))

epoc:0
tloss:2.30649729941423
2.30649729941423
vloss:2.3065304155842523
epoc:1
tloss:2.3053277514411112
2.3053277514411112
vloss:2.305002615052124
epoc:2
tloss:2.3039172097066234
2.3039172097066234
vloss:2.3051203701026264
epoc:3
tloss:2.303018648546453
2.303018648546453
vloss:2.303811481125183
epoc:4
tloss:2.303272199597278
2.303272199597278
vloss:2.3034783651581403
epoc:5
tloss:2.3027370397857836
2.3027370397857836
vloss:2.3030747339061395
epoc:6
tloss:2.3025221949707175
2.3025221949707175
vloss:2.3028730271805875
epoc:7
tloss:2.301680141263908
2.301680141263908
vloss:2.3022718804681297
epoc:8
tloss:2.2994826893574354
2.2994826893574354
vloss:2.300011587816628
epoc:9
tloss:2.291666336370679
2.291666336370679
vloss:2.2924113931393655
epoc:10
tloss:2.237555001660282
2.237555001660282
vloss:2.2380346019695514
epoc:11
tloss:1.987260660709825
1.987260660709825
vloss:1.9856939329010135
epoc:12
tloss:1.9513288995822162
1.9513288995822162
vloss:1.952482181455668
epoc:13
tloss:1.8275083682188

In [None]:

# Persist the epoch-50 and epoch-100 parameter snapshots and the loss curves
# of every trained model. Each file is opened in a `with` block so it is
# flushed and closed as soon as it has been written.
for i in model:
    artifacts = {
        '50_'+i+'_weights.sav': model[i].W50,
        '50_'+i+'_biases.sav': model[i].B50,
        '100_'+i+'_weights.sav': model[i].W100,
        '100_'+i+'_biases.sav': model[i].B100,
        i+'_tloss.sav': model[i].tloss,
        i+'_vloss.sav': model[i].vloss,
    }
    for path, payload in artifacts.items():
        with open(path, 'wb') as out_file:
            pickle.dump(payload, out_file)
 


In [None]:
#plot curves
def plotgraph(rmselist, vallist):
    """
    Plot the training and validation loss curves and save them to 'plott.png'.

    Parameters
    ----------
    rmselist : sequence of training loss values, plotted with the label "train"

    vallist : sequence of validation loss values, plotted with the label "val"
    """
    plt.xlabel('epochs')
    plt.ylabel('entropy loss')

    plt.plot(rmselist,label="train")
    plt.plot(vallist,label="val")
    plt.legend(loc="upper left")
    plt.savefig("plott.png")
    # Close the figure so later plots start from a clean canvas.
    plt.close()
 
# plotgraph(model['relu'].tloss, model['relu'].vloss)
# Plot the loss curves saved for the sigmoid model; the `with` block closes
# both pickle files once they have been read.
with open('sigmoid_tloss.sav', 'rb') as tloss_file, open('sigmoid_vloss.sav', 'rb') as vloss_file:
    plotgraph(pickle.load(tloss_file), pickle.load(vloss_file))

In [None]:
def extraload(w, b, i):
    """
    Build a MyNeuralNetwork that uses previously saved parameters.

    Parameters
    ----------
    w : list of weight matrices with a placeholder at index 0, as stored in MyNeuralNetwork.W

    b : list of bias vectors with a placeholder at index 0, as stored in MyNeuralNetwork.B

    i : string naming the hidden-layer activation function

    Returns
    -------
    restored : MyNeuralNetwork whose W and B are replaced by w and b
    """
    # Every weight matrix is (nodes out, nodes in): the input widths of all
    # layers plus the output width of the last one give the layer sizes.
    sizes = [layer.shape[1] for layer in w[1:]]
    sizes.append(w[-1].shape[0])  # should be [784,256,128,64,10]
    restored = MyNeuralNetwork(
        n_layers=len(b) - 1,
        layer_sizes=sizes,
        activation=i,
        learning_rate=0.1,
        weight_init='normal',
        batch_size=32,
        num_epochs=100,
    )
    restored.W = w
    restored.B = b
    return restored

# Reload the epoch-50 and epoch-100 snapshots of every model and print the
# test accuracy of each. Files are closed as soon as they have been read.
for i in model:
    for snapshot in ('50', '100'):
        with open(snapshot+'_'+i+'_weights.sav', 'rb') as weights_file:
            w = pickle.load(weights_file)
        with open(snapshot+'_'+i+'_biases.sav', 'rb') as biases_file:
            b = pickle.load(biases_file)
        model2 = extraload(w, b, i)
        print(model2.score(X_test, y_test))

##########score###############
0.8711666666666666
##########score###############
0.8865


In [None]:
#1.6
for i, net in model.items():
    # Compare each network's training loss curve with the sklearn reference.
    net.compare_with_mlp(net.tloss, trainX, trainy)
 


activation:logistic
Iteration 1, loss = 2.30431432
Iteration 2, loss = 2.29627604
Iteration 3, loss = 2.22819348
Iteration 4, loss = 1.74750319
Iteration 5, loss = 1.55886105
Iteration 6, loss = 1.28345592
Iteration 7, loss = 1.02382099
Iteration 8, loss = 0.92200620
Iteration 9, loss = 0.85225875
Iteration 10, loss = 0.79877947
Iteration 11, loss = 0.75932408
Iteration 12, loss = 0.72922126
Iteration 13, loss = 0.70537213
Iteration 14, loss = 0.68436217
Iteration 15, loss = 0.66599222
Iteration 16, loss = 0.64923107
Iteration 17, loss = 0.63371777
Iteration 18, loss = 0.61998656
Iteration 19, loss = 0.60611780
Iteration 20, loss = 0.59279852
Iteration 21, loss = 0.58008410
Iteration 22, loss = 0.56738316
Iteration 23, loss = 0.55387841
Iteration 24, loss = 0.54046008
Iteration 25, loss = 0.52635585
Iteration 26, loss = 0.51298126
Iteration 27, loss = 0.50182979
Iteration 28, loss = 0.49147082
Iteration 29, loss = 0.48180899
Iteration 30, loss = 0.47317263
Iteration 31, loss = 0.464060



Iteration 51, loss = 0.37196921
Iteration 52, loss = 0.36863709
Iteration 53, loss = 0.36599389
Iteration 54, loss = 0.36369261
Iteration 55, loss = 0.35971067
Iteration 56, loss = 0.35816650
Iteration 57, loss = 0.35572445
Iteration 58, loss = 0.35357810
Iteration 59, loss = 0.35113518
Iteration 60, loss = 0.34840563
Iteration 61, loss = 0.34660089
Iteration 62, loss = 0.34377006
Iteration 63, loss = 0.34224355
Iteration 64, loss = 0.33917347
Iteration 65, loss = 0.33785196
Iteration 66, loss = 0.33506653
Iteration 67, loss = 0.33426744
Iteration 68, loss = 0.33212526
Iteration 69, loss = 0.33044605
Iteration 70, loss = 0.32806208
Iteration 71, loss = 0.32639894
Iteration 72, loss = 0.32429419
Iteration 73, loss = 0.32218872
Iteration 74, loss = 0.32138050
Iteration 75, loss = 0.31834961
Iteration 76, loss = 0.31744089
Iteration 77, loss = 0.31598330
Iteration 78, loss = 0.31417313
Iteration 79, loss = 0.31244998
Iteration 80, loss = 0.31149115
Iteration 81, loss = 0.30916267
Iteratio

In [None]:

# Evaluate the pickled sklearn reference models (saved after the first and the
# second fit) on the test split and plot the sklearn loss curve against ours.
model=['sigmoid']
for i in model:
    print("mlp "+i)
    with open('50mlp'+i+'.sav', 'rb') as mlp_file:
        m = pickle.load(mlp_file)
    print(m.score(X_test, y_test))

    with open('100mlp'+i+'.sav', 'rb') as mlp_file:
        m = pickle.load(mlp_file)
    loss_from_sklearn = m.loss_curve_
    print(m.score(X_test, y_test))

    with open(i+'_tloss.sav', 'rb') as loss_file:
        loss_from_your_model = pickle.load(loss_file)

    plt.plot(loss_from_sklearn,label="sklearn")
    plt.plot(loss_from_your_model,label="your NN")
    plt.legend(loc="upper left")
    plt.savefig("result.png")
    plt.close()

mlp sigmoid
0.8566666666666667
0.8891666666666667
