In [4]:
import numpy as np
import matplotlib.pyplot as plt
from math import *
import pandas as pd
import random
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import confusion_matrix,f1_score,accuracy_score,precision_score,recall_score
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score 
np.set_printoptions(suppress=True)
import warnings
warnings.filterwarnings('ignore')

In [3]:
def loadData():
    df=pd.read_csv("mnist_train.csv")
    x=df.iloc[:,1:]
    y=df.iloc[:,0]
    b = np.zeros((y.size, y.max()+1))
    b[np.arange(y.size),y] = 1
    y=b
    
    return x,y

In [53]:
class MyNeuralNetwork:
    
    def __init__(self,nLayers,layerSize,activation,learningRate,weightInit,batchSize,epoch):
        self.nLayers=nLayers
        self.layerSize=layerSize
        self.activation=activation
        self.learningRate=learningRate
        self.weightInit=weightInit
        self.batchSize=batchSize
        self.epoch=epoch
        
        bias=[]
        for i in range(nLayers-1):
            bias.append(np.zeros(layerSize[i+1]))
        
        weight=[]
        for i in range(nLayers-1):
            if(weightInit=='zero'):
                weight.append(np.zero((layerSize[i],layerSize[i+1])))
            elif(weightInit=='random'):
                weight.append(np.random.rand(layerSize[i],layerSize[i+1])*0.01)
            else:
                weight.append(np.random.normal(size=(layerSize[i],layerSize[i+1]))*0.01)
                
        self.bias=bias
        self.weight=weight
    
    def ReLU(self,x):
        if(x.all()>=0):
            return x
        else:
            return 0
    
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def linear(self,x):
        return x
    
    def tanh(self,x):
        return np.tanh(x)
    
    def softmax(self,x):
#         print("softmaaaaaaaaaaaaaaax",x.shape)
        return np.exp(x)/(np.exp(x).sum(axis=1,keepdims=True))
    
    def tanhDerivative(self,x):
        return 1-self.tanh(x)**2
    
    def ReLUDerivation(self,x):
        if(x.all()>=0):
            return 1
        else:
            return 0
        
    def sigmoidDerivative(self,x):
        return self.sigmoid(x)*(1-self.sigmoid(x))
    
    def linearDerivative(self,x):
        return np.ones(x.shape)
    
    def softmaxDerivative(self,x):
        return self.softmax(x)*(1-self.softmax(x))
    
    def leakyReLU(self,x):
        return np.max(x,0.05*x)
    
    def leakyReLUDerivative(self,x):
        if(x>=0):
            return 1
        else:
            return 0.05
    
        
    def activate(self,x):
        
        if(self.activation=='relu'):
            return self.ReLU(x),self.ReLUDerivation(x)
        elif(self.activation=='sigmoid'):
            return self.sigmoid(x),self.sigmoidDerivative(x)
        elif(self.activation=='tanh'):
            return self.tanh(x),self.tanhDerivative(x)
        elif(self.activation=='linear'):
            return self.linear(x),self.linearDerivative(x)
        elif(self.activation=='leakyrelu'):
            return self.leakyReLU(x),self.leakyReLUDerivative(x)
        else:
            return self.softmax(x),self.softmaxDerivative(x)
    
    
    def forwardPropagation(self,x):
        
        finalOut=x
        allActivations=[]
        allPreActivations=[]
#         print("finalllllllllll",finalOut)
#         print("weightssssssssssssssss",self.weight)
        for i in range(len(self.weight)-1):
#             print("ccccccccccccccccccccccccccccccc",self.weight[i].shape,self.bias[i].shape)
            layerActivation=np.dot(finalOut,self.weight[i])+self.bias[i]
            finalOut=self.activate(layerActivation)[0]
#             print("ssssssssssssssssssssssssssss",finalOut.shape)
            allActivations.append(finalOut)
            allPreActivations.append(layerActivation)
#         print("sssssssssssssss",finalOut)    
        layerActivation=np.dot(finalOut,self.weight[-1])+self.bias[-1]
        finalOut=self.softmax(layerActivation)
        allActivations.append(finalOut)
        allPreActivations.append(layerActivation)
            
        return (allActivations,allPreActivations)
         
    def backwardPropagation(self,y,yPred,allActivations,allPreActivations):
        
        delta=yPred-y
        derivatives=[]
        derivatives.append(delta)
#         print("checkkkkkkkkkkk",allPreActivations)
        for i in range(len(self.weight)-2,-1,-1):
            e=np.dot(delta,self.weight[i+1].T)
#             print("cccccccccccccccccccc",e.shape)
#             print('ssssssssssssssssssssssssss',allPreActivations[i].shape)
            delta=e*self.activate(allPreActivations[i])[1]
            
            derivatives.append(delta)
        
        return derivatives[::-1]
    
            
    def fit(self,x,y):
        
        allTrainError=[]
        
        for i in range(self.epoch):
            for size in range(0,x.shape[0],self.batchSize):
                xNew=[]
                yNew=[]
                for temp in range(self.batchSize):
                    xNew.append(x[size+temp])
                    yNew.append(y[size+temp])
#                 print("xnewwwwwwwwwwwwwwwwwwww",np.array(xNew))
                xNew=np.array(xNew)
                yNew=np.array(yNew)
                allActivations,allPreActivations=self.forwardPropagation(xNew)
                yPred=allActivations[-1]
                
                derivatives=self.backwardPropagation(yNew,yPred,allActivations,allPreActivations)
#                 allActivations[-1]=xNew
                self.updateWeights(xNew,allActivations,derivatives)
                
            trainError=self.crossEntropyLoss(yPred,yNew)
            allTrainError.append(trainError)
            
            if(((i+1)%5)==0):
                print("For the epoch ",i+1,"error = ",trainError)
                
        
        return np.array(allTrainError)
    
    def updateWeights(self,xNew,allActivations,derivatives):
        for layer in range(len(self.weight)):
            if(layer==0):
                grad=xNew.T.dot(derivatives[layer])/len(xNew)
                self.weight[layer]=self.weight[layer]-self.learningRate*grad
                self.bias[layer]=self.bias[layer]-self.learningRate*np.sum(derivatives[layer],axis=0)/len(xNew)
            else:
                grad=allActivations[layer-1].T.dot(derivatives[layer])/len(xNew)
                self.weight[layer]=self.weight[layer]-self.learningRate*grad
                self.bias[layer]=self.bias[layer]-self.learningRate*np.sum(derivatives[layer],axis=0)/len(xNew)
            
                
        
    def crossEntropyLoss(self,yPred,y):
        logy=-np.log(yPred[np.arange(y.shape[0]),y.argmax(axis=1)])
        return np.sum(logy)/y.shape[0]
                
        
        
        
        
    
        
    

In [5]:
x,y=loadData()
print(x.shape,y.shape)
scaler = StandardScaler()
x=scaler.fit_transform(x)


(60000, 784) (60000, 10)


In [54]:
classifier=MyNeuralNetwork(5,[784,256, 128, 64,10],'relu',0.1,'random',3000,100)
errors=classifier.fit(x,y)

NameError: name 'epoch' is not defined