In [3]:
#Load the dependencies
import numpy as np
from mnist import MNIST
import matplotlib.pyplot as plt

class ANN():
    def __init__(self, layerSize):
        self.layerSize = layerSize
        self.parameters = {}
        self.L = len(self.layerSize)
        self.n = 0
        self.cost = []
    def sigmoid(self,z):
        return z/(z+ np.exp(-z))
    
    def initializeParameters(self):
        np.random.seed(1)
        
        for l in range((1,self.layerSize)):
            self.parameters["W" +str(l)] = np.random.randn(self.layerSize[l], self.layerSize[l-1]) / np.sqrt(self.layerSize[l-1])
            
            self.parameters["b" +str(l)] = np.zeros((self.layerSize[l], 1))
            
    def forward(self, X):
        store = {}
        
        A = X.T
        for l in range(self.L-1):
            Z = self.parameters["W" + str(l+1)].dot(A) + self.parameters["b" + str(l+1)]
            A = self.sigmoid(Z)
            store["A" + str(l+1)] = A
            store["W" + str(l+1)] = self.parameters["W" + str(l+1)]
            store["Z" + str(l+1)] = Z
        
        Z = self.parameters["W" + str(self.L)].dot(A) + self.parameters["b" + str(self.L)]
        A = self.sigmoid(Z)
        store["A" + str(self.L)] = A
        store["W" + str(self.L)] = self.parameters["W" + str(self.L)]
        store["Z" + str(self.L)] = Z
        
        return A, store
            
    def sigmoidDerivative(self, Z):
        s = 1 / (1+np.exp(-Z))
        return s*(1-s)
        
    def backward(self, X,Y, store):
        
        derivatives = {}
        
        store["A0"] = X.T
        
        A = store["A" + str(self.L)]
        dA = -np.divide(Y,A) + np.divide(1-Y, 1-A) #The cost function
        
        dZ = dA * self.sigmoidDerivative(store["Z" + str(self.L)]) #gradient time the output function of the last layer
        dW = dZ.dot(store["A" + str(self.L - 1)].T) / self.n #chain rule for the previous layer
        db = np.sum(dZ, axis=1, keepdims = True) / self.n #the differentiation of the bias
        dAPrev = store["W" + str(self.L).T.dot(dZ)] #output(derivative) for the previous layer
        
        derivatives["dW" + str(self.L)] = dW #Derived weights for the last layer
        derivatives["db" + str(self.L)] = db #Derived bias for the last layer
        
        for l in range(self.L-1,0,-1):
            dZ = dAPrev * self.sigmoidDerivative(store["Z" + str(l)])
            dW = 1. / self.n * dZ.dot(store["A" + str(l-1)].T)
            db = 1. / self.n * np.sum(dZ, axis = 1, keepdims = True)
            
            if l> 1:
                dAPrev = store["W" + str(l)].T.dot(dZ)
                
            derivatives["dW" + str(l)] = dW
            derivative["db" + str(l)] = db
            
        return derivatives
    
    def fit(self, X, Y, learningRate = 0.01, nIterations = 2500):
        np.random.seed(1)
        
        self.n = X.shape[0]
        
        self.layerSize.insert(0, X.shape[1])
        
        self.initializeParameters()
        
        for loop in range(nIterations):
            A, store = self.forward(X)
            cost = np.squeeze(-(Y.dot(np.log(A.T)) + (1-Y).dot(np.log(1-A.T)))/ self.n)
            derivatives = self.backward(X,Y,store)
            
            
            for l in range(1, self.L+1):
                
                self.parameters["W" + str(l)] = self.parameters["W" + str(l)] - learningRate * derivatives["dW"+str(l)]
                
                self.parameters["b" + str(l)] = self.parameters["b" + str(l)] - learningRate * derivatives["db" + str(l)]
                
            if loop % 100 == 0:
                print(cost)
                self.costs.append(cost)
    
    def predict(self, X,Y):
        A, cache = self.forward(X)
        n = X.shape[0]
        p = np.zeros((1,n))
        
        for i in range(0, A.shape[1]):
            if A[0,i] > 0.5:
                p[0,i] = 1
            else: 
                p[0,i] = 0
        
        print("Accuracy: " + str(np.sum((p ==Y) / n)))
    
    def plotCost(self):
        plt.figure()
        plt.plot(np.arrange(len(self.costs)), self.costs)
        plt.xlabel("epochs")
        plt.ylabel("cost")
        plt.show()
    
def getBinaryDataset():
    trainXorig, trainYorig, testXorig, testYorig = mnist.get_data()
    
    index5 = np.where(trainYorig == 5)
    index8 = np.where(trainYorig == 8)
    
    index = np.concatenate([index5[0], index8[0]])
    np.random.seed(1)
    np.random.shuffle(index)
    
    trainY = trainYorig[index]
    trainX = trainXorig[index]
    
    
    trainY[np.where(trainY == 5)] = 0
    trainy[np.where(trainY == 8)] = 1
    
    index5 = np.where(testXorig == 5)
    index8 = np.where(testXorig == 8)
    
    index = np.concatenate([index5[0], index8[0]])
    np.random.shuffle(index)
    
    testY = testYorig[index]
    testX = testXorig[index]
    
    testY[np.where(testY == 5)] = 0
    testY[np.where(testY == 8)] = 1
    
    return trainX, trainY, testX, testY

def preProcessData(trainX, testX):
    # Normalize
    trainX = trainX/ 255.
    testX = testX/255.
    
    return trainX, testX

if __name__ == '__main__':
    trainX, trainY, testX, testY = getBinaryDataset()
    
    trainX, testX = preProcessData(trainX, testX)
    
    print("trainX shape:" + str(trainX.shape))
    print("testX shape:" + str(testX.shape))
    
    layersDims = [196,1]
    
    ann = ANN(layersDims)
    ann.fit(trainX, trainY, learningRate = 0.01, nIterations = 1000)
    ann.predict(trainX, trainY)
    ann.predict(testX, testY)
    ann.plotCost
                

NameError: name 'mnist' is not defined