# Creating a neural network from scratch using only the NumPy package
#### Predict handwritten digits from the MNIST image dataset (28*28 = 784 pixels per image) using a neural network
#### The network has an input layer with 784 nodes, a hidden layer with 10 nodes (ReLU activation), and an output layer with 10 nodes (softmax activation)


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

# Load the MNIST dataset through the Keras dataset helper. The call returns
# two (images, labels) pairs, unpacked here into the train and test splits.
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

In [None]:
# Flatten every image into a single row vector of pixels.
# The number of samples is read from the arrays themselves instead of being
# hard-coded, so the cell also works on a subset of the data.
print(xtrain.shape, xtest.shape, ytrain.shape, ytest.shape)
xtrain = np.reshape(xtrain, (xtrain.shape[0], -1))
xtest = np.reshape(xtest, (xtest.shape[0], -1))
print(xtrain.shape, xtest.shape, ytrain.shape, ytest.shape)


In [None]:
# Transpose so that each column holds one sample, then divide by 255 to
# scale the pixel values.
# NOTE: re-running this cell transposes and rescales the data again.
xtrain = np.transpose(xtrain) / 255
xtest = np.transpose(xtest) / 255

In [None]:
#initialize weights and biases
def initparams():
    """Create the initial weights and biases of the 784-10-10 network.

    Values are drawn uniformly from [-0.5, 0.5). Centering the draws on zero
    matters: with all-positive parameters every hidden pre-activation is a
    large positive sum over the 784 inputs, which pushes the softmax output
    into saturation from the very first iteration.

    Returns:
        Tuple ``(w1, w2, b1, b2)`` with shapes (10, 784), (10, 10),
        (10, 1) and (10, 1).
    """
    w1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    w2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return w1, w2, b1, b2

##relu function
def relu(z):
    """Return the element-wise maximum of 0.0 and ``z``."""
    floor = 0.0
    activated = np.maximum(floor, z)
    return activated

##softmax function
def softmax(x, axis=0):
    """
    Compute softmax values of matrix x along ``axis``.

    The default ``axis=0`` normalises each column, which matches the
    column-per-sample layout used by the rest of this file (the forward pass
    computes ``w.dot(x)`` and predictions are taken with ``argmax`` over
    axis 0). Normalising over axis 1 instead would mix different samples
    together and would not produce a per-sample probability distribution.

    Args:
        x: array of scores.
        axis: axis holding the class scores of one sample (default 0).

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1 along ``axis``.
    """
    # Subtracting the maximum along the class axis for numerical stability
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

#one hot encoding the target
def onehotfunc(z):
    """One-hot encode the labels in ``z`` into a (10, len(z)) matrix.

    Column ``j`` of the result is all zeros except for a 1 in row ``z[j]``.
    """
    encoded = np.zeros((10, z.shape[0]))
    for col, label in enumerate(z):
        encoded[label, col] = 1
    return encoded

#forward propagation
def forwprop(w1,w2,b1,b2,x):
    """Run one forward pass through the two-layer network.

    Returns:
        Tuple ``(z1, z2, a1, a2)``: hidden pre-activation, output
        pre-activation, hidden ReLU activation and output softmax activation.
    """
    # hidden layer: affine transform followed by ReLU
    hidden_pre = np.dot(w1, x) + b1
    hidden_act = relu(hidden_pre)
    # output layer: affine transform followed by softmax
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = softmax(output_pre)
    return hidden_pre, output_pre, hidden_act, output_act

#backward propagation
def backprop(z1,z2,a1,a2,w1,w2,x,y):
    """Compute the parameter gradients for one batch.

    Assumes the column-per-sample layout: ``x``, ``z1``, ``a1`` and ``a2``
    hold one sample per column and ``y`` holds one integer label per sample.

    Args:
        z1: hidden-layer pre-activation.
        z2: output-layer pre-activation (unused, kept for the call signature).
        a1: hidden-layer activation.
        a2: output-layer softmax activation.
        w1: first-layer weights (unused, kept for the call signature).
        w2: second-layer weights.
        x: input batch.
        y: integer class labels.

    Returns:
        Tuple ``(dw1, dw2, db1, db2)``. The bias gradients are column
        vectors with one entry per unit, matching the shape of the biases.
    """
    m = y.shape[0]
    inv_m = 1 / m
    ency = onehotfunc(y.astype(int))
    # softmax output minus one-hot target
    dz2 = a2 - ency
    dw2 = inv_m * np.dot(dz2, a1.T)
    # Sum over the sample axis only: each unit gets its own bias gradient
    # instead of a single scalar shared by every unit.
    db2 = inv_m * np.sum(dz2, axis=1, keepdims=True)
    # (z1 > 0) is the derivative of ReLU
    dz1 = np.dot(w2.T, dz2) * (z1 > 0)
    dw1 = inv_m * np.dot(dz1, x.T)
    db1 = inv_m * np.sum(dz1, axis=1, keepdims=True)
    return dw1, dw2, db1, db2

#updating parameters in each iteration
def paramupdate(w1,w2,b1,b2,dw1,dw2,db1,db2,alpha):
    """Take one gradient-descent step of size ``alpha`` on every parameter.

    Returns:
        Tuple ``(w1, w2, b1, b2)`` holding the updated values; the inputs
        are not modified in place.
    """
    new_w1 = w1 - alpha * dw1
    new_b1 = b1 - alpha * db1
    new_w2 = w2 - alpha * dw2
    new_b2 = b2 - alpha * db2
    return new_w1, new_w2, new_b1, new_b2

#getting the prediction
def prediction(z):
    """Return, for every column of ``z``, the row index of its largest entry."""
    best_rows = np.argmax(z, axis=0)
    return best_rows

#calculating accuracy
def accuracy(pred,y):
    """Print ``pred`` and ``y``, then return the fraction of matching entries."""
    print(pred, y)
    hits = np.sum(pred == y)
    total = y.size
    return hits / total

#the final neural network which prints accuracy at each iteration
def neuralnet(x,y,itr,alpha):
    """Train the network with full-batch gradient descent.

    Each of the ``itr`` iterations runs a forward pass, a backward pass and
    a parameter update with step size ``alpha``, then prints the iteration
    number together with the accuracy of that iteration's forward pass
    (i.e. measured before the update is applied).

    Returns:
        Tuple ``(w1, w2, b1, b2)`` with the trained parameters.
    """
    w1, w2, b1, b2 = initparams()
    for step in range(itr):
        z1, z2, a1, a2 = forwprop(w1, w2, b1, b2, x)
        grads = backprop(z1, z2, a1, a2, w1, w2, x, y)
        dw1, dw2, db1, db2 = grads
        w1, w2, b1, b2 = paramupdate(
            w1, w2, b1, b2, dw1, dw2, db1, db2, alpha
        )
        guesses = prediction(a2)
        score = accuracy(guesses, y)
        print(step, score)
    return w1, w2, b1, b2



    





In [None]:
# Train the neural network on the training split:
# 500 iterations with a learning rate of 0.001.
print(xtrain.shape, ytrain.shape)
w1, w2, b1, b2 = neuralnet(xtrain, ytrain, itr=500, alpha=0.001)