# 1. Import Necessary Libraries

In [1]:
import numpy as np
import pandas as pd 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# 2. Load Dataset

In [2]:
iris_ds = load_iris()

In [3]:
X,y = iris_ds.data,iris_ds.target
print("X.shape:",X.shape,"y.shape:",y.shape)

X.shape: (150, 4) y.shape: (150,)


In [4]:
Xtrain,Xtest,ytrain,ytest = train_test_split(X,y,test_size=.2,random_state=3)

In [5]:
print("Xtrain.shape:",Xtrain.shape,"ytrain.shape:",ytrain.shape)
print(f"\nXtrain[:5]:\n{Xtrain[:5]}\n\nytrain[:5]:\n{ytrain[:5]}")

Xtrain.shape: (120, 4) ytrain.shape: (120,)

Xtrain[:5]:
[[5.8 4.  1.2 0.2]
 [4.8 3.4 1.9 0.2]
 [6.9 3.1 5.4 2.1]
 [6.6 2.9 4.6 1.3]
 [4.4 2.9 1.4 0.2]]

ytrain[:5]:
[0 0 2 1 0]


In [6]:
Xtrain,Xtest,ytrain,ytest = Xtrain.T,Xtest.T,ytrain.reshape(-1,1).T,ytest.reshape(-1,1).T

In [7]:
print("Xtrain.shape:",Xtrain.shape,"ytrain.shape:",ytrain.shape)
print(f"\nXtrain[:5]:\n{Xtrain[:,:5]}\n\nytrain[:5]:\n{ytrain[:,:5]}")

Xtrain.shape: (4, 120) ytrain.shape: (1, 120)

Xtrain[:5]:
[[5.8 4.8 6.9 6.6 4.4]
 [4.  3.4 3.1 2.9 2.9]
 [1.2 1.9 5.4 4.6 1.4]
 [0.2 0.2 2.1 1.3 0.2]]

ytrain[:5]:
[[0 0 2 1 0]]


In [8]:
idx_to_target = {}
for idx,target in enumerate(iris_ds.target_names):
    idx_to_target[idx] = target
print(idx_to_target)

{0: 'setosa', 1: 'versicolor', 2: 'virginica'}


# 3. Create Layer Structure

In [9]:
n_h = 5  # hidden_layer_units
n_x = len(Xtrain[:,0]) # input_layer_units 
n_y = len(idx_to_target) # output_layer_units 

print("N_X:",n_x)
print("N_h:",n_h)
print("N_y:",n_y)

N_X: 4
N_h: 5
N_y: 3


# 4. Initalize Weights and Biases

In [10]:
W1 = np.random.rand(n_x,n_h) # input-hidden layer arasındaki agırlıklar w[0] -> Mesela, ilk noronun agırlıkları 
W2 = np.random.rand(n_h,n_y) # hidden_layer ile output_layer arasındaki agirliklar
b1 = np.random.rand(n_h,1)
b2 = np.random.rand(n_y,1)

In [11]:
print(f"W1:\n{W1}\t W1.shape:{W1.shape}")
print(f"b1:\n{b1}\t b1.shape:{W1.shape}")
print(f"W2:\n{W2}\t W2.shape:{W2.shape}")
print(f"b2:\n{b2}\t b2.shape:{b2.shape}")


W1:
[[0.85231319 0.52837607 0.5705212  0.09916507 0.59132746]
 [0.57097091 0.37039451 0.90022406 0.15587152 0.74320266]
 [0.74285507 0.88793231 0.48940715 0.15738595 0.95094928]
 [0.79941413 0.15994955 0.7172062  0.3632363  0.22249157]]	 W1.shape:(4, 5)
b1:
[[0.36117809]
 [0.43983447]
 [0.5369493 ]
 [0.04459745]
 [0.60337236]]	 b1.shape:(4, 5)
W2:
[[0.77974584 0.99945576 0.35497081]
 [0.09326779 0.45437124 0.42604761]
 [0.66135956 0.12115397 0.66520138]
 [0.15775684 0.28489648 0.91731744]
 [0.79430416 0.99135754 0.78633163]]	 W2.shape:(5, 3)
b2:
[[0.83540884]
 [0.64366388]
 [0.17261434]]	 b2.shape:(3, 1)


# 5. Forward Propagation

### 5.1. Lineer Calculation 

In [12]:
def lineer_fonk(X,W,b):
    if X.shape[0] == (4,1): # Check x shape is (4,1) or not 
        pass
    else:
        X = X.reshape(-1,1)
    
    assert X.shape == (X.shape[0],1)
    
    return np.dot(W.T,X)+b

In [13]:
sample_x = Xtrain[:,1]
sample_y = ytrain[1]
print("X:",sample_x,"\ty:",sample_y)
print("X.shape:",sample_x.shape,"\ty.shape:",sample_y.shape)

IndexError: index 1 is out of bounds for axis 0 with size 1

In [None]:
print("X.shape:",sample_x.shape,"\ty.shape:",sample_y.shape)
print("W.shape:",W1.shape,"\tb.shape:",b1.shape)

In [None]:
Z1 = lineer_fonk(sample_x,W1,b1)
print("Lineer Func.'s Result(Z1):\n",Z1,"\t Z1.shape:",Z1.shape)

### 5.2. Non-lineer Calculation with Activation Functions

In [None]:
# Activation Functions
def softmax(Z):
    score = np.exp(Z) / np.sum(np.exp(Z))  
    return score 

def tanh(Z):
    score = np.tanh(Z)
    return score 

def sigmoid(Z):
    score = 1 / (1 + np.exp(-Z))
    return score 

# Non-lineer part of a neuron 
def non_lineer(Z,f='sigmoid'):
    if f == 'sigmoid':
        return sigmoid(Z)
    elif f == 'softmax': 
        return softmax(Z)
    elif f== 'tanh':
        return tanh(Z)
    else:
        raise Exception('Wrong Activation Function in Non-lineer!')

# Derivatives of activation functions for Backprop-step 
def sigmoid_derivative(X):
    derivative  =  X * (1 - X)
    return derivative



In [None]:
A1 = non_lineer(Z1,f='tanh')
print("Before Activation Function(Z1):\n",Z1,"\t Shape:",Z1.shape)
print("\nAfter Activation Function(A1):\n",A1,"\t Shape:",A1.shape)

In [None]:
# Now A is our new input for next hidden layer. 
Z2 = lineer_fonk(A1,W2,b2)
print("Lineer Func.'s Result(Z2):\n",Z2,"\t Z2.shape:",Z2.shape)

In [None]:
A2 = non_lineer(Z2,f='softmax')
print("Before Activation Function(Z2):\n",Z2,"\t Shape:",Z2.shape)
print("\nAfter Activation Function(A2):\n",A2,"\t Shape:",A2.shape)

In [None]:
predictions = A2 # It's last output in our nn for 1 forward prop step.
print("Forward Propagation Last Result (PREDICTIONs):\n",predictions,"\t Shape: ",predictions.shape)

In [None]:
print("Prediction is ",np.argmax(predictions),".Its mean is : ",idx_to_target[np.argmax(predictions)])

# 6. Calculate Loss & Cost 

In [None]:
 def cross_entropy(self,y,yhat):
    ce = np.multiply(np.log(yhat),y) + np.multiply(np.log(1-yhat),1-y)
    return -ce 
    
def calc_cost(losses):
    return np.mean(losses)


In [None]:
print("Lets remember the sample_y and predictions...\n")
print("Sample_y:",sample_y,"Prediction:")

In [None]:
#loss = cross_entropy(sample_y)

In [None]:
'''
losses = [] # intialize loss array 
init_params() # initialize weights & biases 
for epoch in epochs: 
    yhat,_ = forward_propagation(X) # predict something in case of current weights & biases 
    loss = cross_entropy(y,yhat) # calculate loss 
    backward_propagation() # Calculate gradients 
    update_params() # Update parameters
    losses.append(loss) # add loss 
    
    if epoch % 10 == 0:
        cost = calc_cost(losses) # calculate cost 
        print("Epoch {} \t ---> \t Cost: ".format(epoch,cost)) # Show cost for every 10 epoch
   ''' 
    

In [189]:
class NeuralNetwork:
    def __init__(self,layers=[4,5,3],lr=0.001,iterations=100):
        '''
        layers[0] = input layer size
        layers[1] = hidden layer size
        layers[2] = output layer size
        lr = learning rate
        iterations = epochs 
        params = a dict which will contain Weights and Biases 
        loss = a list which will contain losses
        X  = inputs/features
        y = outputs/targets 
        '''
        self.params = {}
        self.learning_rate = lr
        self.iterations = iterations # epoch 
        self.loss = []
        self.sample_size = None
        self.layers = layers
        self.X = None
        self.y = None

    def init_params(self):
        '''
        n_x : size of input layer
        n_h : size of hidden layer
        n_y : size of output layer
        w1  : Weights of input layer to hidden layer
        b1  : biases of input layer to hidden layer
        w2  : Weights of hidden layer to output layer
        b2  : biases of hidden layer to output layer 
        '''
        self.n_x,self.n_h,self.n_y = self.layers[0],self.layers[1],self.layers[2]
        self.w1 = np.random.rand(self.n_h,self.n_x)
        self.b1 = np.random.rand(self.n_h,1)
        self.w2 = np.random.rand(self.n_y,self.n_h)
        self.b2 = np.random.rand(self.n_y,1)
        
        assert self.w1.shape == (self.n_h,self.n_x)
        assert self.w2.shape == (self.n_y,self.n_h)
        assert self.b1.shape == (self.n_h,1)
        assert self.b2.shape == (self.n_y,1)
        self.params = {"w1":self.w1,"b1":self.b1,"w2":self.w2,"b2":self.b2}
    def lineer_func(self,X,W,b):
        if X.shape != (X.shape[0],1):
            X = X.reshape(-1,1)
        return np.dot(W,X) + b 

    def non_lineer_func(self,Z,f='sigmoid'):
        '''
        Z : lineer calculation result 
        f : activation function 
        '''
        if f == 'sigmoid':
            score = 1 / (1 + np.exp(-Z))
            return score
        elif f == 'softmax': 
            score = np.exp(Z) / np.sum(np.exp(Z))  
            return score 
        elif f== 'tanh':
            score = np.tanh(Z)
            return score 
        else:
            raise Exception('Wrong Activation Function in Non-lineer!')
    
    def forward_propagation(self,X,y):
        W1 = self.params["w1"]
        b1 = self.params["b1"]
        W2 = self.params["w2"]
        b2 = self.params["b2"]
        
        assert len(y) == 1 
        assert W1.shape[1] ==  X.shape[0]
        
        Z1 = lineer_func(X,W1,b1)
        A1 = non_lineer_func(Z1,f='tanh')

        assert W2.T.shape[1] == A1.shape[0]

        Z2 = lineer_func(A1,W2,b2)
        yhat = non_lineer_func(Z2,f='softmax')

        loss = np.multiply(np.log(yhat),y) + np.multiply(np.log(1-yhat),1-y)

        self.cache = {"Z1":Z1,"A1":A1,"Z2":Z2,"yhat":yhat}

        return yhat,loss
    
    def tanh_derivative(A):
        '''
        Args: You need to give Ai to find derivate of Zi
        '''
        return (1- np.power(A,2))
    
    def sigmoid_derivative(A):
        '''
        Args: You need to give Ai to find derivate of Zi
        '''
        return np.multiply(A,(1-A))
    
    def back_propagation(self,X,y,params,cache):
        '''
        compute derivative and update weights&biases
        '''

        m = self.sample_size
        W1 = self.params["w1"]
        W2 = self.params["w2"]
        
        A1 = cache["A1"]
        yhat = cache["yhat"]
        m = 120 
        assert len(y) == 1 
        assert yhat.shape == (self.n_y,1)
        # backward
        dz2 =  yhat - y 
        print("dZ2:\n",dz2,"\t Shape:",dz2.shape)
        print("A1:\n",A1,"\t Shape:",A1.shape)
        print("m:",m)
        dw2 = np.dot(dz2,A1.T) / m
        db2 = np.sum(dz2,axis=1,keepdims=True) / m

        dz1 = np.multiply(np.dot(W2.T,dz2), tanh_derivative(A1))

        dw1 = np.dot(dz1,X.T) / m 
        db1 = np.sum(dz1,axis=1,keepdims=True) / m
        
        self.grads = {"dw1":dw1,"db1":db1,"dw2":dw2,"db2":db2}
        return self.grads
    
    def update_params(self,params,grads):
        lr = self.learning_rate
        # Weight and biases 
        W1 = self.params["w1"]
        b1 = self.params["b1"]
        W2 = self.params["w2"]
        b2 = self.params["b2"]

        # Gradients
        dw1 = self.grads["dw1"]
        db1 = self.grads["db1"]
        dw2 = self.grads["dw2"]
        db2 = self.grads["db2"]

        # Update parameters according to given parameters and learning rate 
        w1 = W1 - lr * dw1
        b1 = b1 - lr * db1
        w2 = W2 - lr * dw2
        b2 = b2 - lr * db2

        self.params = {"w1":w1,"b1":b1,"w2":w2,"b2":b2}
        return self.params
    
    def fit(self,X,y):
        print("INFO: Training is began...")
        #trainable_count = len(self.params["w1"].reshape(-1)) + len(self.params["b1"].reshape(-1)) + len(self.params["w2"].reshape(-1)) + len(self.params["b2"].reshape(-1))
        #print(f"INFO: Layer Count:{len(self.layers)-1}\n Learning Rate:{self.learning_rate}\n Epochs:{self.iterations} \nTrainable Parameters:{trainable_count}")
        self.init_params()
        self.sample_size = X.shape[1]
        for epoch in range(self.iterations):
            for i in range(self.sample_size):
                self.X = X[:,i].reshape(-1,1)
                self.y = y[i].reshape(-1,1)
                yhat,loss = self.forward_propagation()
                self.back_propagation(yhat)
                self.update_params()

                self.losses.append(loss)
            cost = -1 * np.mean(self.losses)
            cost = float(np.squeeze(cost)) # be sure to 1 dimension
            assert isinstance(cost,float)
            
            losses = [] # clear losses list 
            if epoch % 10 == 0:
                print("Epoch {} \t ---> \t Cost: ".format(epoch+1,cost)) # Show cost for every 10 epoch

    def predict(self,X):
        '''
        Makes prediction using model on test data
        '''
        yhat,_ = self.forward_propagation()
        return yhat
    
    def acc(self,y,yhat):
        acc = int( sum(y==yhat) / len(y) * 100)
        return acc 

    def plot_loss(self):
        plt.plot(self.loss)
        plt.xlabel("Iteration")
        plt.ylable("logloss")
        plt.title("Loss cure for training")
        plt.show()



In [190]:
nn = NeuralNetwork()

In [191]:
print("Xtrain.shape:",Xtrain.shape,"\tytrain.shape:",ytrain.shape)

Xtrain.shape: (4, 120) 	ytrain.shape: (1, 120)


In [192]:
nn.params

{}

In [193]:
nn.init_params()

In [194]:
nn.params

{'w1': array([[0.14899151, 0.18733881, 0.90677298, 0.88880441],
        [0.8833128 , 0.67265856, 0.18755734, 0.41234908],
        [0.94058584, 0.47771412, 0.7160983 , 0.57963805],
        [0.74424959, 0.38327416, 0.45494457, 0.56430957],
        [0.41926703, 0.69762027, 0.67472237, 0.97999816]]),
 'b1': array([[0.02494273],
        [0.40873935],
        [0.0643523 ],
        [0.87553245],
        [0.21793623]]),
 'w2': array([[0.68916411, 0.80049042, 0.7353547 , 0.85115575, 0.4925957 ],
        [0.31059814, 0.78671409, 0.04729197, 0.15863713, 0.0520535 ],
        [0.466365  , 0.83268637, 0.89182753, 0.71515263, 0.50165694]]),
 'b2': array([[0.36734346],
        [0.12041287],
        [0.51570921]])}

In [195]:
X_,y_ = Xtrain[:,0].reshape(-1,1),ytrain[:,0].reshape(-1,1)
print(f"X_.shape:{X_.shape}\ty_.shape:{y_.shape}")

X_.shape:(4, 1)	y_.shape:(1, 1)


In [196]:
nn.forward_propagation(X_,y_)

NameError: name 'lineer_func' is not defined

In [197]:
b1 = nn.params["b1"]
W1 = nn.params["w1"]

In [198]:
Z1 = nn.lineer_func(X_,W1,b1)

In [199]:
A1 = nn.non_lineer_func(Z1,f='tanh')

In [200]:
W2 = nn.params["w2"]
b2 = nn.params["b2"]
Z2 = nn.lineer_func(A1,W2,b2)
A2 = nn.non_lineer_func(Z2,f='softmax')

In [201]:
print("A2:\n",A2,"\t Shape:",A2.shape)

A2:
 [[0.48209349]
 [0.04126481]
 [0.4766417 ]] 	 Shape: (3, 1)


In [202]:
cache = {"Z1":Z1,"A1":A1,"Z2":Z2,"yhat":A2}

In [203]:
grads = nn.back_propagation(X_,y_,nn.params,cache)

dZ2:
 [[0.48209349]
 [0.04126481]
 [0.4766417 ]] 	 Shape: (3, 1)
A1:
 [[0.99401515]
 [0.99999992]
 [0.9999999 ]
 [0.99999923]
 [0.99999496]] 	 Shape: (5, 1)
m: 120


NameError: name 'tanh_derivative' is not defined

In [None]:
nn.fit(Xtrain,ytrain)

In [None]:
nn.params["w1"].shape

In [None]:
nn.cache["Z2"].shape

In [None]:
nn.y.shape

In [None]:
nn.X.shape

In [None]:
# STRUCTURE OF ASSIGNMENT 
'''
losses = [] # intialize loss array 
init_params() # initialize weights & biases 
for epoch in epochs: 
    yhat,_ = forward_propagation(X) # predict something in case of current weights & biases 
    loss = cross_entropy(y,yhat) # calculate loss 
    backward_propagation() # Calculate gradients 
    update_params() # Update parameters
    losses.append(loss) # add loss 
    
    if epoch % 10 == 0:
        cost = calc_cost(losses) # calculate cost 
        print("Epoch {} \t ---> \t Cost: ".format(epoch,cost)) # Show cost for every 10 epoch
''' 
    

In [248]:
def init_params(layers):
        '''
        n_x : size of input layer
        n_h : size of hidden layer
        n_y : size of output layer
        w1  : Weights of input layer to hidden layer
        b1  : biases of input layer to hidden layer
        w2  : Weights of hidden layer to output layer
        b2  : biases of hidden layer to output layer 
        '''
        n_x,n_h,n_y = layers[0],layers[1],layers[2]
        W1 = np.random.rand(n_h,n_x)
        b1 = np.random.rand(n_h,1)
        W2 = np.random.rand(n_y,n_h)
        b2 = np.random.rand(n_y,1)
        params = {"w1":W1,"b1":b1,"w2":W2,"b2":b2}
        return params
    
def lineer_func(X,W,b):
    if X.shape != (X.shape[0],1):
        X = X.reshape(-1,1)
    return np.dot(W,X) + b 

def non_lineer_func(Z,f='sigmoid'):
    '''
    Z : lineer calculation result 
    f : activation function 
    '''
    if f == 'sigmoid':
        score = 1 / (1 + np.exp(-Z))
        return score
    elif f == 'softmax': 
        score = np.exp(Z) / np.sum(np.exp(Z))  
        return score 
    elif f== 'tanh':
        score = np.tanh(Z)
        return score 
    else:
        raise Exception('Wrong Activation Function in Non-lineer!')

def forward_propagation(X,y,params):
    '''
    Args: X: input
          y: real value
          params: weights and biases
    Return: 
          loss: loss
          cache: calculation results z1,a1,z2,a2
    '''
    W1 = params["w1"]
    b1 = params["b1"]
    W2 = params["w2"]
    b2 = params["b2"]

    assert len(y) == 1 
    assert W1.shape[1] ==  X.shape[0]
    m = 120 # sample_size
    Z1 = lineer_func(X,W1,b1)
    A1 = non_lineer_func(Z1,f='tanh')
    print("A1.shape:",A1.shape,"W2.shape:",W2.shape)
    assert W2.shape[1] == A1.shape[0]

    Z2 = lineer_func(A1,W2,b2)
    yhat = non_lineer_func(Z2,f='softmax')

    loss = -1 * (np.multiply(np.log(yhat),y) + np.multiply(np.log(1-yhat),1-y))
    cost = np.sum(loss) / m 
    cache = {"Z1":Z1,"A1":A1,"Z2":Z2,"yhat":yhat}

    return cost,cache

def tanh_derivative(A):
    '''
    Args: You need to give Ai to find derivate of Zi
    '''
    return (1- np.power(A,2))

def back_propagation(X,y,params,cache):
    '''
    compute derivative and update weights&biases
    Returns gradients(grads)
    '''

    m = 120 # sample_size
    W1 = params["w1"]
    W2 = params["w2"]

    A1 = cache["A1"]
    yhat = cache["yhat"]
    m = 120 
    assert len(y) == 1 
    assert yhat.shape == (3,1) # n_y,1
    # backward
    dz2 =  yhat - y 
    #print("dZ2:\n",dz2,"\t Shape:",dz2.shape)
    #print("A1:\n",A1,"\t Shape:",A1.shape)
    #print("m:",m)
    dw2 = np.dot(dz2,A1.T) / m
    db2 = np.sum(dz2,axis=1,keepdims=True) / m

    dz1 = np.multiply(np.dot(W2.T,dz2), tanh_derivative(A1))

    dw1 = np.dot(dz1,X.T) / m 
    db1 = np.sum(dz1,axis=1,keepdims=True) / m

    grads = {"dw1":dw1,"db1":db1,"dw2":dw2,"db2":db2}
    return grads

def update_params(params,grads,lr=0.001):
    '''
    Returns updated params
    '''
    # Weight and biases 
    W1 = params["w1"]
    b1 = params["b1"]
    W2 = params["w2"]
    b2 = params["b2"]

    # Gradients
    dw1 = grads["dw1"]
    db1 = grads["db1"]
    dw2 = grads["dw2"]
    db2 = grads["db2"]

    # Update parameters according to given parameters and learning rate 
    w1 = W1 - lr * dw1
    b1 = b1 - lr * db1
    w2 = W2 - lr * dw2
    b2 = b2 - lr * db2

    params = {"w1":w1,"b1":b1,"w2":w2,"b2":b2}
    return params

In [249]:
layers = [4,5,3]
lr = 0.001
X1,y1 = Xtrain[:,2].reshape(-1,1),ytrain[:,3].reshape(-1,1)
print(f"X_.shape:{X_.shape}\ty_.shape:{y_.shape}")
params = init_params(layers)
print("params\n")
params

X_.shape:(4, 1)	y_.shape:(1, 1)
params



{'w1': array([[0.35734249, 0.97846537, 0.36536034, 0.27000946],
        [0.29420431, 0.70846036, 0.07207911, 0.04133522],
        [0.56514474, 0.2613164 , 0.00825787, 0.32579672],
        [0.93920099, 0.3159115 , 0.07804344, 0.5159972 ],
        [0.9835994 , 0.41844141, 0.94191881, 0.35488166]]),
 'b1': array([[0.29855978],
        [0.17931056],
        [0.37230165],
        [0.31154832],
        [0.65791015]]),
 'w2': array([[0.20570394, 0.63976103, 0.74851606, 0.34773297, 0.10339065],
        [0.81324585, 0.23714137, 0.23137034, 0.98826681, 0.87490822],
        [0.35446091, 0.87557007, 0.37599981, 0.28597489, 0.0226546 ]]),
 'b2': array([[0.73329502],
        [0.74817506],
        [0.85913364]])}

In [250]:
cost,cache = forward_propagation(X1,y1,params)

A1.shape: (5, 1) W2.shape: (3, 5)


In [251]:
cost

0.031204988925457193

In [252]:
cache

{'Z1': array([[ 8.3374313 ],
        [ 4.88157852],
        [ 5.81064684],
        [ 9.27638955],
        [14.57352743]]),
 'A1': array([[0.99999989],
        [0.99988494],
        [0.99998205],
        [0.99999998],
        [1.        ]]),
 'Z2': array([[2.77831259],
        [3.89307611],
        [2.77368639]]),
 'yhat': array([[0.19824625],
        [0.60442251],
        [0.19733124]])}

In [253]:
grads = back_propagation(X1,y1,params,cache)

In [254]:
grads

{'dw1': array([[-1.01639005e-08, -4.56639010e-09, -7.95435694e-09,
         -3.09336103e-09],
        [-1.73264078e-05, -7.78432815e-06, -1.35597974e-05,
         -5.27325455e-06],
        [-2.05023661e-06, -9.21120796e-07, -1.60453300e-06,
         -6.23985055e-07],
        [-1.81238712e-09, -8.14260882e-10, -1.41838992e-09,
         -5.51596081e-10],
        [-2.25859797e-14, -1.01473242e-14, -1.76759841e-14,
         -6.87399382e-15]]),
 'db1': array([[-1.47302906e-09],
        [-2.51107360e-06],
        [-2.97135741e-07],
        [-2.62664801e-10],
        [-3.27333039e-15]]),
 'dw2': array([[-0.00668128, -0.00668051, -0.00668116, -0.00668128, -0.00668128],
        [-0.00329648, -0.0032961 , -0.00329642, -0.00329648, -0.00329648],
        [-0.00668891, -0.00668814, -0.00668879, -0.00668891, -0.00668891]]),
 'db2': array([[-0.00668128],
        [-0.00329648],
        [-0.00668891]])}

In [255]:
params = update_params(params,grads,lr=0.1)

In [256]:
params

{'w1': array([[0.35734249, 0.97846537, 0.36536034, 0.27000946],
        [0.29420604, 0.70846114, 0.07208046, 0.04133574],
        [0.56514495, 0.26131649, 0.00825803, 0.32579679],
        [0.93920099, 0.3159115 , 0.07804344, 0.5159972 ],
        [0.9835994 , 0.41844141, 0.94191881, 0.35488166]]),
 'b1': array([[0.29855978],
        [0.17931081],
        [0.37230168],
        [0.31154832],
        [0.65791015]]),
 'w2': array([[0.20637207, 0.64042908, 0.74918418, 0.3484011 , 0.10405877],
        [0.8135755 , 0.23747098, 0.23169999, 0.98859646, 0.87523787],
        [0.3551298 , 0.87623889, 0.37666869, 0.28664378, 0.02332349]]),
 'b2': array([[0.73396315],
        [0.74850471],
        [0.85980253]])}