In [1]:
import numpy as np
import pandas as pd 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [4]:
iris_ds = load_iris()

In [5]:
X,y = iris_ds.data,iris_ds.target
print("X.shape:",X.shape,"y.shape:",y.shape)

X.shape: (150, 4) y.shape: (150,)


In [6]:
Xtrain,Xtest,ytrain,ytest = train_test_split(X,y,test_size=.2,random_state=3)

In [7]:
print("Xtrain.shape:",Xtrain.shape,"ytrain.shape:",ytrain.shape)
print(f"\nXtrain[:5]:\n{Xtrain[:5]}\n\nytrain[:5]:\n{ytrain[:5]}")

Xtrain.shape: (120, 4) ytrain.shape: (120,)

Xtrain[:5]:
[[5.8 4.  1.2 0.2]
 [4.8 3.4 1.9 0.2]
 [6.9 3.1 5.4 2.1]
 [6.6 2.9 4.6 1.3]
 [4.4 2.9 1.4 0.2]]

ytrain[:5]:
[0 0 2 1 0]


In [8]:
Xtrain,Xtest,ytrain,ytest = Xtrain.T,Xtest.T,ytrain.reshape(-1,1).T,ytest.reshape(-1,1).T

In [9]:
print("Xtrain.shape:",Xtrain.shape,"ytrain.shape:",ytrain.shape)
print(f"\nXtrain[:5]:\n{Xtrain[:,:5]}\n\nytrain[:5]:\n{ytrain[:,:5]}")

Xtrain.shape: (4, 120) ytrain.shape: (1, 120)

Xtrain[:5]:
[[5.8 4.8 6.9 6.6 4.4]
 [4.  3.4 3.1 2.9 2.9]
 [1.2 1.9 5.4 4.6 1.4]
 [0.2 0.2 2.1 1.3 0.2]]

ytrain[:5]:
[[0 0 2 1 0]]


In [10]:
idx_to_target = {}
for idx,target in enumerate(iris_ds.target_names):
    idx_to_target[idx] = target
print(idx_to_target)

{0: 'setosa', 1: 'versicolor', 2: 'virginica'}


In [47]:
def init_params(layers):
        '''
        n_x : size of input layer
        n_h : size of hidden layer
        n_y : size of output layer
        w1  : Weights of input layer to hidden layer
        b1  : biases of input layer to hidden layer
        w2  : Weights of hidden layer to output layer
        b2  : biases of hidden layer to output layer 
        '''
        n_x,n_h,n_y = layers[0],layers[1],layers[2]
        W1 = np.random.rand(n_h,n_x) * 0.01
        b1 = np.zeros((n_h,1))
        W2 = np.random.rand(n_y,n_h) * 0.01
        b2 = np.zeros((n_y,1))
        params = {"w1":W1,"b1":b1,"w2":W2,"b2":b2}
        return params
    
def lineer_func(X,W,b):
    if X.shape != (X.shape[0],1):
        X = X.reshape(-1,1)
    return np.dot(W,X) + b 

def non_lineer_func(Z,f='sigmoid'):
    '''
    Z : lineer calculation result 
    f : activation function 
    '''
    if f == 'sigmoid':
        score = 1 / (1 + np.exp(-Z))
        return score
    elif f == 'softmax': 
        score = np.exp(Z) / np.sum(np.exp(Z))  
        return score 
    elif f== 'tanh':
        score = np.tanh(Z)
        return score 
    else:
        raise Exception('Wrong Activation Function in Non-lineer!')

def forward_propagation(X,y,params):
    '''
    Args: X: input
          y: real value
          params: weights and biases
    Return: 
          loss: loss
          cache: calculation results z1,a1,z2,a2
    '''
    W1 = params["w1"]
    b1 = params["b1"]
    W2 = params["w2"]
    b2 = params["b2"]

    assert len(y) == 1 
    assert W1.shape[1] ==  X.shape[0]
    m = 120 # sample_size
    Z1 = lineer_func(X,W1,b1)
    A1 = non_lineer_func(Z1,f='tanh')
    print("A1.shape:",A1.shape,"W2.shape:",W2.shape)
    assert W2.shape[1] == A1.shape[0]

    Z2 = lineer_func(A1,W2,b2)
    yhat = non_lineer_func(Z2,f='softmax')

    loss = -1 * (np.multiply(np.log(yhat),y) + np.multiply(np.log(1-yhat),1-y))
    cost = np.sum(loss) / m 
    cache = {"Z1":Z1,"A1":A1,"Z2":Z2,"yhat":yhat}

    return cost,cache

def tanh_derivative(A):
    '''
    Args: You need to give Ai to find derivate of Zi
    '''
    return (1- np.power(A,2))

def back_propagation(X,y,params,cache):
    '''
    compute derivative and update weights&biases
    Returns gradients(grads)
    '''

    m = 120 # sample_size
    W1 = params["w1"]
    W2 = params["w2"]

    A1 = cache["A1"]
    yhat = cache["yhat"]
    m = 120 
    assert len(y) == 1 
    assert yhat.shape == (3,1) # n_y,1
    # backward
    dz2 =  yhat - y 
    #print("dZ2:\n",dz2,"\t Shape:",dz2.shape)
    #print("A1:\n",A1,"\t Shape:",A1.shape)
    #print("m:",m)
    dw2 = np.dot(dz2,A1.T) / m
    db2 = np.sum(dz2,axis=1,keepdims=True) / m

    dz1 = np.multiply(np.dot(W2.T,dz2), tanh_derivative(A1))

    dw1 = np.dot(dz1,X.T) / m 
    db1 = np.sum(dz1,axis=1,keepdims=True) / m

    grads = {"dw1":dw1,"db1":db1,"dw2":dw2,"db2":db2}
    return grads

def update_params(params,grads,lr=0.001):
    '''
    Returns updated params
    '''
    # Weight and biases 
    W1 = params["w1"]
    b1 = params["b1"]
    W2 = params["w2"]
    b2 = params["b2"]
    print("W1",W1)
    # Gradients
    dw1 = grads["dw1"]
    db1 = grads["db1"]
    dw2 = grads["dw2"]
    db2 = grads["db2"]
    #print("Dw1:",dw1)
    #print("carpim:",lr*dw1)
    #print("fark:",W1-lr*dw1)
    # Update parameters according to given parameters and learning rate 
    w1 = W1 - lr * dw1
    b1 = b1 - lr * db1
    w2 = W2 - lr * dw2
    b2 = b2 - lr * db2

    updated_params = {"w1":w1,"b1":b1,"w2":w2,"b2":b2}
    return updated_params

In [48]:
layers = [4,5,3]
lr = 0.001
X1,y1 = Xtrain[:,2].reshape(-1,1),ytrain[:,3].reshape(-1,1)
print(f"X_.shape:{X1.shape}\ty_.shape:{y1.shape}")
params = init_params(layers)
print("params\n")
params

X_.shape:(4, 1)	y_.shape:(1, 1)
params



{'w1': array([[9.98257727e-03, 3.42584383e-03, 4.83802431e-03, 7.47925822e-03],
        [9.63671758e-03, 6.28405481e-03, 3.87375103e-04, 5.53144982e-03],
        [6.96364573e-03, 6.18422778e-04, 9.02290060e-03, 9.19127449e-03],
        [1.82596343e-04, 1.51478169e-03, 8.51045369e-03, 8.73564611e-03],
        [7.11631477e-03, 3.26854433e-03, 7.27981316e-05, 9.81603629e-03]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'w2': array([[6.64915261e-03, 9.74576562e-04, 5.50832759e-03, 3.32336580e-03,
         7.54376306e-03],
        [7.61998847e-05, 5.18960184e-03, 9.62337159e-03, 5.01958397e-03,
         5.02804313e-03],
        [5.16670711e-03, 4.94036338e-03, 9.93613231e-03, 3.01095738e-03,
         5.85145686e-03]]),
 'b2': array([[0.],
        [0.],
        [0.]])}

In [49]:
cost,cache = forward_propagation(X1,y1,params)
print("cost:",cost)

A1.shape: (5, 1) W2.shape: (3, 5)
cost: 0.027465308103770517


In [50]:
cache

{'Z1': array([[0.12133167],
        [0.09968179],
        [0.11799161],
        [0.07025704],
        [0.08024185]]),
 'A1': array([[0.12073977],
        [0.09935294],
        [0.11744708],
        [0.07014168],
        [0.08007007]]),
 'Z2': array([[0.00238372],
        [0.00240972],
        [0.00296136]]),
 'yhat': array([[0.33326626],
        [0.33327492],
        [0.33345882]])}

In [51]:
grads = back_propagation(X1,y1,params,cache)
grads

{'dw1': array([[-0.00044921, -0.00020182, -0.00035155, -0.00013671],
        [-0.00042146, -0.00018935, -0.00032984, -0.00012827],
        [-0.00094766, -0.00042576, -0.00074165, -0.00028842],
        [-0.0004331 , -0.00019458, -0.00033895, -0.00013181],
        [-0.0007017 , -0.00031526, -0.00054916, -0.00021356]]),
 'db1': array([[-6.51022419e-05],
        [-6.10808629e-05],
        [-1.37342134e-04],
        [-6.27680776e-05],
        [-1.01695806e-04]]),
 'dw2': array([[-0.00067084, -0.00055202, -0.00065255, -0.00038972, -0.00044488],
        [-0.00067084, -0.00055201, -0.00065254, -0.00038971, -0.00044487],
        [-0.00067065, -0.00055186, -0.00065236, -0.0003896 , -0.00044475]]),
 'db2': array([[-0.00555611],
        [-0.00555604],
        [-0.00555451]])}

In [52]:
updated_params = update_params(params,grads,lr=1)
updated_params

W1 [[9.98257727e-03 3.42584383e-03 4.83802431e-03 7.47925822e-03]
 [9.63671758e-03 6.28405481e-03 3.87375103e-04 5.53144982e-03]
 [6.96364573e-03 6.18422778e-04 9.02290060e-03 9.19127449e-03]
 [1.82596343e-04 1.51478169e-03 8.51045369e-03 8.73564611e-03]
 [7.11631477e-03 3.26854433e-03 7.27981316e-05 9.81603629e-03]]


{'w1': array([[0.01043178, 0.00362766, 0.00518958, 0.00761597],
        [0.01005818, 0.00647341, 0.00071721, 0.00565972],
        [0.00791131, 0.00104418, 0.00976455, 0.00947969],
        [0.0006157 , 0.00170936, 0.0088494 , 0.00886746],
        [0.00781802, 0.0035838 , 0.00062196, 0.0100296 ]]),
 'b1': array([[6.51022419e-05],
        [6.10808629e-05],
        [1.37342134e-04],
        [6.27680776e-05],
        [1.01695806e-04]]),
 'w2': array([[0.00732   , 0.00152659, 0.00616088, 0.00371308, 0.00798864],
        [0.00074704, 0.00574161, 0.01027591, 0.00540929, 0.00547292],
        [0.00583736, 0.00549222, 0.01058849, 0.00340056, 0.00629621]]),
 'b2': array([[0.00555611],
        [0.00555604],
        [0.00555451]])}

In [53]:
params["w1"] == updated_params["w1"]

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [54]:
params["w1"]

array([[9.98257727e-03, 3.42584383e-03, 4.83802431e-03, 7.47925822e-03],
       [9.63671758e-03, 6.28405481e-03, 3.87375103e-04, 5.53144982e-03],
       [6.96364573e-03, 6.18422778e-04, 9.02290060e-03, 9.19127449e-03],
       [1.82596343e-04, 1.51478169e-03, 8.51045369e-03, 8.73564611e-03],
       [7.11631477e-03, 3.26854433e-03, 7.27981316e-05, 9.81603629e-03]])

In [55]:
updated_params["w1"]

array([[0.01043178, 0.00362766, 0.00518958, 0.00761597],
       [0.01005818, 0.00647341, 0.00071721, 0.00565972],
       [0.00791131, 0.00104418, 0.00976455, 0.00947969],
       [0.0006157 , 0.00170936, 0.0088494 , 0.00886746],
       [0.00781802, 0.0035838 , 0.00062196, 0.0100296 ]])

In [60]:
# ALT TARAF SILINEBILIR .. 

In [58]:
def fit(X,y,sample_size,epochs=1000):
    layer_dims = [4,5,3]
    lr = 0.001
    print("Training is began..")
    params = init_params(layer_dims) 
    for epoch in range(epochs):
        for i in range(sample_size):
            X_,y_ = X[:,i],y[:,i]
            loss,cache = forward_propagation(X_,y_,params)
            grads = back_propagation(X_,y_,params,cache)
            params = update_params(params,grads,lr=1)
            self.loss.append(loss)
        cost = np.sum(self.loss) / sample_size

    if epoch % 10 == 0:
        print("Epoch: {} \t Cost: {}".format(epoch,cost))

In [59]:
fit(Xtrain,ytrain,120,1000)

Training is began..
A1.shape: (5, 1) W2.shape: (3, 5)


ValueError: shapes (5,1) and (4,) not aligned: 1 (dim 1) != 4 (dim 0)