# 1. Import Necessary Libraries

In [1]:
import numpy as np
import pandas as pd 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# 2. Load Dataset

In [2]:
iris_ds = load_iris()

In [3]:
X,y = iris_ds.data,iris_ds.target
print("X.shape:",X.shape,"y.shape:",y.shape)

X.shape: (150, 4) y.shape: (150,)


In [4]:
Xtrain,Xtest,ytrain,ytest = train_test_split(X,y,test_size=.2,random_state=3)

In [5]:
print("Xtrain.shape:",Xtrain.shape,"ytrain.shape:",ytrain.shape)
print(f"\nXtrain[:5]:\n{Xtrain[:5]}\n\nytrain[:5]:\n{ytrain[:5]}")

Xtrain.shape: (120, 4) ytrain.shape: (120,)

Xtrain[:5]:
[[5.8 4.  1.2 0.2]
 [4.8 3.4 1.9 0.2]
 [6.9 3.1 5.4 2.1]
 [6.6 2.9 4.6 1.3]
 [4.4 2.9 1.4 0.2]]

ytrain[:5]:
[0 0 2 1 0]


In [6]:
Xtrain,Xtest,ytrain,ytest = Xtrain.T,Xtest.T,ytrain.reshape(-1,1).T,ytest.reshape(-1,1).T

In [7]:
print("Xtrain.shape:",Xtrain.shape,"ytrain.shape:",ytrain.shape)
print(f"\nXtrain[:5]:\n{Xtrain[:,:5]}\n\nytrain[:5]:\n{ytrain[:,:5]}")

Xtrain.shape: (4, 120) ytrain.shape: (1, 120)

Xtrain[:5]:
[[5.8 4.8 6.9 6.6 4.4]
 [4.  3.4 3.1 2.9 2.9]
 [1.2 1.9 5.4 4.6 1.4]
 [0.2 0.2 2.1 1.3 0.2]]

ytrain[:5]:
[[0 0 2 1 0]]


In [8]:
idx_to_target = {}
for idx,target in enumerate(iris_ds.target_names):
    idx_to_target[idx] = target
print(idx_to_target)

{0: 'setosa', 1: 'versicolor', 2: 'virginica'}


# 3. Create Layer Structure

In [9]:
n_h = 5  # hidden_layer_units
n_x = len(Xtrain[:,0]) # input_layer_units 
n_y = len(idx_to_target) # output_layer_units 

print("N_X:",n_x)
print("N_h:",n_h)
print("N_y:",n_y)

N_X: 4
N_h: 5
N_y: 3


# 4. Initalize Weights and Biases

In [10]:
W1 = np.random.rand(n_x,n_h) # input-hidden layer arasındaki agırlıklar w[0] -> Mesela, ilk noronun agırlıkları 
W2 = np.random.rand(n_h,n_y) # hidden_layer ile output_layer arasındaki agirliklar
b1 = np.random.rand(n_h,1)
b2 = np.random.rand(n_y,1)

In [11]:
print(f"W1:\n{W1}\t W1.shape:{W1.shape}")
print(f"b1:\n{b1}\t b1.shape:{W1.shape}")
print(f"W2:\n{W2}\t W2.shape:{W2.shape}")
print(f"b2:\n{b2}\t b2.shape:{b2.shape}")


W1:
[[0.43660609 0.60542122 0.63645407 0.79411884 0.95800011]
 [0.81065925 0.79752391 0.94499652 0.76905032 0.52645312]
 [0.97103014 0.15069328 0.25463608 0.74808575 0.27653712]
 [0.96842085 0.041185   0.56537396 0.47554876 0.45363784]]	 W1.shape:(4, 5)
b1:
[[0.63734679]
 [0.91059013]
 [0.35794682]
 [0.94310387]
 [0.54065691]]	 b1.shape:(4, 5)
W2:
[[0.51190471 0.6132555  0.61365144]
 [0.35991317 0.73489262 0.51674482]
 [0.97418776 0.90291177 0.17056371]
 [0.22560971 0.95037523 0.94746135]
 [0.15798704 0.86102936 0.83883711]]	 W2.shape:(5, 3)
b2:
[[0.08515251]
 [0.20459528]
 [0.47954159]]	 b2.shape:(3, 1)


# 5. Forward Propagation

### 5.1. Lineer Calculation 

In [None]:
def lineer_fonk(X,W,b):
    if X.shape[0] == (4,1): # Check x shape is (4,1) or not 
        pass
    else:
        X = X.reshape(-1,1)
    
    assert X.shape == (X.shape[0],1)
    
    return np.dot(W.T,X)+b

In [None]:
sample_x = Xtrain[:,1]
sample_y = ytrain[1]
print("X:",sample_x,"\ty:",sample_y)
print("X.shape:",sample_x.shape,"\ty.shape:",sample_y.shape)

In [None]:
print("X.shape:",sample_x.shape,"\ty.shape:",sample_y.shape)
print("W.shape:",W1.shape,"\tb.shape:",b1.shape)

In [None]:
Z1 = lineer_fonk(sample_x,W1,b1)
print("Lineer Func.'s Result(Z1):\n",Z1,"\t Z1.shape:",Z1.shape)

### 5.2. Non-lineer Calculation with Activation Functions

In [None]:
# Activation Functions
def softmax(Z):
    score = np.exp(Z) / np.sum(np.exp(Z))  
    return score 

def tanh(Z):
    score = np.tanh(Z)
    return score 

def sigmoid(Z):
    score = 1 / (1 + np.exp(-Z))
    return score 

# Non-lineer part of a neuron 
def non_lineer(Z,f='sigmoid'):
    if f == 'sigmoid':
        return sigmoid(Z)
    elif f == 'softmax': 
        return softmax(Z)
    elif f== 'tanh':
        return tanh(Z)
    else:
        raise Exception('Wrong Activation Function in Non-lineer!')

# Derivatives of activation functions for Backprop-step 
def sigmoid_derivative(X):
    derivative  =  X * (1 - X)
    return derivative



In [None]:
A1 = non_lineer(Z1,f='tanh')
print("Before Activation Function(Z1):\n",Z1,"\t Shape:",Z1.shape)
print("\nAfter Activation Function(A1):\n",A1,"\t Shape:",A1.shape)

In [None]:
# Now A is our new input for next hidden layer. 
Z2 = lineer_fonk(A1,W2,b2)
print("Lineer Func.'s Result(Z2):\n",Z2,"\t Z2.shape:",Z2.shape)

In [None]:
A2 = non_lineer(Z2,f='softmax')
print("Before Activation Function(Z2):\n",Z2,"\t Shape:",Z2.shape)
print("\nAfter Activation Function(A2):\n",A2,"\t Shape:",A2.shape)

In [None]:
predictions = A2 # It's last output in our nn for 1 forward prop step.
print("Forward Propagation Last Result (PREDICTIONs):\n",predictions,"\t Shape: ",predictions.shape)

In [None]:
print("Prediction is ",np.argmax(predictions),".Its mean is : ",idx_to_target[np.argmax(predictions)])

# 6. Calculate Loss & Cost 

In [None]:
 def cross_entropy(self,y,yhat):
    ce = np.multiply(np.log(yhat),y) + np.multiply(np.log(1-yhat),1-y)
    return -ce 
    
def calc_cost(losses):
    return np.mean(losses)


In [None]:
print("Lets remember the sample_y and predictions...\n")
print("Sample_y:",sample_y,"Prediction:")

In [None]:
#loss = cross_entropy(sample_y)

In [None]:
'''
losses = [] # intialize loss array 
init_params() # initialize weights & biases 
for epoch in epochs: 
    yhat,_ = forward_propagation(X) # predict something in case of current weights & biases 
    loss = cross_entropy(y,yhat) # calculate loss 
    backward_propagation() # Calculate gradients 
    update_params() # Update parameters
    losses.append(loss) # add loss 
    
    if epoch % 10 == 0:
        cost = calc_cost(losses) # calculate cost 
        print("Epoch {} \t ---> \t Cost: ".format(epoch,cost)) # Show cost for every 10 epoch
   ''' 
    

# All-in

In [12]:
class NeuralNetwork:
    def __init__(self,layers=[4,5,3],lr=0.001,iterations=100):
        '''
        layers[0] = input layer size
        layers[1] = hidden layer size
        layers[2] = output layer size
        lr = learning rate
        iterations = epochs 
        params = a dict which will contain Weights and Biases 
        loss = a list which will contain losses
        X  = inputs/features
        y = outputs/targets 
        '''
        self.params = {}
        self.learning_rate = lr
        self.iterations = iterations # epoch 
        self.loss = []
        self.sample_size = None
        self.layers = layers
        self.X = None
        self.y = None


    def init_params(self):
        '''
        n_x : size of input layer
        n_h : size of hidden layer
        n_y : size of output layer
        w1  : Weights of input layer to hidden layer
        b1  : biases of input layer to hidden layer
        w2  : Weights of hidden layer to output layer
        b2  : biases of hidden layer to output layer 
        '''
        self.n_x,self.n_h,self.n_y = self.layers[0],self.layers[1],self.layers[2]
        self.w1 = np.random.rand(self.n_h,self.n_x)
        self.b1 = np.random.rand(self.n_h,1)
        self.w2 = np.random.rand(self.n_y,self.n_h)
        self.b2 = np.random.rand(self.n_y,1)
        
        assert self.w1.shape == (self.n_h,self.n_x)
        assert self.w2.shape == (self.n_y,self.n_h)
        assert self.b1.shape == (self.n_h,1)
        assert self.b2.shape == (self.n_y,1)
        self.params = {"w1":self.w1,"b1":self.b1,"w2":self.w2,"b2":self.b2}
    
    def lineer_func(X,W,b):
        if X.shape[0] == (4,1): # Check x shape is (4,1) or not 
            pass
        else:
            X = X.reshape(-1,1)
        assert X.shape == (n_x,1)
        return np.dot(W,X)+b
    
    # Activation Functions
    # Non-lineer part of a neuron 
    def non_lineer_func(self,Z,f='sigmoid'):
        '''
        Z : lineer calculation result 
        f : activation function 
        '''
        if f == 'sigmoid':
            score = 1 / (1 + np.exp(-Z))
            return score
        elif f == 'softmax': 
            score = np.exp(Z) / np.sum(np.exp(Z))  
            return score 
        elif f== 'tanh':
            score = np.tanh(Z)
            return score 
        else:
            raise Exception('Wrong Activation Function in Non-lineer!')
    
    def forward_propagation(self,X,y):
        W1 = self.params["w1"]
        b1 = self.params["b1"]
        W2 = self.params["w2"]
        b2 = self.params["b2"]
        
        assert len(y) == 1 
        assert W1.shape[1] ==  X.shape[0]
        
        Z1 = lineer_func(X,W1,b1)
        A1 = non_lineer_func(Z1,f='tanh')

        assert W2.T.shape[1] == A1.shape[0]

        Z2 = lineer_func(A1,W2,b2)
        yhat = non_lineer_func(Z2,f='softmax')

        loss = np.multiply(np.log(yhat),y) + np.multiply(np.log(1-yhat),1-y)

        self.cache = {"Z1":Z1,"A1":A1,"Z2":Z2,"yhat":yhat}

        return yhat,loss
    
    def tanh_derivative(A):
        '''
        Args: You need to give Ai to find derivate of Zi
        '''
        return (1- np.power(A,2))
    
    def sigmoid_derivative(A):
        '''
        Args: You need to give Ai to find derivate of Zi
        '''
        return np.multiply(A,(1-A))
    
    def back_propagation(self,X,yhat):
        '''
        compute derivative and update weights&biases
        '''
        assert len(y) == 1 
        assert yhat.shape == (self.n_y,1)
        m = self.sample_size
        W1 = self.params["w1"]
        W2 = self.params["w2"]
        
        A1 = self.cache["A1"]
        yhat = self.cache["yhat"]
        # backward
        dz2 =  - self.y 
        print("dZ2:\n",dz2,"\t Shape:",dz2.shape)
        dw2 = np.dot(dz2,A1.T) / m
        db2 = np.sum(dz2,axis=1,keepdims=True) / m

        dz1 = np.multiply(np.dot(W2.T,dz2), tanh_derivative(A1))

        dw1 = np.dot(dz1,self.X.T) / m 
        db1 = np.sum(dz1,axis=1,keepdims=True) / m
        
        self.grads = {"dw1":dw1,"db1":db1,"dw2":dw2,"db2":db2}

    def update_params(self):
        lr = self.learning_rate
        # Weight and biases 
        W1 = self.params["w1"]
        b1 = self.params["b1"]
        W2 = self.params["w2"]
        b2 = self.params["b2"]

        # Gradients
        dw1 = self.grads["dw1"]
        db1 = self.grads["db1"]
        dw2 = self.grads["dw2"]
        db2 = self.grads["db2"]

        # Update parameters according to given parameters and learning rate 
        w1 = W1 - lr * dw1
        b1 = b1 - lr * db1
        w2 = W2 - lr * dw2
        b2 = b2 - lr * db2

        self.params = {"w1":w1,"b1":b1,"w2":w2,"b2":b2}

    def fit(self,X,y):
        print("INFO: Training is began...")
        #trainable_count = len(self.params["w1"].reshape(-1)) + len(self.params["b1"].reshape(-1)) + len(self.params["w2"].reshape(-1)) + len(self.params["b2"].reshape(-1))
        #print(f"INFO: Layer Count:{len(self.layers)-1}\n Learning Rate:{self.learning_rate}\n Epochs:{self.iterations} \nTrainable Parameters:{trainable_count}")
        self.init_params()
        self.sample_size = X.shape[1]
        for epoch in range(self.iterations):
            for i in range(self.sample_size):
                self.X = X[:,i].reshape(-1,1)
                self.y = y[i].reshape(-1,1)
                yhat,loss = self.forward_propagation()
                self.back_propagation(yhat)
                self.update_params()

                self.losses.append(loss)
            cost = -1 * np.mean(self.losses)
            cost = float(np.squeeze(cost)) # be sure to 1 dimension
            assert isinstance(cost,float)
            
            losses = [] # clear losses list 
            if epoch % 10 == 0:
                print("Epoch {} \t ---> \t Cost: ".format(epoch+1,cost)) # Show cost for every 10 epoch

    def predict(self,X):
        '''
        Makes prediction using model on test data
        '''
        yhat,_ = self.forward_propagation()
        return yhat
    
    def acc(self,y,yhat):
        acc = int( sum(y==yhat) / len(y) * 100)
        return acc 

    def plot_loss(self):
        plt.plot(self.loss)
        plt.xlabel("Iteration")
        plt.ylable("logloss")
        plt.title("Loss cure for training")
        plt.show()



In [14]:
nn = NeuralNetwork()

In [15]:
print("Xtrain.shape:",Xtrain.shape,"\tytrain.shape:",ytrain.shape)

Xtrain.shape: (4, 120) 	ytrain.shape: (1, 120)


In [16]:
nn.params

{}

In [17]:
nn.init_params()

In [18]:
nn.params

{'w1': array([[0.83559413, 0.27079733, 0.83382243, 0.91787646],
        [0.79953727, 0.9113884 , 0.01688752, 0.1386197 ],
        [0.18531292, 0.59344484, 0.03649412, 0.92465073],
        [0.08828581, 0.19371289, 0.21318629, 0.67314935],
        [0.21113236, 0.8722589 , 0.44307872, 0.66029233]]),
 'b1': array([[0.16676669],
        [0.51089048],
        [0.01564739],
        [0.1772931 ],
        [0.70499102]]),
 'w2': array([[0.64639801, 0.42738247, 0.44336621, 0.29746506, 0.41309084],
        [0.11633294, 0.12445723, 0.51976989, 0.83939216, 0.72035919],
        [0.76636861, 0.13839453, 0.0664479 , 0.70260681, 0.85438491]]),
 'b2': array([[0.93587434],
        [0.50973299],
        [0.31619615]])}

In [19]:
X_,y_ = Xtrain[:,0].reshape(-1,1),ytrain[:,0].reshape(-1,1)
print(f"X_.shape:{X_.shape}\ty_.shape:{y_.shape}")

X_.shape:(4, 1)	y_.shape:(1, 1)


In [20]:
nn.forward_propagation(X_,y_)

NameError: name 'lineer_func' is not defined

In [None]:
nn.params["w1"].shape

In [None]:
nn.fit(Xtrain,ytrain)

In [None]:
nn.params["w1"].shape

In [None]:
nn.cache["Z2"].shape

In [None]:
nn.y.shape

In [None]:
nn.X.shape

In [None]:
# STRUCTURE OF ASSIGNMENT 
'''
losses = [] # intialize loss array 
init_params() # initialize weights & biases 
for epoch in epochs: 
    yhat,_ = forward_propagation(X) # predict something in case of current weights & biases 
    loss = cross_entropy(y,yhat) # calculate loss 
    backward_propagation() # Calculate gradients 
    update_params() # Update parameters
    losses.append(loss) # add loss 
    
    if epoch % 10 == 0:
        cost = calc_cost(losses) # calculate cost 
        print("Epoch {} \t ---> \t Cost: ".format(epoch,cost)) # Show cost for every 10 epoch
''' 
    