In [1]:
import numpy as np
import pandas as pd 
import time
import os 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from pprint import pprint

# Load Dataset 

In [2]:
iris_ds = load_iris()

In [3]:
X,y = iris_ds.data,iris_ds.target
print("X.shape:",X.shape,"y.shape:",y.shape)

X.shape: (150, 4) y.shape: (150,)


In [4]:
Xtrain,Xtest,ytrain,ytest = train_test_split(X,y,test_size=.2,random_state=3)

In [5]:
Xtrain[0,:]

array([5.8, 4. , 1.2, 0.2])

In [6]:
Xtrain,Xtest,ytrain,ytest = Xtrain.T,Xtest.T,ytrain.T,ytest.T

In [7]:
Xtrain[:,0]

array([5.8, 4. , 1.2, 0.2])

In [8]:
print("Xtrain.shape:",Xtrain.shape,"ytrain.shape:",ytrain.shape)

Xtrain.shape: (4, 120) ytrain.shape: (120,)


In [9]:
idx_to_target = {}
for idx,target in enumerate(iris_ds.target_names):
    idx_to_target[idx] = target

In [10]:
idx_to_target

{0: 'setosa', 1: 'versicolor', 2: 'virginica'}

In [11]:
outputs = np.array(list(idx_to_target.keys()))
outputs = outputs.reshape(-1,1)
print("Outputs: ",outputs,"\nShape:",outputs.shape)

Outputs:  [[0]
 [1]
 [2]] 
Shape: (3, 1)


In [13]:
m  = Xtrain.shape[1]
print("Total sample count:",m)

Total sample count: 120


> <b>Observation:</b> There are 120 train samples which have 4 features. And also there are 120 label for train. Label is one of them "setosa,versicolor,virginica" 

# Assignment PDF 

In [77]:
class NeuralNet:
    def __init__(self,layers=[4,5,3],learning_rate=0.001,iterations=100):
        '''
        layers[0] = input layer size
        layers[1] = hidden layer size
        layers[2] = output layer size
        
        '''
        self.params = {}
        self.learning_rate = learning_rate
        self.iterations = iterations # epoch 
        self.loss = []
        self.sample_size = None
        self.layers = layers
        self.X = None
        self.y = None
        
        
    def init_weights(self):
        self.w1 = np.random.rand(self.layers[1],self.layers[0])
        self.b1 = np.random.rand(self.layers[1],1)
        self.w2 = np.random.rand(self.layers[2],self.layers[1])
        self.b2 = np.random.rand(self.layers[2],1)
        self.params = {"w1":self.w1,"b1":self.b1,"w2":self.w2,"b2":self.b2}
    
    def relu(self,Z):
        if Z > 0 :
            return Z
        else:
            return 0 
    
    def softmax(self,Z):
        score = np.exp(Z) / np.sum(np.exp(Z))  
        return score 
    
    def cross_entropy(self,y,yhat):
        ce = np.multiply(np.log(yhat),y) + np.multiply(np.log(1-yhat),1-y)
        return -ce 
    
    def calc_cost(losses):
        return np.mean(losses)
    
    def forward_propagation(self):
        W1 = self.params["w1"]
        b1 = self.params["b1"]
        W2 = self.params["w2"]
        b2 = self.params["b2"]
        
        z1 = np.dot(W1,self.X)+ b1
        a1 = relu(z1)
        z2 = np.dot(W2,a1) + b2
        yhat = softmax(z2)
        loss = cross_entropy(self.y,yhat)
        
        self.cache = {"z1":z1,"a1":a1,"z2":z2,"yhat":yhat}
        return yhat,loss
    
    def back_propagation(self,yhat):
        '''
        compute derivative and update weights&biases
        '''
        W1 = self.params["w1"]
        b1 = self.params["b1"]
        W2 = self.params["w2"]
        b2 = self.params["b2"]
        
        A1 = self.cache["a1"]
        # backward
        dz2 = yhat - self.y 
        dw2 = np.dot(dz2,A1.T) / m
        db2 = np.sum(dz2,axis=1,keepdims=True) / m
        dz1 = np.multiply(np.dot(W2.T,dz2), (1-np.power(A1,2)))
        dw1 = np.dot(dz1,self.X.T) / m 
        db1 = np.sum(dz1,axis=1,keepdims=True) / m
    
        w1 = W1 - self.learning_rate * dw1
        b1 = b1 - self.learning_rate * db1
        w2 = W2 - self.learning_rate * dw2
        b2 = b2 - self.learning_rate * db2
        
        self.params = {"w1":self.w1,"b1":self.b1,"w2":self.w2,"b2":self.b2}
        
    def fit(X,y):
        self.X = X
        self.y = y
        self.init_weights()
        for i in range(self.iterations):
            
            yhat,loss = self.forward_propagation()
            self.back_propagation()
            
    def predict(self,X):
        '''
        Makes prediction using model on test data
        '''
        yhat,_ = self.forward_propagation()
        return yhat
    def acc(self,y,yhat):
        acc = int( sum(y==yhat) / len(y) * 100)
        return acc 
    def plot_loss(self):
        plt.plot(self.loss)
        plt.xlabel("Iteration")
        plt.ylable("logloss")
        plt.title("Loss cure for training")
        plt.show()
        
        

In [83]:
nn = NeuralNet()

In [84]:
nn.params

{}

In [85]:
nn.init_weights()

In [86]:
nn.params

{'w1': array([[0.30317429, 0.22848073, 0.295245  , 0.8896095 ],
        [0.72214467, 0.34268371, 0.77044781, 0.75890291],
        [0.93913669, 0.04365552, 0.85200454, 0.82171998],
        [0.82655218, 0.48307576, 0.85120366, 0.93437701],
        [0.23762908, 0.16177229, 0.25060671, 0.34585422]]),
 'b1': array([[0.62548028],
        [0.99768911],
        [0.74033982],
        [0.9751418 ],
        [0.36089635]]),
 'w2': array([[0.98515662, 0.0640912 , 0.80152839, 0.67591329, 0.92865665],
        [0.19616872, 0.34556914, 0.62539626, 0.95119408, 0.6343442 ],
        [0.10485432, 0.81757139, 0.40204173, 0.52056518, 0.61021176]]),
 'b2': array([[0.2729081 ],
        [0.15787407],
        [0.0341948 ]])}

In [87]:
yhat,loss= nn.forward_propagation()

TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'

# General Idea 

General Idea:
* Define Layers
* Inialize layers weights
* For Loop (Epoch/iteration)
   * Forwad prop
   * Compute loss
   * Backward 
   * Update


In [50]:
class NeuralNetwork:
    def __init__(self,inputs,outputs,hidden_units=10,activation_func='tanh',output_activation_func='sigmoid'): 
        input_init = inputs.shape[1]
        output_init = outputs.shape[0]
        self.w1 = np.random.rand(input_init,hidden_units)
        self.b1 = np.zeros((hidden_units,1))
        self.w2 = np.random.rand(hidden_units,output_init)
        self.b2 = np.zeros((output_init,1))
        self.params = {"w1":self.w1,"b1":self.b1,"w2":self.w2,"b2":self.b2}
        self.activation_function = activation_func
        self.output_activation_function = output_activation_func
    
    def lineer_func(w,x,b):
        '''
        Args: x = input
              w = weight
              b = bias
        '''
        return np.dot(w,x)+b

    def sigmoid(z):
        '''
        z = lineer result of a neuron 
        '''
        sigmoid = 1 / (1 + np.exp(-z))
        return sigmoid

    def relu(z):
        '''
        z = lineer result of a neuron
        '''
        return max(0,z)

    def non_lineer_func(lineer_result,func_type='sigmoid'):
        '''
        Args: lineer_result = it's Z; Result of lineer part of a neuron
              func_type = help to decide which non-lineer func use to calculate output of neuron
        '''
        if func_type == 'sigmoid':
            return sigmoid(lineer_result)
        elif func_type == 'relu':
            return relu(lineer_result)
        elif func_type =='tanh':
            return np.tanh(lineer_result)
   
    def forward_prop(self,X,params,verbose=False):
        w1 = self.params["w1"]
        b1 = self.params["b1"]
        w2 = self.params["w2"]
        b2 = self.params["b2"]

        z1 = lineer_func(w1,X,b1) # lineer func
        a1 = non_lineer_func(z1,self.activation_function ) # non lineer func
        z2 = lineer_func(w2,a1,b2)
        a2 = non_lineer_func(z2,self.output_activation_function)

        self.cache = {"z1":z1,"a1":a1,"z2":z2,"a2":a2}
        return a2,self.cache
    

In [51]:
nn = NeuralNetwork(Xtrain,ytrain)

In [None]:
nn.params

# 1. Define Layers

In [21]:
def defineLayers(x,y):
    '''
    Function is find how many units in layers. So that i can define layer sizes.
    Args:
        X: inputs 
        y: targets ... for our task = [0,1,2]
        
    Returns: 
        n_x : input units count 
        n_h : hidden units counts
        n_y : output units count
    '''
    n_x = x.shape[0] # input count 
    n_h = 5 # I defined it. So that my nn [inputs-> 3 hiddenunits -> outputs]
    n_y = y.shape[0] # it's label count
    return (n_x,n_h,n_y)

In [22]:
n_x = defineLayers(Xtrain,ytrain)[0]
n_x

4

In [23]:
(n_x,n_h,n_y) = defineLayers(Xtrain,outputs)

In [24]:
print("Size of input layer: ",n_x)
print("Size of hidden layer: ",n_h)
print("Size of output layer: ",n_y)

Size of input layer:  4
Size of hidden layer:  5
Size of output layer:  3


# 2. Initialize Layers

In [25]:
def initializeLayers(n_x,n_h,n_y,verbose=False):
    '''
    Args:
        n_x : size of inputs layer
        n_h : size of hidden layer
        n_y : size of output layer
    
    Returns : params which is include :
              w1 and b1 are weight and bias values of 1st layer
              w2 and b1 are weight and bias values of 2nd layer
    '''
    
    w1 = np.random.rand(n_h,n_x) # check what will be when transpose  
    b1 = np.zeros((n_h,1))
    w2 = np.random.rand(n_y,n_h)
    b2 = np.zeros((n_y,1))
    
    params = {"w1":w1,"b1":b1,"w2":w2,"b2":b2}
    
    if verbose:
        print("Parameters:\n")
        pprint(params)
    return params

In [26]:
params = initializeLayers(n_x,n_h,n_y,verbose=False)

In [27]:
pprint(params)

{'b1': array([[0.],
       [0.],
       [0.],
       [0.],
       [0.]]),
 'b2': array([[0.],
       [0.],
       [0.]]),
 'w1': array([[0.92394921, 0.44808843, 0.35851763, 0.2561268 ],
       [0.68898875, 0.3764789 , 0.49954545, 0.28850572],
       [0.8653749 , 0.06730282, 0.72552444, 0.82108784],
       [0.43731079, 0.81368443, 0.32922002, 0.32729991],
       [0.282708  , 0.92883171, 0.81874574, 0.26451474]]),
 'w2': array([[0.6780047 , 0.13443903, 0.75105624, 0.51425849, 0.74907619],
       [0.45027238, 0.04491036, 0.1920904 , 0.99169396, 0.13082726],
       [0.4216712 , 0.60789197, 0.52126299, 0.15094137, 0.24463749]])}


# 3. For Loop(Iteration/Epoch)

## 3.1.helper functions 

In [28]:
def lineer_func(w,x,b):
    '''
    Args: x = input
          w = weight
          b = bias
    '''
    return np.dot(w,x)+b

def sigmoid(z):
    '''
    z = lineer result of a neuron 
    '''
    sigmoid = 1 / (1 + np.exp(-z))
    return sigmoid

def relu(z):
    '''
    z = lineer result of a neuron
    '''
    return max(0,z)

def non_lineer_func(lineer_result,func_type='sigmoid'):
    '''
    Args: lineer_result = it's Z; Result of lineer part of a neuron
          func_type = help to decide which non-lineer func use to calculate output of neuron
    '''
    if func_type == 'sigmoid':
        return sigmoid(lineer_result)
    elif func_type == 'relu':
        return relu(lineer_result)
    elif func_type =='tanh':
        return np.tanh(lineer_result)

## 3.2. Forward Propagation

In [29]:
def forward_prop(X,params,verbose=False):
    '''
        Args: X: inputs
              params: weight and bias values
        Return last_preds and cache(all calculations)
                a2: last prediction
                cache: a dict which contains all calculations
    '''
    w1 = params["w1"]
    b1 = params["b1"]
    w2 = params["w2"]
    b2 = params["b2"]
    
    z1 = lineer_func(w1,X,b1) # lineer func
    a1 = non_lineer_func(z1,'tanh') # non lineer func
    z2 = lineer_func(w2,a1,b2)
    a2 = non_lineer_func(z2,'sigmoid')

    cache = {"z1":z1,"a1":a1,"z2":z2,"a2":a2}
        
    if verbose:
        print("X1:",X)
        print("X1:",X.shape,"\tW1:",w1.shape)
        print("Z1:",z1.shape,"\ta1:",a1.shape)
        print("X2:",a1.shape,"\tW2:",w2.shape)
        print("Z2:",z2.shape,"\ta2:",a2.shape)
        print("\n*** Last Prediction ***\n")
        pprint(cache["a2"])
        print("\n*** CACHE ***\n ")
        pprint(cache)
    return a2,cache

In [30]:
preds,cache = forward_prop(Xtrain,params,verbose=False)

In [None]:
pprint(cache)

In [None]:
pprint(preds)

## 3.3. Compute Loss & Cost 

In [34]:
def calculateLoss(y,yhat,verbose=False):
    '''
    Args : y = real value
           yhat = prediction
    '''
    loss = np.multiply(np.log(yhat),y) + np.multiply(np.log(1-yhat),1-y)
    if verbose:
        print("Loss:",-loss)
    return -loss

In [35]:
def calculateCost(losses,verbose=False):
    '''
    losses = whole loss values
    '''
    cost = np.squeeze(np.mean(losses))
    if verbose:
        print("Cost:",cost)
    return cost

In [36]:
losses = calculateLoss(ytrain,preds,verbose=False)

In [37]:
cost = calculateCost(losses,verbose=False)

In [None]:
print("losses:\n")
pprint(losses)
print("\ncost:",cost)

## 3.4. Backward Propagation

In [44]:
def backward_prop(x,y,params,cache,verbose=False):
    m = y.shape[0]
    w1 = params["w1"]
    w2 = params["w2"]
    a1 = cache["a1"]
    a2 = cache["a2"]
    
    # backward
    dz2 = a2 - y 
    #print("dz2:",dz2)
    
    dw2 = np.dot(dz2,a1.T) / m
    #print("dw2:",dw2)
    
    db2 = np.sum(dz2,axis=1,keepdims=True) / m
    #print("db2:",db2)
    
    dz1 = np.multiply(np.dot(w2.T,dz2), (1-np.power(a1,2)))
    #print("dz1:",dz1)
    
    dw1 = np.dot(dz1,X.T) / m 
    #print("dw1:",dw1)
    
    db1 = np.sum(dz1,axis=1,keepdims=True) / m
    #print("db1:",db1)
    if verbose:
        print("dz2:",dz2)
        print("dw2:",dw2)
        print("db2:",db2)
        print("dz1:",dz1)
        print("dw1:",dw1)
        print("db1:",db1)
    grads = {"dw1":dw1,"db1":db1,"dw2":dw2,"db2":db2}
    
    return grads

In [45]:
grads = backward_prop(Xtrain,ytrain,params,cache,verbose=False)

ValueError: shapes (5,120) and (4,150) not aligned: 120 (dim 1) != 4 (dim 0)

In [None]:
print("Gradients")
pprint(grads)

## 3.5. Update Weights and Biases

In [None]:
def update_parameters(params, grads, lr = 1.2,verbose=False):
    """
    Updates parameters using the gradient descent update rule given above
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    grads -- python dictionary containing your gradients 
    
    Returns:
    parameters -- python dictionary containing your updated parameters 
    """
    # Retrieve each parameter from the dictionary "parameters"
    ### START CODE HERE ### (≈ 4 lines of code)
    w1 = params["w1"]
    b1 = params["b1"]
    w2 = params["w2"]
    b2 = params["b2"]
    ### END CODE HERE ###
    
    # Retrieve each gradient from the dictionary "grads"
    ### START CODE HERE ### (≈ 4 lines of code)
    dw1 = grads["dw1"]
    db1 = grads["dw1"]
    dw2 = grads["dw2"]
    db2 = grads["db2"]
    ## END CODE HERE ###
    
    # Update rule for each parameter
    ### START CODE HERE ### (≈ 4 lines of code)
    w1 = w1 - lr * dw1
    b1 = b1 - lr * db1
    w2 = w2 - lr * dw2
    b2 = b2 - lr * db2
    ### END CODE HERE ###
    
    params = {"w1": w1,
                  "b1": b1,
                  "w2": w2,
                  "b2": b2}
    if verbose:
        print("Updated Parameters:")
        pprint(params)
    return params

In [None]:
print("Parameters")
pprint(params)
updated_params = update_parameters(params,grads,lr=2,verbose=False)
print("\n Updated Parameters\n")
pprint(updated_params)

# 4. Neural Network Model 

In [None]:
Xtrain[0].reshape(-1,1).shape[0]

In [None]:
def NeuralNetwork(X, Y, n_h, num_iterations = 4, print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    np.random.seed(3)
    sample_x = X[0]
    sample_y = y[0]
    n_x,_,n_y = defineLayers(sample_x, sample_y)
    params = initializeLayers(n_x, n_h, n_y)
    
    # Loop (gradient descent)

    for i in range(0, num_iterations):
         
        ### START CODE HERE ### (≈ 4 lines of code)
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_prop(X, params)
        
        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = calculateCost(A2, Y, params)
 
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_prop(params, cache, X, Y)
 
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        params = update_parameters(params, grads)
        
        ### END CODE HERE ###
        
        # Print the cost every 1000 iterations
        print ("Epoch {i+1} \t Cost: {}" %(i, cost))

    return params

In [None]:
params = NeuralNetwork(Xtrain,ytrain,4)

In [None]:
def predict(params, X):
    """
    Using the learned parameters, predicts a class for each example in X
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    X -- input data of size (n_x, m)
    
    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    pred,_ = forward_prop(X, params)
    prediction = np.argmax(pred) # Find the index of max score 

    return prediction

In [None]:
def classifyLabel(pred,label_dict):
    '''
    Args : pred is prediction index generated by predict function
           label_dict is a dict which contains index-label pairs 
    '''
    return label_dict[pred]