In [1]:
import numpy as np

In [2]:
def sigmoid(Z):
    """Apply the logistic sigmoid element-wise.

    Arguments:
    Z -- pre-activation values (NumPy array or scalar)

    Returns:
    A -- 1 / (1 + exp(-Z)), computed element-wise
    cache -- the input Z itself, handed back for use in the backward pass
    """
    denominator = 1 + np.exp(-Z)
    return 1 / denominator, Z

def sigmoid_backward(dA, cache):
    """Backward pass through a sigmoid unit.

    Arguments:
    dA -- gradient with respect to the sigmoid output
    cache -- the pre-activation Z that was stored by the forward pass

    Returns:
    dZ -- gradient with respect to Z, i.e. dA * s * (1 - s) with s = sigmoid(Z)
    """
    sig = 1 / (1 + np.exp(-cache))
    # Same left-to-right product as dA * s * (1 - s).
    scaled = dA * sig
    dZ = scaled * (1 - sig)

    # The gradient must have the same shape as the value it differentiates.
    assert dZ.shape == cache.shape

    return dZ



In [3]:
# Inputs: one row per feature, one column per sample -> shape (3, 5).
X = np.array([
    [4.8, 5.7, 5.1, 5.7, 4.6],
    [3.0, 3.0, 3.8, 2.9, 3.2],
    [1.4, 4.2, 1.6, 4.2, 1.4],
])
# Binary label for each of the five samples; 1-D, shape (5,).
Y = np.array([0, 1, 0, 1, 0])
def initialize_parameters(n_x, n_h1, n_h2, n_y):
    """Return the fixed, hand-picked parameters of the 3-layer example network.

    The values are hard-coded; the size arguments are only used to check
    that the caller expects the shapes of those hard-coded arrays.

    Arguments:
    n_x -- expected number of input features
    n_h1 -- expected number of units in the first hidden layer
    n_h2 -- expected number of units in the second hidden layer
    n_y -- expected number of output units

    Returns:
    parameters -- dict with keys "W1", "b1", "W2", "b2", "W3", "b3"

    Raises:
    AssertionError -- if a hard-coded array does not match the requested sizes
    """
    # Seed kept from the original even though nothing below draws random numbers.
    np.random.seed(1)

    parameters = {
        "W1": np.array([[0.1, -0.5, 0.3],
                        [-0.2, 0.6, -0.4]]),   # (2, 3)
        "b1": np.array([[0], [0]]),            # (2, 1)
        "W2": np.array([[-1, 1],
                        [1, 1],
                        [1, -1]]),             # (3, 2)
        "b2": np.array([[0], [0], [0]]),       # (3, 1)
        "W3": np.array([[-1, -1, 1]]),         # (1, 3)
        "b3": np.array([[0]]),                 # (1, 1)
    }

    # Layer k maps layer_sizes[k-1] inputs to layer_sizes[k] outputs.
    layer_sizes = (n_x, n_h1, n_h2, n_y)
    for layer in range(1, len(layer_sizes)):
        n_in, n_out = layer_sizes[layer - 1], layer_sizes[layer]
        assert parameters["W" + str(layer)].shape == (n_out, n_in)
        assert parameters["b" + str(layer)].shape == (n_out, 1)

    return parameters

In [4]:
# Build the example network (3 inputs -> 2 -> 3 -> 1 output) and show every array.
parameters = initialize_parameters(3, 2, 3, 1)
for key in ("W1", "b1", "W2", "b2", "W3", "b3"):
    print(key + " = \n" + str(parameters[key]))

W1 = 
[[ 0.1 -0.5  0.3]
 [-0.2  0.6 -0.4]]
b1 = 
[[0]
 [0]]
W2 = 
[[-1  1]
 [ 1  1]
 [ 1 -1]]
b2 = 
[[0]
 [0]
 [0]]
W3 = 
[[-1 -1  1]]
b3 = 
[[0]]


In [5]:
def linear_forward(A, W, b):
    """Compute the linear part of a layer's forward pass, Z = W.A + b.

    Arguments:
    A -- activations from the previous layer (or the input data)
    W -- weight matrix of the layer
    b -- bias of the layer, added to the product by broadcasting

    Returns:
    Z -- the pre-activation values
    cache -- the tuple (A, W, b), stored for the backward pass
    """
    weighted_input = np.dot(W, A)
    Z = weighted_input + b

    # One row per unit of this layer, one column per column of A.
    expected_shape = (W.shape[0], A.shape[1])
    assert Z.shape == expected_shape

    return Z, (A, W, b)

In [15]:
def linear_activation_forward(A_prev, W, b, activation):
    """Forward pass for one LINEAR -> ACTIVATION layer.

    Arguments:
    A_prev -- activations from the previous layer (or the input data)
    W -- weight matrix of the layer
    b -- bias of the layer
    activation -- name of the activation; only "sigmoid" is supported

    Returns:
    A -- the layer's activations
    cache -- tuple (linear_cache, activation_cache) for the backward pass

    Raises:
    ValueError -- if `activation` is not "sigmoid"
    """
    # Fail early with a clear message instead of hitting an unbound local
    # variable further down when the activation name is not recognised.
    if activation != "sigmoid":
        raise ValueError("Unsupported activation: " + repr(activation))

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = sigmoid(Z)
    # Trace output: the notebook prints each layer's pre-activation values.
    print(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache

In [16]:
def L_model_forward(X, parameters):
    """Run the forward pass through every layer of the network.

    Every layer, including the output layer, is LINEAR -> SIGMOID. For each
    layer l the function prints the header "Z<l>", the pre-activations
    (printed inside linear_activation_forward), the header "A<l>" and the
    activations. The output layer now gets its "Z<L>" header too; previously
    its pre-activations were printed without one.

    Arguments:
    X -- input data, one column per sample
    parameters -- dict holding "W1", "b1", ..., "WL", "bL"

    Returns:
    AL -- activations of the last layer, shape (1, number of columns of X)
    caches -- list with one cache per layer, in layer order

    Raises:
    ValueError -- if `parameters` does not describe at least one layer
    """
    caches = []
    # Each layer contributes one W and one b entry.
    L = len(parameters) // 2
    if L < 1:
        raise ValueError("parameters must contain at least one layer (W1, b1)")

    A = X
    for l in range(1, L + 1):
        print("Z" + str(l))
        A, cache = linear_activation_forward(A,
                                             parameters['W' + str(l)],
                                             parameters['b' + str(l)],
                                             activation='sigmoid')
        caches.append(cache)
        print("A" + str(l))
        print(A)

    # After the loop A holds the output layer's activations.
    AL = A
    assert (AL.shape == (1, X.shape[1]))

    return AL, caches

In [17]:
# Forward pass over X; prints every layer's Z and A and keeps the caches for backprop.
AL, caches = L_model_forward(X, parameters)

Z1
[[-0.6   0.33 -0.91  0.38 -0.72]
 [ 0.28 -1.02  0.62 -1.08  0.44]]
A1
[[0.35434369 0.58175938 0.28699984 0.5938731  0.32739298]
 [0.56954622 0.2650274  0.65021855 0.25350602 0.60825903]]
Z2
[[ 0.21520253 -0.31673198  0.36321871 -0.34036709  0.28086605]
 [ 0.92388992  0.84678678  0.93721839  0.84737912  0.93565201]
 [-0.21520253  0.31673198 -0.36321871  0.34036709 -0.28086605]]
A2
[[0.55359395 0.4214724  0.58981937 0.41572031 0.56975853]
 [0.71583404 0.69989266 0.71853744 0.70001706 0.71822055]
 [0.44640605 0.5785276  0.41018063 0.58427969 0.43024147]]
[[-0.82302195 -0.54283745 -0.89817618 -0.53145768 -0.85773762]]
A3
[[0.30512256 0.36752777 0.28942544 0.37017697 0.29781224]]


In [18]:
def compute_cost(AL, Y):
    """Compute the mean binary cross-entropy cost.

    Arguments:
    AL -- predicted probabilities, shape (1, m)
    Y -- true labels (0 or 1); any shape that can be reshaped to AL's shape,
         e.g. (m,) or (1, m)

    Returns:
    cost -- the cross-entropy cost as a 0-dimensional value
    """
    # Take m from AL and align Y with it, so a 1-D label vector works too
    # (Y.shape[1] does not exist for a 1-D array).
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    cost = (-1/m) * np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL)))
    cost = np.squeeze(cost)
    assert (cost.shape == ())

    return cost

### Backpropagation

In [35]:
def linear_backward(dZ, cache):
    """Backward pass through the linear part of one layer.

    Arguments:
    dZ -- gradient of the cost with respect to this layer's Z
    cache -- tuple (A_prev, W, b) stored by linear_forward

    Returns:
    dA_prev -- gradient with respect to the previous layer's activations
    dW -- gradient with respect to W, averaged over the columns of A_prev
    db -- gradient with respect to b, averaged over the columns of A_prev
    """
    A_prev, W, b = cache
    # Averaging factor: one column of A_prev per sample.
    scale = 1 / A_prev.shape[1]

    dW = scale * np.dot(dZ, A_prev.T)
    db = scale * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    # Every gradient must match the shape of the quantity it belongs to.
    for grad, reference in ((dA_prev, A_prev), (dW, W), (db, b)):
        assert grad.shape == reference.shape

    return dA_prev, dW, db

In [36]:
def linear_activation_backward(dA, cache, activation):
    """Backward pass for one LINEAR -> ACTIVATION layer.

    Prints dZ, dW and db as a trace of the computation.

    Arguments:
    dA -- gradient with respect to this layer's activations
    cache -- tuple (linear_cache, activation_cache) stored in the forward pass
    activation -- name of the activation; only "sigmoid" is supported

    Returns:
    dA_prev -- gradient with respect to the previous layer's activations
    dW -- gradient with respect to this layer's W
    db -- gradient with respect to this layer's b

    Raises:
    ValueError -- if `activation` is not "sigmoid"
    """
    # Fail early with a clear message instead of reaching the return
    # statement with unbound local variables.
    if activation != "sigmoid":
        raise ValueError("Unsupported activation: " + repr(activation))

    linear_cache, activation_cache = cache

    dZ = sigmoid_backward(dA, activation_cache)
    print('dZ \n')
    print(dZ)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    print('dW \n')
    print(dW)
    print('db \n')
    print(db)

    return dA_prev, dW, db

In [37]:
def L_model_backward(AL, Y, caches):
    """Run backpropagation through every LINEAR -> SIGMOID layer.

    Arguments:
    AL -- output of the forward pass (predicted probabilities)
    Y -- true labels; reshaped to AL's shape before use
    caches -- list of per-layer caches from L_model_forward, in layer order

    Returns:
    grads -- dict with "dA<l-1>", "dW<l>" and "db<l>" for every layer l,
             inserted from the last layer down to the first

    Raises:
    ValueError -- if `caches` is empty
    """
    L = len(caches)  # the number of layers
    if L < 1:
        raise ValueError("caches must contain at least one layer cache")

    grads = {}
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dA = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # caches[l] belongs to layer l + 1; walk from the output layer to layer 1,
    # feeding each layer's dA_prev into the layer below it.
    for l in reversed(range(L)):
        dA, dW, db = linear_activation_backward(dA, caches[l], activation="sigmoid")
        grads["dA" + str(l)] = dA
        grads["dW" + str(l + 1)] = dW
        grads["db" + str(l + 1)] = db

    return grads

In [38]:
# Backward pass; prints dZ, dW and db for each layer, from the output layer down to layer 1.
grads = L_model_backward(AL, Y, caches)


# def print_grads(grads):
#     print ("dW1 = "+ str(grads["dW1"]))
#     print ("db1 = "+ str(grads["db1"]))
#     print ("dA1 = "+ str(grads["dA1"]))
    
# print_grads(grads)

dZ 

[[ 0.30512256 -0.63247223  0.28942544 -0.62982303  0.29781224]]
dW 

[[-0.0038192  -0.04865491 -0.0701678 ]]
db 

[[-0.07398701]]
dZ 

[[-0.07540423  0.15421787 -0.07002141  0.15298208 -0.07300383]
 [-0.06206671  0.13284632 -0.0585338   0.13225854 -0.06027118]
 [ 0.07540423 -0.15421787  0.07002141 -0.15298208  0.07300383]]
dW 

[[ 0.02197071 -0.01064536]
 [ 0.01946098 -0.00826677]
 [-0.02197071  0.01064536]]
db 

[[ 0.01775409]
 [ 0.01684663]
 [-0.01775409]]
dZ 

[[ 0.02030272 -0.04272361  0.01667927 -0.04189569  0.01887976]
 [-0.05218919  0.08595639 -0.04516315  0.08292958 -0.04915212]]
dW 

[[-0.04259316 -0.01299275 -0.05477175]
 [ 0.05114202  0.00257813  0.09903644]]
db 

[[-0.00575151]
 [ 0.0044763 ]]


### Update Parameters

In [39]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Arguments:
    parameters -- dict holding "W1", "b1", ..., "WL", "bL"
    grads -- dict holding the matching "dW<l>" and "db<l>" gradients
    learning_rate -- step size multiplying each gradient

    Returns:
    parameters -- the same dict object, with each entry rebound to
                  parameter - learning_rate * gradient
    """
    num_layers = len(parameters) // 2
    for layer in range(1, num_layers + 1):
        # Weights first, then bias, for each layer.
        for prefix in ("W", "b"):
            key = prefix + str(layer)
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]
    return parameters

In [40]:
# One gradient-descent step with learning rate 1.
# NOTE: this rebinds `parameters`, so re-running the cell applies another step with the same grads.
parameters = update_parameters(parameters, grads, 1)

In [41]:
# Show the parameters after the update step.
parameters

{'W1': array([[ 0.14259316, -0.48700725,  0.35477175],
        [-0.25114202,  0.59742187, -0.49903644]]), 'b1': array([[ 0.00575151],
        [-0.0044763 ]]), 'W2': array([[-1.02197071,  1.01064536],
        [ 0.98053902,  1.00826677],
        [ 1.02197071, -1.01064536]]), 'b2': array([[-0.01775409],
        [-0.01684663],
        [ 0.01775409]]), 'W3': array([[-0.9961808 , -0.95134509,  1.0701678 ]]), 'b3': array([[0.07398701]])}