In [87]:
import numpy as np

In [88]:
def sigmoid(Z):
    A = 1 / (1+np.exp(-Z))
    cache = Z
    return A,cache


def relu(Z):
    A = np.maximum(0,Z)
    cache = Z
    return A,cache
    

In [89]:
def sigmoid_back(dA,cache):
    Z = cache
    s = 1 + (1+ np.exp(-Z))
    dZ = dA * s*(1-s)
    return dZ

def relu_back(dA,cache):
    Z = cache
    dZ = np.array(dA,copy = True)
    dZ[Z<=0] = 0
    return dZ

In [90]:
def init_params(layers_dims):
    parameters = {}
    L = len(layers_dims)
    for i in range(1,L):
        parameters["W"+str(i)] = np.random.randn(layers_dims[i],layers_dims[i-1])
        parameters["b"+str(i)] = np.zeros((layers_dims[i],1))

        
        assert(parameters['W' + str(i)].shape == (layers_dims[i], layers_dims[i-1]))
        assert(parameters['b' + str(i)].shape == (layers_dims[i], 1))
    return parameters


In [91]:
def linear_forward(A,W,b):
    Z = np.dot(W,A) + b
    assert(Z.shape == (W.shape[0], A.shape[1]))
    cache = (A,W,b)
    return Z,cache

In [92]:
def linear_activation_forword(A_prev,W,b,activation):
    if activation == "sigmoid":
        Z,linear_cache = linear_forward(A_prev,W,b)
        A,activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z,linear_cache = linear_forward(A_prev,W,b)
        A,activation_cache = relu(Z)
    cache = (linear_cache,activation_cache)
    return A,cache

        

In [93]:
def forward_pass(X,parameters):
    A = X
    L =  len(parameters)//2
    caches = []
    for i in range(1,L):
        A_prev  = A
        A,cache = linear_activation_forword(A_prev,parameters["W"+str(i)],parameters["b"+str(i)],"relu")
        caches.append(cache)

    AL,cache = linear_activation_forword(A,parameters["W"+str(L)],parameters["b"+str(L)],"sigmoid")
    caches.append(cache)

    return AL,caches

In [94]:
def compute_cost(AL,Y):
    m = Y.shape[1]
    logprodut = np.dot(Y,np.log(AL).T) + np.dot((1-Y),np.log(1-AL).T)
    cost = 1/m * logprodut
    cost = np.squeeze(cost) 
    return cost

In [95]:
def linear_backward(dZ,cache):
    A_prev,W,b = cache
    m = A_prev.shape[1]

    dW = 1./m * np.dot(dZ,A_prev.T)
    db = 1./m * np.sum(dZ,axis= 1 ,keepdims= True)
    dA_prev = np.dot(W.T,dZ)

    assert (dA_prev.shape == A_prev.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)

    return dA_prev,dW,db


In [96]:
def linear_activation_backward(dA,cache,activation):
    linear_cache,activation_cache = cache

    if activation == "relu":
        dZ = relu_back(dA,activation_cache)
        dA_prev,dW,db = linear_backward(dZ,linear_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_back(dA,activation_cache)
        dA_prev,dW,db = linear_backward(dZ,linear_cache)
    
    return dA_prev,dW,db

In [97]:
def backward_pass(AL,Y,caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    dAL = - (np.divide(Y,AL) - np.divide(1-Y,1-AL))

    current_cache = caches[L-1]

    grads["dA"+str(L-1)],grads["dW"+str(L-1)],grads["db"+str(L-1)] = linear_activation_backward(dAL, current_cache, activation = "sigmoid")

    for i in reversed(range(L-1)):
        current_cache = caches[i]
        dA_prev_temp,dW_temp,db_temp = linear_activation_backward(grads["dA" + str(i + 1)], current_cache, activation = "relu")
        grads["dA" + str(i)] = dA_prev_temp
        grads["dW" + str(i + 1)] = dW_temp
        grads["db" + str(i + 1)] = db_temp 

    return grads   

In [98]:
def update_parameters(parameters,grads,learning_rate):
    L = len(parameters)//2
    for i in range(L-1):
        parameters["W" + str(i+1)] = parameters["W" + str(i+1)] - learning_rate * grads["dW" + str(i+1)]
        parameters["b" + str(i+1)] = parameters["b" + str(i+1)] - learning_rate * grads["db" + str(i+1)]
    
    return parameters

In [99]:
def predict(X,y,parameters):
    m = X.shape[1]
    n = len(parameters)//2
    p = np.zeros((1,m))
    probas,caches = forward_pass(X,parameters)

    for i in range(0, probas.shape[1]):
        if probas[0,i] > 0.5:
            p[0,i] = 1
        else:
            p[0,i] = 0
    print("Accuracy: "  + str(np.sum((p == y)/m)))

    return p



In [100]:
def call(X,y,layers_dims,epochs,learning_rate = 0.01):
    costs = []
    parameters = init_params(layers_dims)
    AL,caches = forward_pass(X,parameters)
    cost = compute_cost(AL,y)
    grads = backward_pass(AL,y,caches)
    parameters = update_parameters(parameters,grads,learning_rate)

    if epochs % 100 == 0:
        print ("Cost after iteration %i: %f" %(epochs, cost))
    if epochs % 100 == 0:
        costs.append(cost)

    return parameters

In [101]:
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0,1,1,0]])

X = X.reshape(X.shape[0], -1).T

In [102]:
parameters = call(X,y,[2,2,1],10000)

Cost after iteration 10000: -0.700038
