In [1]:
# imports 
import numpy as np 
import matplotlib.pyplot as plt 
from dataloader import * 
from testCases import * 
import h5py

%matplotlib inline 
# Notebook-wide matplotlib defaults.
# Default figure size as (width, height).
plt.rcParams["figure.figsize"] = (7.0, 4.0) 
# Show image pixels without smoothing.
plt.rcParams["image.interpolation"] = 'nearest'
# Render images with the grayscale colormap unless a plot overrides it.
plt.rcParams["image.cmap"] = 'gray'

# All needed functions 

In [2]:
# sigmoid 
def sigmoid(x):
    """Apply the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    Arguments:
    x -- scalar or numpy array

    Returns:
    The sigmoid of `x`, with the same shape as `x`.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# ReLu
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and `x`.

    Arguments:
    x -- scalar or numpy array

    Returns:
    `x` with every negative entry replaced by 0.
    """
    return np.maximum(0, x)


In [3]:
# initialize parameters 
def initialize_parameters(layer_dims):
    """Create the weight matrices and bias vectors for every layer.

    Arguments:
    layer_dims -- sequence of layer sizes; entry 0 is the input size

    Returns:
    parameters -- dict with keys "W1", "b1", ..., "W{L-1}", "b{L-1}" where
                  Wl has shape (layer_dims[l], layer_dims[l-1]) and
                  bl has shape (layer_dims[l], 1)

    Side effect: resets NumPy's global random seed to 3 on every call, so the
    returned weights are the same for the same `layer_dims`.
    """
    np.random.seed(3)

    parameters = {}
    for idx in range(1, len(layer_dims)):
        n_out, n_in = layer_dims[idx], layer_dims[idx - 1]
        w_key = "W{}".format(idx)
        b_key = "b{}".format(idx)

        # Standard-normal draws scaled by 1/sqrt(fan-in); biases start at zero.
        parameters[w_key] = np.random.randn(n_out, n_in) / np.sqrt(n_in)
        parameters[b_key] = np.zeros((n_out, 1))

        assert parameters[w_key].shape == (n_out, n_in)
        assert parameters[b_key].shape == (n_out, 1)

    return parameters

In [4]:
## forward propagation 
def forward_propagation(X, parameters):
    """Forward pass of the three-layer network
    LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.

    Arguments:
    X -- input data; multiplied on the left by W1
    parameters -- dict holding "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    A3 -- output of the final sigmoid activation
    cache -- tuple (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)
             consumed by backward_propagation
    """
    W1, b1, W2, b2, W3, b3 = (
        parameters[name] for name in ("W1", "b1", "W2", "b2", "W3", "b3")
    )

    # Layer 1: linear + ReLU
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    # Layer 2: linear + ReLU
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    # Layer 3: linear + sigmoid
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    return A3, (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)
    

In [5]:
# Backward propagation 
def backward_propagation(X, Y, cache):
    """Backward pass for the three-layer network built by forward_propagation.

    Arguments:
    X -- input data; the number of examples m is X.shape[1]
    Y -- labels, subtracted from A3 to form dZ3
    cache -- 12-tuple (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)

    Returns:
    gradients -- dict with keys dZ3, dW3, db3, dA2, dZ2, dW2, db2,
                 dA1, dZ1, dW1, db1
    """
    m = X.shape[1]
    inv_m = 1. / m
    _Z1, A1, _W1, _b1, _Z2, A2, W2, _b2, _Z3, A3, W3, _b3 = cache

    def _param_grads(dZ, A_prev):
        # Averaged gradients of the weights and bias of one linear layer.
        dW = inv_m * np.dot(dZ, A_prev.T)
        db = inv_m * np.sum(dZ, axis=1, keepdims=True)
        return dW, db

    # Output layer: dZ3 is taken directly as A3 - Y.
    dZ3 = A3 - Y
    dW3, db3 = _param_grads(dZ3, A2)

    # Second hidden layer: the mask passes gradient only where the activation is positive.
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, (A2 > 0).astype(np.int64))
    dW2, db2 = _param_grads(dZ2, A1)

    # First hidden layer.
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, (A1 > 0).astype(np.int64))
    dW1, db1 = _param_grads(dZ1, X)

    return {
        "dZ3": dZ3, "dW3": dW3, "db3": db3,
        "dA2": dA2, "dZ2": dZ2, "dW2": dW2, "db2": db2,
        "dA1": dA1, "dZ1": dZ1, "dW1": dW1, "db1": db1,
    }

In [6]:
# update parameters 
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Arguments:
    parameters -- dict with "W1", "b1", ..., "WL", "bL"; updated in place
    grads -- dict with the matching "dW1", "db1", ..., "dWL", "dbL"
    learning_rate -- step size multiplying each gradient

    Returns:
    parameters -- the same dict object, now holding the updated values
    """
    # Each layer contributes one W and one b entry.
    num_layers = len(parameters) // 2

    for idx in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            key = prefix + str(idx)
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]

    return parameters



In [7]:
# prediction 
def predict(X, y, parameters):
    """Predict 0/1 labels with the trained network and print the accuracy.

    Arguments:
    X -- input data; the number of examples m is X.shape[1]
    y -- true labels; row 0 is compared against the predictions
    parameters -- parameter dict accepted by forward_propagation

    Returns:
    p -- integer array of shape (1, m) holding the 0/1 predictions
    """
    m = X.shape[1]
    # Builtin `int` replaces the `np.int` alias, which NumPy 1.24 removed.
    p = np.zeros((1, m), dtype=int)

    # forward propagation (the cache is not needed for prediction)
    a3, _ = forward_propagation(X, parameters)

    # convert probabilities to 0/1 predictions
    p[a3 > 0.5] = 1

    # accuracy
    print("Accuracy:" + str(np.mean((p[0, :] == y[0, :]))))

    return p

In [8]:
# compute cost 
def compute_cost(a3, Y):
    """Cross-entropy cost averaged over the examples.

    Arguments:
    a3 -- network output (post-sigmoid activations)
    Y -- true labels, same shape as a3; the number of examples is Y.shape[1]

    Returns:
    cost -- sum of the per-example losses divided by the number of examples
    """
    num_examples = Y.shape[1]

    loss_when_positive = np.multiply(-np.log(a3), Y)
    loss_when_negative = np.multiply(-np.log(1 - a3), (1 - Y))

    # nansum skips NaN entries (e.g. 0 * inf when an activation is exactly 0 or 1).
    total_loss = np.nansum(loss_when_positive + loss_when_negative)
    return (1. / num_examples) * total_loss

In [9]:
# loading dataset 
def load_dataset():
    """Load the cat / non-cat image dataset from the HDF5 files under datasets/.

    Reads 'datasets/train_catvnoncat.h5' and 'datasets/test_catvnoncat.h5'.
    Each file is opened in a `with` block so its handle is closed as soon as
    the arrays have been copied into memory.

    Returns:
    train_set_x -- training inputs, flattened to one column per example and divided by 255
    train_set_y -- training labels, shape (1, number of training examples)
    test_set_x -- test inputs, flattened to one column per example and divided by 255
    test_set_y -- test labels, shape (1, number of test examples)
    classes -- array read from the "list_classes" dataset of the test file
    """
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])
        classes = np.array(test_dataset["list_classes"][:])

    # Labels become row vectors of shape (1, number of examples).
    train_set_y = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    # Flatten each example into a single column: (features, number of examples).
    train_set_x_flat = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    test_set_x_flat = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

    # Scale the inputs by 1/255.
    train_set_x = train_set_x_flat / 255
    test_set_x = test_set_x_flat / 255

    return train_set_x, train_set_y, test_set_x, test_set_y, classes

In [10]:
# predict_dec 
def predict_dec(parameters, X):
    """Return boolean predictions for `X` by thresholding the network output at 0.5.

    Arguments:
    parameters -- parameter dict accepted by forward_propagation
    X -- input data passed to forward_propagation

    Returns:
    Boolean array that is True wherever the output activation exceeds 0.5.
    """
    probabilities, _ = forward_propagation(X, parameters)
    return probabilities > 0.5

# Implementing different models

* We will implement three different models:
    - Non-regularized model
    - L2-regularized model
    - Dropout-regularized model

## Non-regularized model

In [12]:
# non-regularized model 
def model(X, Y, learning_rate=0.3, num_iteration=30000, print_cost=True, lambd=0, keep_prob=1):
    

In [None]:
# Unpack the four values returned by load_2D_dataset() into train/test inputs and labels.
# NOTE(review): load_2D_dataset is not defined in the visible cells — presumably it comes
# from one of the star imports (dataloader / testCases); confirm. It is distinct from the
# load_dataset() defined in this notebook, which returns five values.
train_X, train_Y, test_X, test_Y = load_2D_dataset()