In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.metrics import classification_report,accuracy_score
import random

In [3]:
# Feature matrices are divided by 255.0 (presumably 8-bit pixel intensities,
# which would map them into [0, 1] -- confirm against the CSV contents).
X_train = np.loadtxt('cat_train_x.csv', delimiter=',') / 255.0
X_test = np.loadtxt('cat_test_x.csv', delimiter=',') / 255.0

# Labels are forced into a single row with one entry per column of the
# matching feature matrix; reshape raises if the counts disagree.
Y_train = np.loadtxt('cat_train_y.csv', delimiter=',').reshape(1, X_train.shape[1])
Y_test = np.loadtxt('cat_test_y.csv', delimiter=',').reshape(1, X_test.shape[1])

# One shape per line, in the order X_train, Y_train, X_test, Y_test.
print(*(arr.shape for arr in (X_train, Y_train, X_test, Y_test)), sep='\n')

(12288, 209)
(1, 209)
(12288, 50)
(1, 50)


In [4]:
def sigmoid(Z):
    """Element-wise logistic function, 1 / (1 + exp(-Z)).

    Evaluated in a numerically stable form: the exponential is only ever
    taken of a non-positive number, so large-magnitude negative inputs do
    not overflow ``np.exp`` the way the naive formula does.

    Parameters
    ----------
    Z : scalar or array-like
        Pre-activation values.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Logistic of ``Z`` with the same shape as ``Z``; values lie in [0, 1].
    """
    Z = np.asarray(Z)
    # exp(-|Z|) always lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(Z))
    # For Z >= 0 this is the textbook formula; for Z < 0 it is the algebraically
    # identical exp(Z) / (1 + exp(Z)).
    A = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Hand scalars back as scalars rather than 0-d arrays.
    return A if A.ndim else A[()]

def softmax(z):
    """Softmax along axis 0 (each column is normalised to sum to 1).

    The per-column maximum is subtracted before exponentiating. Softmax is
    invariant to that shift, and it keeps every exponent <= 0 so large
    logits no longer overflow to inf and produce NaN.

    Parameters
    ----------
    z : array-like
        Logits; classes run along axis 0.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``z`` whose entries along axis 0 sum to 1.
    """
    z = np.asarray(z)
    shifted = z - np.max(z, axis=0, keepdims=True)
    expZ = np.exp(shifted)
    return expZ / np.sum(expZ, axis=0, keepdims=True)

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    return np.maximum(0, Z)

def tanh(x):
    """Element-wise hyperbolic tangent (thin wrapper around ``np.tanh``)."""
    activated = np.tanh(x)
    return activated

def derivative_relu(Z):
    """ReLU gradient mask: 1.0 where ``Z`` is strictly positive, else 0.0."""
    positive_mask = Z > 0
    return np.array(positive_mask, dtype=np.float64)

def derivative_tanh(x):
    """Return ``1 - x**2`` element-wise.

    This is the tanh derivative written in terms of the tanh *output*, so
    ``x`` is expected to be an activation value (tanh(z)), not the
    pre-activation z. backward_propagation calls it with the cached
    activation.
    """
    squared = np.power(x, 2)
    return 1 - squared

In [5]:
def compute_cost(a,y):
    """Mean cross-entropy between predicted probabilities and targets.

    Parameters
    ----------
    a : numpy.ndarray, shape (n, m)
        Predicted probabilities, one column per example.
    y : numpy.ndarray, shape (n, m)
        Targets. With ``n == 1`` the binary cross-entropy is used; otherwise
        the categorical form ``-1/m * sum(y * log(a))`` is used.

    Returns
    -------
    numpy.ndarray (0-d) or numpy scalar
        The averaged cost.
    """
    n, m = y.shape

    # Keep probabilities strictly inside (0, 1): a saturated prediction of
    # exactly 0 or 1 would otherwise hit log(0) and poison the cost with
    # inf/NaN (0 * -inf evaluates to NaN).
    eps = 1e-15
    a = np.clip(np.asarray(a, dtype=np.float64), eps, 1 - eps)

    if n == 1:
        cost = 1/m * (np.dot(y, np.log(a).T) + np.dot(1 - y, np.log(1 - a).T))
    else:
        cost = 1/m * np.sum(y * np.log(a))

    return -1 * np.squeeze(cost)

In [6]:
def initialize_parameters(layers):
    """Build the weight/bias dictionary for a fully connected network.

    ``layers`` lists the unit count of every layer, input first. For each
    consecutive pair (fan_in, fan_out) the result holds

    * ``"w<i>"``: standard-normal draws of shape (fan_out, fan_in), divided
      by sqrt(fan_in);
    * ``"b<i>"``: zeros of shape (fan_out, 1);

    with ``i`` counting from 1. Draws come from NumPy's global RNG.
    """
    parameters = {}
    fan_pairs = zip(layers[:-1], layers[1:])
    for idx, (fan_in, fan_out) in enumerate(fan_pairs, start=1):
        suffix = str(idx)
        raw_weights = np.random.randn(fan_out, fan_in)
        parameters["w" + suffix] = raw_weights / np.sqrt(fan_in)
        parameters["b" + suffix] = np.zeros((fan_out, 1))
    return parameters

In [7]:
def forward_propagation(x,parameters,activation_function):
    """Run one forward pass and keep every intermediate value.

    Parameters
    ----------
    x : numpy.ndarray
        Network input, stored in the cache as ``"a0"``.
    parameters : dict
        ``"w1".."wL"`` / ``"b1".."bL"``; L is taken as ``len(parameters) // 2``.
    activation_function : str
        ``'tanh'`` selects tanh for the hidden layers; any other value
        selects relu.

    Returns
    -------
    (numpy.ndarray, dict)
        The output activation and the cache holding ``a0`` plus ``z<i>`` /
        ``a<i>`` for every layer. The output layer uses sigmoid when its
        pre-activation has a single row and softmax otherwise.
    """
    num_layers = len(parameters) // 2
    hidden_activation = tanh if activation_function == 'tanh' else relu
    cache = {"a0": x}

    def affine(idx):
        # z_idx = w_idx . a_(idx-1) + b_idx
        key = str(idx)
        return parameters["w" + key].dot(cache["a" + str(idx - 1)]) + parameters["b" + key]

    # Hidden layers 1 .. L-1 share the same activation.
    for idx in range(1, num_layers):
        key = str(idx)
        cache["z" + key] = affine(idx)
        cache["a" + key] = hidden_activation(cache["z" + key])

    # The output layer picks its activation from the number of output rows.
    last = str(num_layers)
    z_out = affine(num_layers)
    cache["z" + last] = z_out
    output_activation = sigmoid if z_out.shape[0] == 1 else softmax
    cache['a' + last] = output_activation(z_out)

    return cache['a' + last], cache

In [8]:
# Architecture used for the shape sanity checks: input -> 100 -> 200 -> output.
layers = [X_train.shape[0], 100, 200, Y_train.shape[0]]
parameters = initialize_parameters(layers)

# Report the weight and bias shapes of every layer, first layer first.
for layer_idx in range(1, len(layers)):
    tag = str(layer_idx)
    print("Shape of W" + tag + ":", parameters['w' + tag].shape)
    print("Shape of B" + tag + ":", parameters['b' + tag].shape, "\n")

Shape of W1: (100, 12288)
Shape of B1: (100, 1) 

Shape of W2: (200, 100)
Shape of B2: (200, 1) 

Shape of W3: (1, 200)
Shape of B3: (1, 1) 



In [9]:
al, forward_cache = forward_propagation(X_train, parameters, 'relu')

# The cache holds a0 (the input) plus one activation per layer, so there are
# len(parameters)//2 + 1 entries to report.
for layer_idx in range(0, len(parameters)//2 + 1):
    tag = str(layer_idx)
    print("Shape of A" + tag + " :", forward_cache['a' + tag].shape)

Shape of A0 : (12288, 209)
Shape of A1 : (100, 209)
Shape of A2 : (200, 209)
Shape of A3 : (1, 209)


In [10]:
def backward_propagation(al,y,parameters,forward_cache,activation):
    """Compute gradients of the cost with respect to every weight and bias.

    Parameters
    ----------
    al : numpy.ndarray
        Output-layer activation from the forward pass; ``al.shape[1]`` is
        used as the number of examples.
    y : numpy.ndarray
        Targets, same shape as ``al``.
    parameters : dict
        ``"w<i>"`` / ``"b<i>"`` entries; the layer count is
        ``len(parameters) // 2``.
    forward_cache : dict
        Must contain ``"a<i>"`` for i = 0 .. L-1.
    activation : str
        Hidden-layer activation name. ``'tanh'`` selects the tanh
        derivative; any other value selects the relu derivative. This is the
        same rule forward_propagation uses to choose the activation, so the
        two passes always agree (previously an unrecognised name ran relu
        forward but the tanh derivative backward).

    Returns
    -------
    dict
        ``"dz<i>"``, ``"dw<i>"``, ``"db<i>"`` for every layer, inserted from
        the output layer backwards.

    Notes
    -----
    The output-layer error is taken as ``al - y``. That is the gradient for a
    sigmoid or softmax output trained with cross-entropy, which is what this
    notebook's forward pass and cost function use.
    """
    grads = {}
    l = len(parameters)//2
    m = al.shape[1]

    # Both derivative helpers are written in terms of the cached activation a_i.
    hidden_derivative = derivative_tanh if activation == 'tanh' else derivative_relu

    grads["dz"+str(l)] = al - y
    grads['dw'+str(l)] = (1/m)*np.dot(grads['dz'+str(l)],forward_cache['a'+str(l-1)].T)
    grads['db'+str(l)] = (1/m)*np.sum(grads['dz'+str(l)],axis=1,keepdims=True)

    for i in range(l-1,0,-1):
        # Push the error of layer i+1 back through its weights, then gate it
        # by the local slope of layer i's activation.
        upstream = np.dot(parameters['w'+str(i+1)].T,grads['dz'+str(i+1)])
        grads["dz"+str(i)] = upstream*hidden_derivative(forward_cache['a'+str(i)])
        grads['dw'+str(i)] = (1/m)*np.dot(grads['dz'+str(i)],forward_cache['a'+str(i-1)].T)
        grads['db'+str(i)] = (1/m)*np.sum(grads['dz'+str(i)],axis=1,keepdims=True)

    return grads

In [11]:
grads = backward_propagation(al,Y_train, parameters, forward_cache, 'relu')

# Walk the layers from the output back to the input. The layer count is taken
# from the parameters dict (one w/b pair per layer); the earlier bound,
# len(grads)//(len(layers)-1), only equalled the layer count for a network
# with exactly three layers.
for layer_idx in reversed(range(1, len(parameters)//2 + 1)):
    print("Shape of dz" + str(layer_idx) + " :", grads['dz' + str(layer_idx)].shape)
    print("Shape of dw" + str(layer_idx) + " :", grads['dw' + str(layer_idx)].shape)
    print("Shape of db" + str(layer_idx) + " :", grads['db' + str(layer_idx)].shape, "\n")

Shape of dz3 : (1, 209)
Shape of dw3 : (1, 200)
Shape of db3 : (1, 1) 

Shape of dz2 : (200, 209)
Shape of dw2 : (200, 100)
Shape of db2 : (200, 1) 

Shape of dz1 : (100, 209)
Shape of dw1 : (100, 12288)
Shape of db1 : (100, 1) 



In [12]:
def update_parameters(parameters,grads,learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Each ``"w<i>"`` / ``"b<i>"`` entry of ``parameters`` is rebound to
    ``value - learning_rate * grads["dw<i>" / "db<i>"]``. The dictionary is
    updated in place and also returned; the arrays it previously held are
    not modified.
    """
    num_layers = len(parameters) // 2
    for idx in range(1, num_layers + 1):
        suffix = str(idx)
        weight_step = learning_rate * grads["dw" + suffix]
        parameters["w" + suffix] = parameters["w" + suffix] - weight_step
        bias_step = learning_rate * grads["db" + suffix]
        parameters["b" + suffix] = parameters["b" + suffix] - bias_step

    return parameters

In [13]:
def predict(X, y, parameters, activation):
    """Return the classification accuracy of the network on (X, y).

    Despite the name, this returns a score rather than the predictions: the
    fraction of columns whose predicted label matches ``y``, rounded to two
    decimals. With a single-row ``y`` the output is thresholded at 0.5;
    otherwise both ``y`` and the output are reduced with argmax over axis 0.
    """
    m = X.shape[1]
    probabilities, _ = forward_propagation(X, parameters, activation)

    is_binary = y.shape[0] == 1
    if is_binary:
        labels = y
        predicted = np.array(probabilities > 0.5, dtype = 'float')
    else:
        labels = np.argmax(y, 0)
        predicted = np.argmax(probabilities, 0)

    matches = predicted == labels
    accuracy = np.sum(matches / m)
    return np.round(accuracy, 2)

In [14]:
def model(X, Y, layers_dims, learning_rate = 0.03, activation = 'relu', num_iterations = 3000):#lr was 0.009
    """Train the network with full-batch gradient descent.

    Parameters
    ----------
    X, Y : numpy.ndarray
        Training inputs and targets, one example per column.
    layers_dims : sequence of int
        Unit count of every layer, input first (see initialize_parameters).
    learning_rate : float
        Step size for update_parameters.
    activation : str
        Hidden-layer activation name passed to the forward/backward passes.
    num_iterations : int
        Number of gradient-descent steps.

    Returns
    -------
    dict
        The trained parameters.

    Notes
    -----
    * NumPy's global RNG is re-seeded with 1 on every call.
    * Progress is printed roughly ten times per run. The reported cost is
      the one computed before that iteration's update; the accuracies are
      computed after it.
    * Train accuracy is measured on the ``X`` / ``Y`` arguments. Test
      accuracy still reads the notebook-level ``X_test`` / ``Y_test``
      variables, which must exist before this is called.
    """
    np.random.seed(1)

    parameters = initialize_parameters(layers_dims)

    # Integer reporting interval. The previous float interval
    # (num_iterations/10) matched irregularly whenever num_iterations was
    # not a multiple of 10.
    report_every = max(1, num_iterations // 10)

    for i in range(0, num_iterations):

        AL, forward_cache = forward_propagation(X, parameters, activation)

        cost = compute_cost(AL, Y)

        grads = backward_propagation(AL, Y, parameters, forward_cache, activation)

        parameters = update_parameters(parameters, grads, learning_rate)

        if i % report_every == 0:
            # Score the data that was actually passed in, not the notebook
            # globals X_train / Y_train.
            print("\niter:{} \t cost: {} \t train_acc:{} \t test_acc:{}".format(i, np.round(cost, 2), predict(X, Y, parameters, activation), predict(X_test, Y_test, parameters, activation)))

        if i % 10 == 0:
            print("==", end = '')

    return parameters

In [15]:
# 4-layer model: input -> 20 -> 7 -> 5 -> output
layers_dims = [X_train.shape[0], 20, 7, 5, Y_train.shape[0]]
lr, iters = 0.0075, 2500

parameters = model(
    X_train,
    Y_train,
    layers_dims,
    learning_rate=lr,
    activation='relu',
    num_iterations=iters,
)


iter:0 	 cost: 0.77 	 train_acc:0.51 	 test_acc:0.42
====

iter:250 	 cost: 0.63 	 train_acc:0.74 	 test_acc:0.64
iter:500 	 cost: 0.54 	 train_acc:0.78 	 test_acc:0.7
iter:750 	 cost: 0.44 	 train_acc:0.92 	 test_acc:0.78
iter:1000 	 cost: 0.32 	 train_acc:0.96 	 test_acc:0.8
iter:1250 	 cost: 0.23 	 train_acc:0.98 	 test_acc:0.76
iter:1500 	 cost: 0.16 	 train_acc:0.98 	 test_acc:0.82
iter:1750 	 cost: 0.13 	 train_acc:0.98 	 test_acc:0.8
iter:2000 	 cost: 0.11 	 train_acc:0.98 	 test_acc:0.8
iter:2250 	 cost: 0.1 	 train_acc:0.98 	 test_acc:0.8