### 1. Headers

In [1]:
# Computation Libraries
import numpy as np
import pandas as pd
import os
import scipy
from PIL import Image

# Stats Libraries
import statistics
import scipy.stats as stats

# Visualisazition Libraries
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# Encoders and Scalars
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Modelling Libraries
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans

# Evaluation Metrics Libraries
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report

# Cross Validation Libraries
from sklearn.model_selection import cross_val_score

### 2. Helper Functions

In [2]:
def sigmoid(val):
    return (1/(1+np.exp(-1*val)))

def relu(val):
    return np.maximum(0,val)

In [3]:
def relu_backward(dA, cache):
    """
    Implement the backward propagation for a single RELU unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache
    dZ = np.array(dA, copy=True) # just converting dz to a correct object.

    # When z <= 0, you should set dz to 0 as well. 
    dZ[Z <= 0] = 0

    assert (dZ.shape == Z.shape)

    return dZ

def sigmoid_backward(dA, cache):
    """
    Implement the backward propagation for a single SIGMOID unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache

    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)

    assert (dZ.shape == Z.shape)

    return dZ

### 3. Intialize Dimensions and Parameters

In [20]:
# We would be resizing the images to 64x64x3 for having same size for training
n_x = 12288
hidden_layer_dims = [n_x,20,10, 5,3,1] 
learning_rate = 0.05

# defaults for training
IMG_HEIGHT = 64
IMG_WIDTH  = 64

### 4. Initialize Random Weights

In [5]:
def initialize_weights(hidden_layer_dims):
    params = {}
    for i in range(1, len(hidden_layer_dims)): 
        params["W"+str(i)] = np.random.randn(hidden_layer_dims[i],hidden_layer_dims[i-1])*0.01
        params["b"+str(i)] = np.zeros((hidden_layer_dims[i],1))*0.01
    return params

### 5. Forward Propogation

In [6]:
def forward_propogation(X, params, hidden_layer_dims):
    cache = {}
    L = len(params)//2
    cache["A"+str(0)] = X
    cache["Z"+str(1)] = np.dot(params["W1"],X)+params["b1"]
    cache["A"+str(1)] = relu(cache["Z1"])
    for i in range(1,L-1):
        cache["Z"+str(i+1)] = np.dot(params["W"+str(i+1)],cache["A"+str(i)])+params["b"+str(i+1)]
        cache["A"+str(i+1)] = relu(cache["Z"+str(i+1)])
    cache["Z"+str(L)] = np.dot(params["W"+str(L)],cache["A"+str(L-1)])+params["b"+str(L)]
    cache["A"+str(L)] = sigmoid(cache["Z"+str(L)])
    return cache

### 6. Cost

In [7]:
def calculate_cost(AL, Y, hidden_layer_dims):
    m = Y.shape[1]
    AL =  AL
    cost = (-1/m)*np.sum(np.multiply(Y,np.log(AL))+np.multiply(1-Y,np.log(1-AL)))
    cost = np.squeeze(cost)
    return cost

### 7. Backward_Propagation

In [8]:
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    
    m = A_prev.shape[1]
    dW = (1/m)*(np.dot(dZ,A_prev.T))
    db = (1/m)*np.sum(dZ,axis=1,keepdims=True) 
    dA_prev = np.dot(W.T,dZ)
    
    return dA_prev, dW, db

In [9]:
def linear_activation_backward(dA, cache, activation):

    linear_cache, activation_cache = cache
    
    if activation == "relu":
        dZ =   relu_backward(dA, activation_cache)  
    elif activation == "sigmoid":
        dZ =   sigmoid_backward(dA, activation_cache)
        
    dA_prev, dW, db =  linear_backward(dZ, linear_cache)  
    return dA_prev, dW, db

In [10]:
def backward_propagation(Y,AL,params,cache, hidden_layer_dims):
    grads = {}
    m = Y.shape[1]
    Y = Y.reshape(AL.shape)
    
    L = len(hidden_layer_dims)-1
    
    dAL = -1*(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    
    cache_val = ((cache["A"+str(L-1)],params["W"+str(L)],params["b"+str(L)]),cache["Z"+str(L)])
    
    dA_prev,dW,db = linear_activation_backward(dAL, cache_val, "sigmoid")
    grads["dA" + str(L-1)] = dA_prev
    grads["dW" + str(L)] = dW
    grads["db" + str(L)] = db
    
    for i in range(L-1,0,-1):
        cache_val = ((cache["A"+str(i-1)],params["W"+str(i)],params["b"+str(i)]),cache["Z"+str(i)])
        dA_prev, dW, db = linear_activation_backward(dA_prev, cache_val, "relu")
        grads["dA" + str(i-1)] = dA_prev
        grads["dW" + str(i)] = dW
        grads["db" + str(i)] = db
    
    return grads

### 8. Update Parameters

In [11]:
def update_parameters(params, grads, learning_rate):

    parameters = params.copy()
    L = len(parameters) // 2

    for l in range(L):
        parameters["W" + str(l+1)] = params["W"+ str(l+1)] - learning_rate*grads["dW"+ str(l+1)]
        parameters["b" + str(l+1)] = params["b"+ str(l+1)] - learning_rate*grads["db"+ str(l+1)]
        
    return parameters

### 9. Train model

In [12]:
def train_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):

    costs = [] 
    params = initialize_weights(layers_dims)
    
    for i in range(0, num_iterations):
        caches = forward_propogation(X, params, hidden_layer_dims)
        AL = caches["A"+str(len(layers_dims)-1)]
        cost = calculate_cost(AL, Y, hidden_layer_dims)
        grads = backward_propagation(Y,AL,params,caches, hidden_layer_dims)    
        params = update_parameters(params, grads, learning_rate)

        if print_cost and i % 100 == 0 or i == num_iterations - 1:
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
        if i % 100 == 0 or i == num_iterations:
            costs.append(cost)
    return params, costs

### 10. Import Dataset

In [13]:
## We store the data directly in a verctorized format 
dataset_path = "/Users/saikarna/Desktop/GitHub/Daily-Notebook/dogs-vs-cats/"
train_path = dataset_path+"train/"
test_path = dataset_path+"test1/"

def load_dataset(train_path):
    X, Y =[],[]
    for filename in os.listdir(train_path):
        if(filename[-3:]=="jpg"):
            file_path = train_path+filename
            image= np.array(Image.open(file_path))
            image= np.resize(image,(IMG_HEIGHT,IMG_WIDTH,3))
            image = image.astype('float32')
            image /= 255.  
            X.append(image)
            Y.append(1 if filename[0]=="d" else 0)
    X,Y = np.array(X), np.array(Y)
    return X, Y

In [14]:
X , Y = load_dataset(train_path)

In [15]:
X = np.array(X)
Y = np.array(Y)

In [16]:
print("Number of data points in X : ",X.shape)
print("Number of data points in Y : ",Y.shape)

## Number of Cats and Dogs
pd.Series(Y).value_counts()

Number of data points in X :  (25000, 64, 64, 3)
Number of data points in Y :  (25000,)


1    12500
0    12500
dtype: int64

### 11. Vectorize Data

In [17]:
print("Number of data points in X : ",X.shape)
print("Number of data points in Y : ",Y.shape)


X_train, X_test, Y_train ,Y_test = train_test_split(X,Y,test_size = 0.33, random_state = 33)
print("X-train Shape : ",X_train.shape, "Y-train Shape : ",Y_train.shape)
print("X-test Shape : ",X_test.shape, "Y-test Shape : ",Y_test.shape)

Number of data points in X :  (25000, 64, 64, 3)
Number of data points in Y :  (25000,)
X-train Shape :  (16750, 64, 64, 3) Y-train Shape :  (16750,)
X-test Shape :  (8250, 64, 64, 3) Y-test Shape :  (8250,)


In [18]:
X_train_flatten = np.array(X_train.reshape(X_train.shape[0],-1).T)
Y_train_flatten = np.array(Y_train.reshape(Y_train.shape[0],-1).T)

X_test_flatten = np.array(X_test.reshape(X_test.shape[0],-1).T)
Y_test_flatten = np.array(Y_test.reshape(Y_test.shape[0],-1).T)

print("X-train Shape : ",X_train_flatten.shape, "Y-train Shape : ",Y_train_flatten.shape)
print("X-test Shape : ",X_test_flatten.shape, "Y-test Shape : ",Y_test_flatten.shape)

X-train Shape :  (12288, 16750) Y-train Shape :  (1, 16750)
X-test Shape :  (12288, 8250) Y-test Shape :  (1, 8250)


### 12. Train Model

In [None]:
model_param, costs = train_model(X_train_flatten, Y_train_flatten, hidden_layer_dims, learning_rate = 0.05, num_iterations = 1000, print_cost=True)


Cost after iteration 0: 0.6931471807359174
Cost after iteration 100: 0.6931391537870447
Cost after iteration 200: 0.6931385052605331
Cost after iteration 300: 0.6931384528606667
Cost after iteration 400: 0.6931384486229095
Cost after iteration 500: 0.6931384482766383
Cost after iteration 600: 0.693138448243376
Cost after iteration 700: 0.6931384482356183


### 13. Predict and Visualize

In [None]:
def plot_costs(costs, learning_rate=0.0075):
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()
    
plot_costs(costs)

### Questions to ponder on
1. Why not train with images with different sizes??
2. How can we have autocomplete feature available for Jupyter notebook