# Implementing logistic regression as a simple neural network

## 1 - Imports and basic helper functions

testCases provides some test cases to assess the correctness of your functions

In [2]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v4 import *

%matplotlib inline
# Notebook-wide matplotlib defaults for every figure drawn below
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'  # default interpolation used when displaying images
plt.rcParams['image.cmap'] = 'gray'  # default colormap used when displaying images

%load_ext autoreload
%autoreload 2

# Seed NumPy's global random generator so that results are repeatable between runs
np.random.seed(1)

ModuleNotFoundError: No module named 'testCases_v4'

### Helper functions

`sigmoid` and `sigmoid_backward` (the ReLU helpers `relu` and `relu_backward` are not needed for logistic regression and are not defined here)

In [None]:
def sigmoid(Z):
    """
    Apply the logistic sigmoid element-wise.

    Arguments:
    Z -- numpy array of any shape holding the pre-activation values

    Returns:
    A -- sigmoid(Z), same shape as Z
    cache -- the input Z itself, handed back so the backward pass can reuse it
    """
    activation = 1 / (1 + np.exp(-Z))

    # The pre-activation is the only thing the backward pass needs.
    return activation, Z


def sigmoid_backward(dA, cache):
    """
    Backward pass through a single SIGMOID unit.

    Arguments:
    dA -- gradient of the cost with respect to the activation, of any shape
    cache -- the pre-activation 'Z' saved by the forward pass

    Returns:
    dZ -- gradient of the cost with respect to Z, same shape as Z
    """
    pre_activation = cache

    # Recompute the activation; the derivative of sigmoid is sig * (1 - sig).
    sig = 1 / (1 + np.exp(-pre_activation))
    grad = dA * sig * (1 - sig)

    assert grad.shape == pre_activation.shape
    return grad

## 3 - Initialization

### 3.1 - Logistic regression (1-layer Neural Network)

- The model's structure is: *LINEAR -> SIGMOID*. 
- Random initialization for the weight matrix. We use `np.random.randn(n_y, n_x) * 0.01`.
- Zero initialization for the biases. We use `np.zeros(shape)`.

In [None]:
def initialize_parameters(n_x, n_y):
    """
    Create the initial weights and bias of the LINEAR -> SIGMOID model.

    Arguments:
    n_x -- size of the input layer (number of features of one data point)
    n_y -- size of the output layer (1 for logistic regression)

    Returns:
    w -- weight matrix of shape (n_y, n_x), small random values
    b -- bias vector of shape (n_y, 1), all zeros
    """
    # Re-seed on every call so that the initial weights are reproducible.
    np.random.seed(1)

    weight_shape, bias_shape = (n_y, n_x), (n_y, 1)

    w = 0.01 * np.random.randn(*weight_shape)
    b = np.zeros(bias_shape)

    assert w.shape == weight_shape
    assert b.shape == bias_shape

    return w, b

## 4 - Forward propagation module

We will complete two functions in this order:

- LINEAR
- LINEAR -> ACTIVATION where ACTIVATION will be Sigmoid. 

### 4.1 - Linear Forward 

The linear forward module (vectorized over all the examples) computes the following equations:

$$Z = wX + b, \quad \text{where } w \text{ has shape } (n_y, n_x).\tag{4}$$

In [None]:
def linear_forward(X, w, b):
    """
    Compute the linear part of the forward pass, Z = w . X + b.

    Arguments:
    X -- activations from previous layer (or input data): (size of previous layer, number of examples)
    w -- weights: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function (pre-activation)
    cache -- the tuple (w, b), stored for the backward pass
    """
    # b has a single column, so it is broadcast across all examples.
    Z = w.dot(X) + b

    n_out, n_examples = w.shape[0], X.shape[1]
    assert Z.shape == (n_out, n_examples)

    return Z, (w, b)

### 4.2 - Linear activation forward

In this implementation, we will use the linear_forward and add the activation (sigmoid) step.

In [None]:
def linear_activation_forward(X, w, b):
    """
    Forward pass for the LINEAR -> SIGMOID model.

    Arguments:
    X -- input data, indexed as (features, number of examples)
    w -- weights passed on to linear_forward
    b -- bias passed on to linear_forward

    Returns:
    A -- sigmoid activations; asserted to have shape (1, number of examples)
    cache -- tuple (linear_cache, activation_cache) for the backward pass
    """
    pre_activation, linear_cache = linear_forward(X, w, b)
    A, activation_cache = sigmoid(pre_activation)

    n_examples = X.shape[1]
    assert A.shape == (w.shape[0], n_examples)
    # Logistic regression has exactly one output unit.
    assert A.shape == (1, n_examples)

    return A, (linear_cache, activation_cache)

In [None]:
def compute_cost(AL, Y):
    """
    Cross-entropy cost of the predictions AL against the labels Y.

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost, as a 0-dimensional numpy array
    """

    m = Y.shape[1]

    # Keep probabilities strictly inside (0, 1): log(0) is -inf and
    # 0 * -inf is nan, which would poison the cost once a prediction saturates.
    eps = np.finfo(float).eps
    AL_safe = np.clip(AL, eps, 1 - eps)

    # Compute loss from AL and y.
    cost = -(np.dot(Y, np.log(AL_safe).T) + np.dot(1 - Y, np.log(1 - AL_safe).T)) / m

    # To make sure cost's shape is what we expect (e.g. this turns [[17]] into 17).
    cost = np.squeeze(cost)
    assert cost.shape == ()

    return cost

In [None]:
def linear_backward(X, dZ, cache):
    """
    Backward pass through the linear step.

    Arguments:
    X -- input to the linear step, indexed as (features, number of examples)
    dZ -- gradient of the cost with respect to the linear output
    cache -- tuple of values (w, b) coming from the forward propagation

    Returns:
    dw -- gradient of the cost with respect to w, same shape as w
    db -- gradient of the cost with respect to b, same shape as b
    """
    w, b = cache
    n_examples = X.shape[1]

    # Both gradients are averaged over the examples.
    dw = dZ.dot(X.T) / n_examples
    db = dZ.sum(axis=1, keepdims=True) / n_examples

    assert dw.shape == w.shape
    assert db.shape == b.shape

    return dw, db

### 6.2 - Linear activation backward

If $g(.)$ is the activation function, 
`sigmoid_backward` computes $$dZ = dA * g'(Z) \tag{11}$$.  

In [None]:
def linear_activation_backward(X, dA, cache):
    """
    Backward pass for the LINEAR -> SIGMOID model.

    Arguments:
    X -- input that was fed to the forward pass
    dA -- post-activation gradient
    cache -- tuple (linear_cache, activation_cache) saved by the forward pass

    Returns:
    dw -- gradient of the cost with respect to w, same shape as w
    db -- gradient of the cost with respect to b, same shape as b
    """
    linear_cache, activation_cache = cache

    # Undo the activation first, then the linear step.
    pre_activation_grad = sigmoid_backward(dA, activation_cache)
    return linear_backward(X, pre_activation_grad, linear_cache)

### 6.4 - Update Parameters


In [None]:
def update_parameters(w, b, dw, db, learning_rate):
    """
    Take one gradient-descent step.

    Arguments:
    w, b -- current parameters
    dw, db -- gradients of the cost with respect to w and b
    learning_rate -- step size of the update

    Returns:
    w, b -- updated parameters (the inputs are not modified in place)
    """
    stepped_w = w - learning_rate * dw
    stepped_b = b - learning_rate * db
    return stepped_w, stepped_b

# Applying on a real dataset

1. Imports

In [None]:
import time
import scipy
from PIL import Image
from scipy import ndimage
#from dnn_app_utils_v3 import *

%load_ext autoreload
%autoreload 2

## 2 - Dataset

We will use a "Cat vs non-Cat" dataset. With logistic regression we can get about 70% test accuracy when classifying cat vs. non-cat images.

**Problem Statement**: We are given a dataset (the HDF5 files "train_catvnoncat.h5" and "test_catvnoncat.h5") containing:
    - a training set of m_train images labelled as cat (1) or non-cat (0)
    - a test set of m_test images labelled as cat and non-cat
    - each image is of shape (num_px, num_px, 3) where 3 is for the 3 channels (RGB).

To get more familiar with the dataset, let's load the data by running the cell below.

In [1]:
def load_data(train_path='C:\\Users\\utkar\\Desktop\\Python\\CDS\\data\\train_catvnoncat.h5',
              test_path='datasets/test_catvnoncat.h5'):
    """
    Load the cat / non-cat dataset from two HDF5 files.

    Arguments:
    train_path -- path of the HDF5 file holding "train_set_x" and "train_set_y"
    test_path -- path of the HDF5 file holding "test_set_x", "test_set_y" and "list_classes"

    NOTE(review): the defaults are the paths that were previously hard-coded;
    the training default is an absolute, machine-specific path -- pass
    train_path explicitly when running elsewhere.

    Returns:
    train_set_x_orig -- training features, as stored in the file
    train_set_y_orig -- training labels, reshaped to (1, number of training examples)
    test_set_x_orig -- test features, as stored in the file
    test_set_y_orig -- test labels, reshaped to (1, number of test examples)
    classes -- the list of classes stored in the test file
    """
    # The datasets are copied into memory with [:], so each file can be closed
    # as soon as its block ends instead of leaking an open handle.
    with h5py.File(train_path, "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # our train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # our train set labels

    with h5py.File(test_path, "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # our test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # our test set labels
        classes = np.array(test_dataset["list_classes"][:]) # the list of classes

    # Turn the label vectors into row vectors of shape (1, m).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes



# Load the raw train/test images, their labels and the class names into notebook globals
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()

NameError: name 'h5py' is not defined

The following code will show us an image in the dataset. We can change the index and re-run the cell multiple times to see other images. 

In [None]:
# Display one training image together with its label and class name
index = 25
plt.imshow(train_x_orig[index])
print("y = ", train_y[0, index], ". It's a ", classes[train_y[0, index]].decode("utf-8"), " picture.", sep="")

In [None]:
# Summarize the dataset: number of examples, image size and array shapes
m_train, num_px = train_x_orig.shape[:2]
m_test = test_x_orig.shape[0]

print("Number of training examples: ", m_train, sep="")
print("Number of testing examples: ", m_test, sep="")
print("Each image is of size: (", num_px, ", ", num_px, ", 3)", sep="")
print("train_x_orig shape: ", train_x_orig.shape, sep="")
print("train_y shape: ", train_y.shape, sep="")
print("test_x_orig shape: ", test_x_orig.shape, sep="")
print("test_y shape: ", test_y.shape, sep="")

As usual, you reshape and standardize the images before feeding them to the network. The code is given in the cell below.

<img src="images/imvectorkiank.png" style="width:450px;height:300px;">

<caption><center> <u>Figure 1</u>: Image to vector conversion. <br> </center></caption>

In [None]:
# Flatten each example into one column: the result is indexed (features, examples).
# The "-1" lets reshape collapse all remaining dimensions into a single axis.
train_x_flatten = train_x_orig.reshape(len(train_x_orig), -1).T
test_x_flatten = test_x_orig.reshape(len(test_x_orig), -1).T

# Scale by 255 so that feature values lie between 0 and 1.
train_x = train_x_flatten / 255.0
test_x = test_x_flatten / 255.0

print("train_x's shape: ", train_x.shape, sep="")
print("test_x's shape: ", test_x.shape, sep="")

$12,288$ equals $64 \times 64 \times 3$ which is the size of one reshaped image vector.

In [None]:
### CONSTANTS DEFINING THE MODEL ####
n_x = 64 * 64 * 3   # size of one flattened image: num_px * num_px * 3 = 12288
n_y = 1             # a single output unit

In [None]:
def logistic_regression_train(X, Y, learning_rate = 0.00075, num_iterations = 3000, print_cost=False):
    """
    Trains logistic regression (LINEAR->SIGMOID) with batch gradient descent
    and plots the cost recorded during training.

    Arguments:
    X -- input data, of shape (n_x, number of examples)
    Y -- true "label" vector (containing 1 if cat, 0 if non-cat), of shape (1, number of examples)
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- If set to True, this will print the cost every 500 iterations

    Returns:
    w -- learned weights, of shape (Y.shape[0], X.shape[0])
    b -- learned bias, of shape (Y.shape[0], 1)
    """
    cost_every = 500                        # sampling period of the cost curve

    np.random.seed(1)
    costs = []                              # to keep track of the cost

    # Size the parameters from the data itself rather than from notebook-level
    # globals, so the function works for any feature count.
    w, b = initialize_parameters(X.shape[0], Y.shape[0])

    # Smallest margin kept between A and {0, 1} when dividing below.
    eps = np.finfo(float).eps

    # Loop (gradient descent)
    for i in range(num_iterations):

        # Forward propagation: LINEAR -> SIGMOID
        A, cache = linear_activation_forward(X, w, b)

        # Compute cost
        cost = compute_cost(A, Y)

        # Backward propagation. dA is the derivative of the cross-entropy loss
        # with respect to A; A is clipped only for these divisions so that a
        # saturated prediction cannot produce a division by zero.
        A_safe = np.clip(A, eps, 1 - eps)
        dA = - (np.divide(Y, A_safe) - np.divide(1 - Y, 1 - A_safe))
        dw, db = linear_activation_backward(X, dA, cache)

        # Update parameters.
        w, b = update_parameters(w, b, dw, db, learning_rate)

        # Record the cost every 500 iterations (so the plot below is never
        # empty) and print it only when asked to.
        if i % cost_every == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per 500)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return w, b

Now we can use the trained parameters to classify images from the dataset. We first define the predict function.

In [None]:
def predict(X, y, w, b):
    """
    Predicts 0/1 labels for X with the trained model and prints the accuracy
    of those predictions against y.

    Arguments:
    X -- data set of examples you would like to label
    y -- true labels for X, compared element-wise with the predictions
    w, b -- parameters of the trained model

    Returns:
    p -- predictions for the given dataset X, a float array of 0s and 1s with the same shape as the model output
    """

    m = X.shape[1]

    # Forward propagation
    probas, _ = linear_activation_forward(X, w, b)

    # Convert probabilities to 0/1 predictions in one vectorized step;
    # a probability of exactly 0.5 is labelled 0.
    p = (probas > 0.5).astype(float)

    print("Accuracy: "  + str(np.sum((p == y)/m)))

    return p

## Training the model

If we run the cell, the cost should be decreasing. It may take up to 5 minutes to run the 4000 iterations.

In [None]:
# Train on the training set for 4000 iterations with cost printing enabled
w, b = logistic_regression_train(train_x, train_y, num_iterations = 4000, print_cost=True)

In [None]:
# Predictions (and printed accuracy) on the training set
predictions_train = predict(train_x, train_y, w, b)

In [None]:
# Predictions (and printed accuracy) on the test set
predictions_test = predict(test_x, test_y, w, b)