# Deep neural network for image binary classification: Hotdog or Not hotdog?

Test the deep neural network framework by implementing the function of the SeeFood app from the TV show Silicon Valley. The question we are asking here is whether something is a Hotdog or Not a Hotdog!

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import random
import os
import zipfile
from src.utils import *
from src.ANN import *

### 1. Download the hotdog dataset with the Kaggle API and pre-process it

In [None]:
# download the hot-dog-not-hot-dog dataset
os.chdir("./data")
! kaggle datasets download -d dansbecker/hot-dog-not-hot-dog      # need keegle public key for this. 
os.chdir("..")

# unzip
with zipfile.ZipFile("./data/hot-dog-not-hot-dog.zip", 'r') as zip_ref:
    zip_ref.extractall("./data")

In [None]:
# Collect the image file paths of every (split, class) folder.
def _image_paths(folder):
    """Return `folder + name` for every entry os.listdir reports in `folder`."""
    return [folder + name for name in os.listdir(folder)]


train_hotdog_dirs = _image_paths("./data/seefood/train/hot_dog/")
train_not_hotdog_dirs = _image_paths("./data/seefood/train/not_hot_dog/")
test_hotdog_dirs = _image_paths("./data/seefood/test/hot_dog/")
test_not_hotdog_dirs = _image_paths("./data/seefood/test/not_hot_dog/")

# Report how many files each split holds (both classes together).
print(f"number of train: {len(train_hotdog_dirs)+len(train_not_hotdog_dirs)}")
print(f"number of test: {len(test_hotdog_dirs)+len(test_not_hotdog_dirs)}")

In [None]:
# The test set is too large, so transfer some samples from the test set to the
# train set.
# NOTE(review): split_list comes from src.utils; its first result is kept as the
# test part and its second result is appended to the train part — confirm the
# exact meaning of the second argument (50) against its definition.
SPLIT_SEED = 42  # fixed seed so Restart & Run All reproduces the same split
split_rng = random.Random(SPLIT_SEED)  # private RNG, global `random` state is not touched

# Sort before shuffling: os.listdir returns names in arbitrary order, so a seed
# alone would not pin down the outcome of the shuffle.
shuffled_hotdog = sorted(test_hotdog_dirs)
split_rng.shuffle(shuffled_hotdog)
list_1, list_2 = split_list(shuffled_hotdog, 50)
test_hotdog_dirs = list_1
train_hotdog_dirs += list_2

shuffled_not_hotdog = sorted(test_not_hotdog_dirs)
split_rng.shuffle(shuffled_not_hotdog)
list_1, list_2 = split_list(shuffled_not_hotdog, 50)
test_not_hotdog_dirs = list_1
train_not_hotdog_dirs += list_2

Note that the images in the dataset have different shapes and sizes. Resize and crop them so that every image has a dimension of 80×80 for later training.

In [None]:
# Build the image arrays (x) and the 0/1 label rows (y) for both splits.
image_size = 80


def _label_row(n_positive, n_negative):
    """Return a (1, n_positive + n_negative) float row: ones first, then zeros."""
    return np.concatenate((np.ones((1, n_positive)), np.zeros((1, n_negative))), axis=1)


# --- train set: hotdog images first, not-hotdog images after them ---
train_x_orig_t = load_images(train_hotdog_dirs, image_size)
train_x_orig_f = load_images(train_not_hotdog_dirs, image_size)
train_x_orig = np.concatenate((train_x_orig_t, train_x_orig_f))

m_train_t, m_train_f = train_x_orig_t.shape[0], train_x_orig_f.shape[0]
m_train = m_train_t + m_train_f
train_y = _label_row(m_train_t, m_train_f)  # 1 = hotdog, 0 = not hotdog

# --- test set: same layout as the train set ---
test_x_orig_t = load_images(test_hotdog_dirs, image_size)
test_x_orig_f = load_images(test_not_hotdog_dirs, image_size)
test_x_orig = np.concatenate((test_x_orig_t, test_x_orig_f))

m_test_t, m_test_f = test_x_orig_t.shape[0], test_x_orig_f.shape[0]
m_test = m_test_t + m_test_f
test_y = _label_row(m_test_t, m_test_f)

# Summary of what was assembled.
print("Number of training examples: " + str(m_train))
print("Number of testing examples: " + str(m_test))
print("Each image is of size: (" + str(image_size) + ", " + str(image_size) + ", 3)")
print("train_x_orig shape: " + str(train_x_orig.shape))
print("train_y shape: " + str(train_y.shape))
print("test_x_orig shape: " + str(test_x_orig.shape))
print("test_y shape: " + str(test_y.shape))



In [None]:
# Example image, shown with its label so the figure stands on its own when the
# notebook is skimmed. train_y holds 1 for the hotdog images and 0 otherwise.
example_index = 10
example_label = "hot dog" if train_y[0, example_index] == 1 else "not hot dog"

fig, ax = plt.subplots()
ax.imshow(train_x_orig[example_index])
# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title(f"Training example {example_index}: {example_label}");

In [None]:
# Flatten every example into one column: (m, ...) -> (features, m).
train_x_flatten = train_x_orig.reshape(len(train_x_orig), -1).T
test_x_flatten = test_x_orig.reshape(len(test_x_orig), -1).T

# Divide by 255 so that feature values lie between 0 and 1
# (assumes 8-bit pixel values — TODO confirm against load_images).
train_x = train_x_flatten / 255.
test_x = test_x_flatten / 255.

print("train_x's shape: " + str(train_x.shape))
print("test_x's shape: " + str(test_x.shape))

### 2. Train a deep neural network model with 6 layers

It turns out that the 6-layer neural network here overfits the data:

In [None]:
# Layer sizes of the 6-layer model: the input layer matches the number of rows
# of train_x, followed by five hidden layers and a single output unit.
n_input_features = train_x.shape[0]
hidden_layer_sizes = [40, 20, 20, 10, 5]
layers_dims = [n_input_features, *hidden_layer_sizes, 1]

In [None]:
# Fit the network on the training set; `lambd` is not passed here, so the
# function's own default applies.
parameters, costs = L_layer_model(
    train_x,
    train_y,
    layers_dims,
    learning_rate=0.001,
    num_iterations=5000,
    print_cost=True,
)

In [None]:
# Label the output the same way the regularized-model cells do, so the reader
# can tell which accuracy is being reported.
# NOTE(review): presumably predict (star-imported) prints the accuracy itself — confirm.
print("Train accuracy is:")
pred_train = predict(train_x, train_y, parameters)

In [None]:
# Label the output the same way the regularized-model cells do, so the reader
# can tell which accuracy is being reported.
# NOTE(review): presumably predict (star-imported) prints the accuracy itself — confirm.
print("Test accuracy is:")
pred_test = predict(test_x, test_y, parameters)

Implement L2 regularization to reduce the overfitting.

In [None]:
# Same architecture and learning rate as the first run, but trained for 8000
# iterations with lambd = 0.5.
parameters_r, costs_r = L_layer_model(
    train_x,
    train_y,
    layers_dims,
    learning_rate=0.001,
    num_iterations=8000,
    lambd=0.5,
    print_cost=True,
)

In [None]:
# Evaluate the parameters trained with lambd = 0.5 on the training set.
# NOTE(review): presumably predict prints the accuracy itself, which is why a
# label line is printed first — confirm against its definition.
print("Train accuracy is:")
pred_train_r = predict(train_x, train_y, parameters_r)

In [None]:
# Evaluate the parameters trained with lambd = 0.5 on the held-out test set.
# NOTE(review): presumably predict prints the accuracy itself, which is why a
# label line is printed first — confirm against its definition.
print("Test accuracy is:")
pred_test_r = predict(test_x, test_y, parameters_r)

### Conclusion

Since the dataset consists only of food pictures, the non-hotdog images are much more similar to hotdogs than objects that are not food (a car, for example) would be. A simple ANN's performance on this type of dataset is therefore limited. More advanced techniques such as CNNs are needed here.