# Machine Learning Capstone Project

## CNN Classifier

### 1. Import required packages

In [1]:
from helper_functions import unpickle, extract_data, load_data
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from time import time

Using TensorFlow backend.


### 2. Extract CIFAR 15

In [9]:
(X_train_orig, y_train_orig), (X_test, y_test) = load_data('cifar-15-python', 'fine')
print('Training Data Shape  :', X_train_orig.shape)
print('Training Label Shape :', y_train_orig.shape)
print('Testing Data Shape   :', X_test.shape)
print('Testing Label Shape  :', y_test.shape)

Training Data Shape  : (7500, 32, 32, 3)
Training Label Shape : (7500, 1)
Testing Data Shape   : (1500, 32, 32, 3)
Testing Label Shape  : (1500, 1)


#### 2.1. Split training datset into training and validation set

In [10]:
X_train, X_val, y_train, y_val = train_test_split(X_train_orig, y_train_orig, 
                                                    test_size=0.20, random_state=10)
print('Training Data Shape    :', X_train.shape)
print('Training Label Shape   :', y_train.shape)
print('Validation Data Shape  :', X_val.shape)
print('Validation Label Shape :', y_val.shape)

Training Data Shape    : (6000, 32, 32, 3)
Training Label Shape   : (6000, 1)
Validation Data Shape  : (1500, 32, 32, 3)
Validation Label Shape : (1500, 1)


### 3. Preprocessing Data

In [11]:
def convertToGray(input_dataset, output_dataset):
    i = 0
    for img in input_dataset:
        output_dataset[i] = np.reshape(np.dot(img[...,:3], [0.299, 0.587, 0.114]), (img.shape[0], img.shape[1], 1))
        i += 1    

In [12]:
X_train_gray = np.zeros((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
X_val_gray = np.zeros((X_val.shape[0], X_val.shape[1], X_val.shape[2], 1))
X_test_gray = np.zeros((X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))

# Convert to gray
convertToGray(X_train, X_train_gray)
convertToGray(X_val, X_val_gray)
convertToGray(X_test, X_test_gray)

# Normalize the data
X_train_gray = (X_train_gray - 127.5)/255.0
X_val_gray = (X_val_gray - 127.5)/255.0
X_test_gray = (X_test_gray - 127.5)/255.0

# Reshape image label into 1D array
y_train = np.ravel(y_train)
y_val = np.ravel(y_val)
y_test = np.ravel(y_test)

print('Training Data Shape    :', X_train_gray.shape)
print('Training Label Shape   :', y_train.shape)
print('Validation Data Shape  :', X_val_gray.shape)
print('Validation Label Shape :', y_val.shape)
print('Testing Data Shape     :', X_test_gray.shape)
print('Testing Label Shape    :', y_test.shape)

Training Data Shape    : (6000, 32, 32, 1)
Training Label Shape   : (6000,)
Validation Data Shape  : (1500, 32, 32, 1)
Validation Label Shape : (1500,)
Testing Data Shape     : (1500, 32, 32, 1)
Testing Label Shape    : (1500,)


In [13]:
# Variables 
BATCH_SIZE = 32 
NUM_CLASSES = 15
EPOCHS = 5

In [16]:
y_train = np_utils.to_categorical(y_train, NUM_CLASSES)
y_val = np_utils.to_categorical(y_val, NUM_CLASSES)
y_test = np_utils.to_categorical(y_test, NUM_CLASSES)

In [19]:
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

input_shape = (32,32,1)

model = Sequential()

model.add(Conv2D(filters = 16, kernel_size = 2, strides = 1, padding = 'same', activation = 'relu', input_shape = input_shape))
model.add(Flatten())
model.add(Dense(15, activation = 'softmax'))

#model.add(Conv2D(filters = 16, kernel_size = 2, strides = 1, padding = 'same', activation = 'relu', input_shape = input_shape))
#model.add(MaxPooling2D(pool_size = 2, strides = 2))
#model.add(Conv2D(filters = 32, kernel_size = 2, strides = 1, padding = 'same', activation = 'relu'))
#model.add(MaxPooling2D(pool_size = 2, strides = 2))
#model.add(Conv2D(filters = 64, kernel_size = 2, strides = 1, padding = 'same', activation = 'relu'))
#model.add(MaxPooling2D(pool_size = 2, strides = 2))
#model.add(Flatten())
#model.add(Dense(1024, activation = 'relu'))
#model.add(Dropout(0.4))
#model.add(Dense(512, activation = 'relu'))
#model.add(Dropout(0.4))
#model.add(Dense(15, activation = 'softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 32, 32, 16)        80        
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 16)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 16, 16, 32)        2080      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 8, 8, 64)          8256      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 4, 4, 64)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1024)              0         
__________

In [20]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.hdf5', 
                               verbose=1, save_best_only=True)
t1 = time()
model.fit(X_train_gray, y_train, 
          validation_data=(X_val_gray, y_val),
          epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[checkpointer], verbose=1)
dt = time() - t1
print('Time to train SVM is', dt, 'seconds.' )

Train on 6000 samples, validate on 1500 samples
Epoch 1/10
Epoch 2/10

### 5. Accuracy of the classifier

In [6]:
acc = clf.score(X_test_gray, y_test)
print('Accuracy of the SVM classifier is', acc*100, '%')

Accuracy of the SVM classifier is 25.7333333333 %
