In [50]:
import numpy as np
import os

import keras 
from keras.models import Sequential 
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.pooling import GlobalAveragePooling2D


In [3]:
def list_dir(directory):
    """
    Collect all files under directory/<label>/ together with integer labels.

    Every sub-directory of `directory` is treated as one class. Class ids are
    assigned from the alphabetically sorted sub-directory names, so the
    mapping is the same for every call, every run and every split that has
    the same class folders (even when a folder is empty in one split).

    Parameters
    ----------
    directory : str
        Root folder that contains one sub-folder per label.

    Returns
    -------
    filenames : list of str
        Path of each file, grouped by label in sorted label order. Files
        inside a label folder are kept in the order os.listdir returns them.
    labels : list of int
        Integer class id for the file at the same position in `filenames`.
        Both lists are empty when no files are found.
    """
    # Only sub-directories are classes. This skips stray files such as the
    # macOS '.DS_Store' using an exact check; the previous substring test
    # (`label not in '.DS_Store'`) also dropped folders named e.g. 'S'.
    class_names = sorted(
        name for name in os.listdir(directory)
        if name != '.DS_Store' and os.path.isdir(os.path.join(directory, name))
    )

    filenames = []
    labels = []
    # enumerate over the sorted names gives a deterministic label -> int
    # mapping; iterating over a set of strings does not.
    for class_id, name in enumerate(class_names):
        class_dir = os.path.join(directory, name)
        for f in os.listdir(class_dir):
            filenames.append(os.path.join(class_dir, f))
            labels.append(class_id)

    return filenames, labels


In [6]:
# Read the image files and labels; the order is intended to match the order
# in which preprocessing.ipynb processed the images.
# The dataset root defaults to the original location but can be overridden
# with the DOG_BREEDS_DIR environment variable, so the notebook can run on
# another machine without editing this cell.
DATA_DIR = os.environ.get('DOG_BREEDS_DIR', '/Users/yueying.teng/Documents/dog_breeds')

file_train, label_train = list_dir(os.path.join(DATA_DIR, 'train'))
file_test, label_test = list_dir(os.path.join(DATA_DIR, 'test'))


In [11]:
# Load the precomputed bottleneck features for both splits.
feature_train = np.load('bnf_train.npy')
feature_test = np.load('bnf_test.npy')

# Each feature row must correspond to exactly one label; stop here with a clear
# message if the saved features and the directory listing are out of sync.
assert len(feature_train) == len(label_train), (
    'bnf_train.npy has %d rows but %d training labels were found'
    % (len(feature_train), len(label_train)))
assert len(feature_test) == len(label_test), (
    'bnf_test.npy has %d rows but %d test labels were found'
    % (len(feature_test), len(label_test)))


In [12]:
# Shapes of the loaded bottleneck feature arrays, shown as a (train, test) pair.
(feature_train.shape, feature_test.shape)

((8177, 7, 7, 512), (2045, 7, 7, 512))

In [20]:
# One-hot encode the integer labels.
# NUM_CLASSES is the width of each one-hot row (presumably one column per breed
# folder - confirm against the dataset).
NUM_CLASSES = 120

# Row i of the identity matrix is the one-hot vector for class i, so indexing
# it with the list of class ids yields one encoded row per sample.
identity = np.eye(NUM_CLASSES)

y_train = identity[label_train]

y_test = identity[label_test]


In [43]:
# New classifier head to be trained on the bottleneck features.

new_model = Sequential()

# Average each feature map over its spatial dimensions; the input shape is
# taken from the loaded features (everything except the batch axis).
new_model.add(GlobalAveragePooling2D(input_shape = feature_train.shape[1:]))

# Only the first layer of a Sequential model needs an input shape, so the
# redundant `input_shape` that used to be passed here has been dropped.
new_model.add(Dense(256, activation = 'relu'))
new_model.add(Dropout(0.5))
# One softmax unit per column of the one-hot targets keeps the output width
# in sync with the label encoding.
new_model.add(Dense(y_train.shape[1], activation = 'softmax'))

new_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_3 ( (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 120)               30840     
Total params: 162,168
Trainable params: 162,168
Non-trainable params: 0
_________________________________________________________________


In [56]:

batch_size = 64
epochs = 30
optimizer = 'adam'

new_model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Make sure the checkpoint folder exists before training starts; otherwise the
# first attempt to save the best model fails.
checkpoint_path = 'saved_models/handprocess_bestmodel.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok = True)

# Keep only the weights of the epoch with the best monitored validation loss.
checkpointer = ModelCheckpoint(filepath = checkpoint_path, verbose = 1, save_best_only = True)

# Model fitting without data augmentation.
# NOTE(review): the test split is used as validation data here, so the
# checkpoint is selected on the same data the final accuracy is reported on.
history = new_model.fit(feature_train, y_train, batch_size = batch_size,
                        epochs = epochs, validation_data = (feature_test, y_test),
                        callbacks = [checkpointer])


Train on 8177 samples, validate on 2045 samples
Epoch 1/30

Epoch 00001: val_loss improved from inf to 4.30058, saving model to saved_models/handprocess_bestmodel.hdf5
Epoch 2/30

Epoch 00002: val_loss did not improve from 4.30058
Epoch 3/30

Epoch 00003: val_loss improved from 4.30058 to 4.27583, saving model to saved_models/handprocess_bestmodel.hdf5
Epoch 4/30

Epoch 00004: val_loss did not improve from 4.27583
Epoch 5/30

Epoch 00005: val_loss did not improve from 4.27583
Epoch 6/30

Epoch 00006: val_loss did not improve from 4.27583
Epoch 7/30

Epoch 00007: val_loss did not improve from 4.27583
Epoch 8/30

Epoch 00008: val_loss did not improve from 4.27583
Epoch 9/30

Epoch 00009: val_loss did not improve from 4.27583
Epoch 10/30

Epoch 00010: val_loss improved from 4.27583 to 4.25495, saving model to saved_models/handprocess_bestmodel.hdf5
Epoch 11/30

Epoch 00011: val_loss did not improve from 4.25495
Epoch 12/30

Epoch 00012: val_loss did not improve from 4.25495
Epoch 13/30

E

In [58]:
# Load the best checkpointed weights and check the testing accuracy.

new_model.load_weights('saved_models/handprocess_bestmodel.hdf5')

# Predict the whole test set in one batched call instead of one predict()
# call per sample; the arg-max over the class axis gives the predicted ids.
newmodel_pred = np.argmax(new_model.predict(feature_test), axis = 1)

# Recover the true class ids from the one-hot targets.
true_labels = np.argmax(y_test, axis = 1)

# Report test accuracy as the percentage of matching predictions.
test_accuracy = 100.0 * np.mean(newmodel_pred == true_labels)
print('Test accuracy: %.4f%%' % test_accuracy)


Test accuracy: 8.0196%
