In [1]:
# import the necessary packages
import datetime
import matplotlib
import numpy as np
from keras.optimizers import SGD
from base_model_vgg import MiniVGGNet
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def process_data(data, batch_size, label_binarizer, augment=True):
    '''
    Endlessly yield (images, labels) batches read from a CSV file.

    Every non-blank row is parsed as ``label,v0,v1,...``: the first field
    is the label and the remaining integer fields are reshaped to a
    (64, 64, 3) uint8 image. When the end of the file is reached the reader
    rewinds to the first row, so a batch may combine the last rows of one
    pass with the first rows of the next.

    Parameters
    ----------
    data : str
        Path of the CSV file to read.
    batch_size : int
        Number of rows collected for each yielded batch.
    label_binarizer : object
        Already-fitted encoder; its ``transform`` method is applied to the
        array of labels of each batch.
    augment : bool, optional
        If True (default) each batch is passed through an
        ImageDataGenerator applying random rotation, zoom, shift, shear and
        horizontal flip. If False the images are yielded unmodified and in
        file order, which is usually what validation and prediction need.

    Yields
    ------
    tuple
        ``(images, labels)`` where ``images`` is an array holding
        ``batch_size`` images and ``labels`` is the output of
        ``label_binarizer.transform``.

    Raises
    ------
    ValueError
        If the file contains no data rows, or a row does not hold exactly
        64 * 64 * 3 values after the label.
    '''

    # The augmenter's configuration never changes, so build it once instead
    # of once per batch.
    data_aug = None
    if augment:
        data_aug = ImageDataGenerator(rotation_range=20, zoom_range=0.15, width_shift_range=0.2,
                                      height_shift_range=0.2, shear_range=0.15,
                                      horizontal_flip=True, fill_mode="nearest")

    with open(data, "r") as f:
        # number of data rows read since the last rewind; used to detect a
        # file without any usable row instead of looping forever
        rows_this_pass = 0
        while True:
            images = []
            labels = []
            # process images in batches
            while len(images) < batch_size:
                line = f.readline()
                if line == "":
                    # end of file: start over from the first row
                    if rows_this_pass == 0:
                        raise ValueError("no data rows found in {!r}".format(data))
                    f.seek(0)
                    rows_this_pass = 0
                    continue

                line = line.strip()
                if not line:
                    # blank row (e.g. a trailing newline): nothing to parse
                    continue

                fields = line.split(",")
                image = np.array([int(x) for x in fields[1:]], dtype="uint8")
                image = image.reshape((64, 64, 3))
                # append features and target labels
                images.append(image)
                labels.append(fields[0])
                rows_this_pass += 1

            labels = label_binarizer.transform(np.array(labels))

            if data_aug is None:
                # float32 so both paths hand floating-point images to the
                # model (assumes Keras' default float type -- TODO confirm)
                yield (np.array(images, dtype="float32"), labels)
                continue

            # one batch of image data with real-time data augmentation
            (images, labels) = next(data_aug.flow(np.array(images),
                                                  labels, batch_size=batch_size))

            yield (np.array(images), labels)

In [3]:
# Input files: one CSV for training, one for testing
train_csv, test_csv = "train.csv", "test.csv"

# Training hyper-parameters: number of epochs and rows per batch
epoch_num, size_batch = 200, 16

# Row counters, incremented later while the CSV files are scanned
num_train_images = num_test_images = 0

In [4]:
# Scan both CSV files once to collect the set of training labels, the list of
# test labels and the number of rows in each file.
# Everything is (re)initialised here so that re-running this cell does not
# double-count rows or keep stale labels.
labels = set()
testLabels = []
num_train_images = 0
num_test_images = 0

# `with` guarantees the file is closed even if a row fails to parse
with open(train_csv, "r") as f:
    for line in f:
        line = line.strip()
        if not line:
            # a blank row is not an image; counting it would inflate the
            # step count and register an empty-string class
            continue
        # the label is the first comma-separated field
        label = line.split(",")[0]
        labels.add(label)
        num_train_images += 1

with open(test_csv, "r") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        label = line.split(",")[0]
        testLabels.append(label)
        num_test_images += 1

In [5]:
# Learn the one-vs-all label encoding from the training labels, then use the
# same encoder to convert the test labels.
lb = LabelBinarizer().fit(list(labels))
testLabels = lb.transform(testLabels)

In [6]:
# Create one batch generator per CSV file (training first, then test);
# both share the batch size and the fitted label binarizer.
train_obj, test_obj = (
    process_data(csv_path, size_batch, lb) for csv_path in (train_csv, test_csv)
)

In [7]:
# Build MiniVGGNet, passing the number of classes known to the label
# binarizer as its last argument.
model = MiniVGGNet.build(
    64,
    64,
    3,
    len(lb.classes_),
)

# SGD optimizer; the decay value is the initial learning rate divided by the
# number of epochs.
opt = SGD(
    lr=1e-2,
    momentum=0.9,
    decay=1e-2 / epoch_num,
)

model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# num_test_images

In [None]:
# Fit the model from the training generator and validate on the test
# generator; step counts are the number of whole batches in each file.
print("training/ generating model ...")
model.fit_generator(
    train_obj,
    epochs=epoch_num,
    steps_per_epoch=num_train_images // size_batch,
    validation_data=test_obj,
    validation_steps=num_test_images // size_batch,
)

training/ generating model ...
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200

In [None]:
# re-initialize test data generator, so evaluation starts at the first row
test_obj = process_data(test_csv, size_batch, lb)

# Predict on the testing images. The step count is a ceiling division: just
# enough batches to cover every test image. (`n // b + 1` would request a
# whole extra batch of wrapped-around rows whenever n is a multiple of b.)
predIdxs = model.predict_generator(
    test_obj, steps=(num_test_images + size_batch - 1) // size_batch)

# The generator rewinds at end of file, so the last batch can be padded with
# rows from the start of the file; keep one prediction per test image, then
# take the index of the label with the largest predicted probability.
# NOTE(review): process_data calls ImageDataGenerator.flow without
# shuffle=False, and Keras shuffles by default, so rows inside a batch may not
# be in file order -- confirm before comparing predIdxs positionally
# against testLabels.
predIdxs = np.argmax(predIdxs[:num_test_images], axis=1)

In [None]:
# Write the model to 'model_sample2.h5' (path is relative to the working directory)
model.save('model_sample2.h5')