In [None]:
########################
### Import Libraries ###
########################

%matplotlib inline
import os
import split_folders
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator
from keras import layers, models, optimizers

In [None]:
###########################
### 1. Data Preparation ###
###########################

# --- Tunable parameters ------------------------------------------------
model_name        = "<model_name>.h5"   # file name used when saving/loading the model
seed              = 1337                # seed handed to split_folders
ratios            = (.7, .15, .15)      # fractions for train, validate, test
bs                = 32                  # batch size used by the image generators

# Values passed to the training call later in the notebook
steps_per_epoch   = 40
epochs            = 15
validation_steps  = 15

# --- Source folders (rooted at the current working directory) ----------
projectFolder = os.getcwd()
images_dir    = projectFolder + '/images'
base_dir      = images_dir + '/base'
base_OK_dir, base_NG_dir = base_dir + '/OK', base_dir + '/NG'

# File names of every base image, one list per class
_OKlist = os.listdir(base_OK_dir)
_NGlist = os.listdir(base_NG_dir)

# --- Split base -> train / val / test ----------------------------------
# Per the original note, split_folders creates the train, val and test
# folders under the output directory itself.
split_folders.ratio(base_dir, output=images_dir, seed=seed, ratio=ratios)

# --- Folders produced by the split -------------------------------------
train_dir, validation_dir, test_dir = (
    images_dir + '/' + sub for sub in ('train', 'val', 'test')
)

train_OK_dir,      train_NG_dir      = train_dir + '/OK',      train_dir + '/NG'
validation_OK_dir, validation_NG_dir = validation_dir + '/OK', validation_dir + '/NG'
test_OK_dir,       test_NG_dir       = test_dir + '/OK',       test_dir + '/NG'

# --- Report the number of files per folder -----------------------------
print('1.0\tbase/OK  : ', len(_OKlist), '\tbase/NG  : ', len(_NGlist))
print('-------------------------------------------------')

# One row per split: (ratio, OK label, OK folder, NG label, NG folder)
_rows = (
    (ratios[0], '\ttrain/OK : ', train_OK_dir,      '\ttrain/NG : ', train_NG_dir),
    (ratios[1], '\tval/ OK  : ', validation_OK_dir, '\tval/ NG  : ', validation_NG_dir),
    (ratios[2], '\ttest/ OK : ', test_OK_dir,       '\ttest/ NG : ', test_NG_dir),
)
for _ratio, _ok_label, _ok_dir, _ng_label, _ng_dir in _rows:
    print(_ratio, _ok_label, len(os.listdir(_ok_dir)), _ng_label, len(os.listdir(_ng_dir)))


In [None]:
#############################
### 2. Data Preprocessing ###
#############################

# Only pixel rescaling (multiply by 1/255) is configured; no augmentation
# or whitening option is passed, so every other ImageDataGenerator setting
# stays at its library default.
_rescale = 1./255
train_datagen = ImageDataGenerator(rescale=_rescale)
val_datagen = ImageDataGenerator(rescale=_rescale)

# Settings shared by both directory iterators: images are resized to
# 256 x 256, served in batches of `bs`, with binary labels (two class
# folders, OK and NG).
_flow_kwargs = dict(target_size=(256, 256), batch_size=bs, class_mode='binary')

# Original author's note: train_dir held 1638 + 1638 = 3276 images
train_generator = train_datagen.flow_from_directory(train_dir, **_flow_kwargs)

# Original author's note: validation_dir held 351 + 351 = 702 images
validation_generator = val_datagen.flow_from_directory(validation_dir, **_flow_kwargs)

In [None]:
#######################
### 3. Create Model ###
#######################
# Four Conv2D(3x3, ReLU) + MaxPooling2D(2, 2) stages followed by a dense
# classifier head. The filter count grows with depth (32 -> 64 -> 128 -> 128)
# so deeper layers can represent more sophisticated features.
#
# NOTE(review): the spatial sizes in the trailing comments assume Keras'
# default padding='valid' for Conv2D/MaxPooling2D (a 3x3 conv trims 2 pixels,
# a 2x2/stride-2 pool halves and floors) -- confirm with model.summary().

_input_shape = (256, 256, 3)  # height, width, channels; same 256 x 256 as the generators' target_size

model = models.Sequential([
    # Stage 1
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=_input_shape),  # 256 -> 254
    layers.MaxPooling2D(2, 2),                                               # 254 -> 127
    # Stage 2
    layers.Conv2D(64, (3, 3), activation='relu'),                            # 127 -> 125
    layers.MaxPooling2D(2, 2),                                               # 125 -> 62
    # Stage 3
    layers.Conv2D(128, (3, 3), activation='relu'),                           # 62 -> 60
    layers.MaxPooling2D(2, 2),                                               # 60 -> 30
    # Stage 4
    layers.Conv2D(128, (3, 3), activation='relu'),                           # 30 -> 28
    layers.MaxPooling2D(2, 2),                                               # 28 -> 14
    # Classifier head: flatten the feature maps into one vector per image,
    # then a 512-unit hidden layer and a single sigmoid output unit.
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
########################
### 4. Compile Model ###
########################

# RMSprop with a learning rate of 1e-4; binary cross-entropy is the loss
# for the single sigmoid output, and accuracy is the only tracked metric.
_optimizer = optimizers.RMSprop(lr=1e-4)

model.compile(optimizer=_optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
######################
### 5. Train Model ###
######################

# Keras offers three training entry points: .fit(), .fit_generator() and
# .train_on_batch(). The generator variant is used here because the batches
# come from the directory iterators built in the preprocessing cell.

# Number of image files in the train and validation folders.
# These were previously bare expressions in the middle of the cell, which
# Jupyter never displays (only a cell's last expression is echoed), so the
# counts are printed explicitly instead.
train_num_files = len(os.listdir(train_OK_dir)) + len(os.listdir(train_NG_dir))
val_num_files = len(os.listdir(validation_OK_dir)) + len(os.listdir(validation_NG_dir))
print('Training files:', train_num_files, '| Validation files:', val_num_files)

# Each epoch draws `steps_per_epoch` training batches, then
# `validation_steps` validation batches.
history = model.fit_generator(train_generator,
                              steps_per_epoch  = steps_per_epoch,
                              epochs           = epochs,
                              validation_data  = validation_generator,
                              validation_steps = validation_steps)

# Save Model
# The folder may already exist from an earlier run; that case is reported
# rather than treated as an error.
models_dir = projectFolder + '/Models'
try:
    os.mkdir(models_dir)
except FileExistsError:
    print("Folder already exists.")
model.save(models_dir + '/' + model_name)
print("Saved model " + model_name + " in " + models_dir)

In [None]:
######################################################
### 6. Visualize Improvements in Accuracy and Loss ###
######################################################

# Keras releases differ in the history key recorded for metrics=['accuracy']:
# some store it as 'acc', others as 'accuracy'. Use whichever is present so
# this cell does not raise KeyError after a library upgrade.
_acc_key = 'acc' if 'acc' in history.history else 'accuracy'

acc = history.history[_acc_key]
val_acc = history.history['val_' + _acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']

# 1-based epoch numbers for the x axis
epochs_ = range(1, len(acc) + 1)

# Plot Accuracy
plt.plot(epochs_, acc, 'g', label='Training acc')
plt.plot(epochs_, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Start a second figure so the loss curves are not drawn over the accuracy plot
plt.figure()

# Plot Loss
plt.plot(epochs_, loss, 'g', label='Training loss')
plt.plot(epochs_, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
#####################
### 7. Test Model ###
#####################

# Load the model saved by the training cell
model = models.load_model(projectFolder + '/Models/' + model_name)

# Same rescaling as for the train/validation data
test_datagen = ImageDataGenerator(rescale=1./255)

# Apply transformation settings onto test_dir
# (original author's note: test_dir held 540 + 540 = 1080 images -- not verified here)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256,256), # (height, width) that every image is resized to
    batch_size=bs,         # number of images per batch
    class_mode='binary',   # two classes (OK / NG) -> 1D binary labels
    shuffle=False)         # fixed order so repeated evaluations see the same batches

# Evaluate on EVERY batch of the test set. The previous code passed only
# test_generator[0] (the first batch of `bs` images) to model.evaluate, so
# the reported loss/accuracy covered a small sample of the test folder.
# len(test_generator) is the number of batches needed for one full pass.
loss, acc = model.evaluate_generator(test_generator, steps=len(test_generator))
print('Test loss: %s, Test acc: %s' % (loss, acc))