In [89]:
import glob
import os

from skimage import io
import numpy as np
import cv2

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from keras.optimizers import Adam
from keras import backend as K
from sklearn.metrics import accuracy_score, confusion_matrix

In [90]:
img_width, img_height = 224, 224
train_data_dir = 'processed_data/train'
validation_data_dir = 'processed_data/validation'

In [91]:
def get_n_files(directory):
    count = 0
    for _, _, files in os.walk(directory):
        count += len(files)
    return count

In [92]:
n_train_samples = get_n_files(train_data_dir)
n_validation_samples = get_n_files(validation_data_dir)
epochs = 20
batch_size = 32
input_shape = (img_width, img_height, 3)

In [93]:
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(1, activation='sigmoid'))

In [94]:
optimizer = Adam(learning_rate=0.001)
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [95]:
train_datagen = ImageDataGenerator(rescale=1. / 255, shear_range=0.2, zoom_range=0.1, horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(train_data_dir, target_size=(img_width, img_height),
                                                    batch_size=batch_size, class_mode='binary')

validation_generator = test_datagen.flow_from_directory(validation_data_dir, target_size=(img_width, img_height),
                                                        batch_size=batch_size, class_mode='binary')

history = model.fit_generator(train_generator, steps_per_epoch=n_train_samples // batch_size, epochs=epochs,
                              validation_data=validation_generator, validation_steps=n_validation_samples // batch_size)


Found 381 images belonging to 2 classes.
Found 94 images belonging to 2 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [131]:
images = []

n_damaged = 0
n_undamaged = 0

# load images from the validation directory and count the number
# of instances belonging to each of the classes
for directory in ['undamaged', 'damaged']:
    full_img_paths = glob.glob(os.path.join(validation_data_dir, directory, '*'))
    for img_path in full_img_paths:
        try:
            image = io.imread(img_path)
            image = cv2.resize(image, (224, 224))
            images.append(image.reshape(1, 224, 224, 3))
            n_damaged += int('undamaged' not in directory)
            n_undamaged += int('undamaged' in directory)
        except ValueError:
            continue

In [132]:
# check which class is represented by 1
class_dict = validation_generator.class_indices
print(class_dict)

{'damaged': 0, 'undamaged': 1}


In [133]:
# create a vector of true classes in the validation set
y_test = n_undamaged * [class_dict['undamaged']] + n_damaged * [class_dict['damaged']]

In [134]:
# baseline model - random prediction
y_pred_random = np.random.randint(0, 1, len(images))

# baseline model - always predicts most prevalent class
y_pred_mono = len(images) * [max(set(y_test), key=y_test.count)]

# baseline accuracies
print('Accuracy (random model): ', accuracy_score(y_test, y_pred_random))
print('Accuracy (model always predicting the most prevalent class): ', accuracy_score(y_test, y_pred_mono))

Accuracy (random model):  0.4787234042553192
Accuracy (model always predicting the most prevalent class):  0.5212765957446809


In [135]:
# make predictions using trained model
images = np.vstack(images)
images = images / 255.0
y_pred = model.predict_classes(images)

In [136]:
print('Model accuracy: ', accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

Model accuracy:  0.7553191489361702
[[33 12]
 [11 38]]
