# Keras test on a medical image dataset, following the approach described in the fast.ai course

In [3]:
# Reload the autoreload extension and set it to mode 2 so that edited
# modules are picked up again without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [4]:
# Root folder of the image data. The trailing slash matters: later cells
# build sub-folder paths from it by plain string formatting.
PATH = "data/"

# Side length in pixels; used as target_size=(sz, sz) by the image generators.
sz = 224

# Mini-batch size. NOTE: a later configuration cell assigns batch_size again.
batch_size = 64

In [40]:
import numpy as np
import datetime
import os
import pickle
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.layers import Dropout, Flatten, Dense
from keras.applications import ResNet50
from keras.models import Model, Sequential
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras.applications.resnet50 import preprocess_input

###### Initial Data handling

In [22]:
# Root of the working copy of the dataset and its three split folders.
base_dir = 'data'
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

# One sub-folder per class is created inside every split folder.
main_classification_types = [
    'dyed-lifted-polyps',
    'dyed-resection-margins',
    'esophagitis',
    'normal-cecum',
    'normal-pylorus',
    'normal-z-line',
    'polyps',
    'ulcerative-colitis',
]

# Location of the original (source) data.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
orginal_data_folder = '/home/vajira/simula/Datasets/kvasir_v2_preprocessed_borders_navbox_removed'

In [23]:
# Dataset layout: number of classes and number of images per class in each split
# (train_size * no_of_classes is the total number of training images).
no_of_classes = 8
train_size = 500
validation_size = 250
test_size = 250

# Image dimensions in pixels.
# NOTE(review): not referenced by the cells visible here, which use `sz` instead.
img_height = 150
img_width = 150

# Training hyper-parameters. This assignment replaces the batch_size set in
# the earlier configuration cell.
batch_size = 50
no_of_epochs = 5

# Step counts must be whole numbers of batches, so use floor division
# (true division would yield floats such as 80.0).
training_steps_per_epoch = (train_size * no_of_classes) // batch_size

validation_steps = (validation_size * no_of_classes) // batch_size

In [6]:
# Build the split folders with os.path.join so the result is correct whether
# or not PATH ends with a path separator (plain string formatting silently
# produced e.g. 'datatrain' when the trailing slash was missing).
train_data_dir = os.path.join(PATH, 'train')
validation_data_dir = os.path.join(PATH, 'validation')

In [12]:
def my_plots(history_object, model_name_string):
    """Plot and save the training/validation accuracy and loss curves.

    Two figures are drawn, accuracy first and loss second. In each one the
    training series is shown as blue dots and the validation series as a
    blue line. Every figure is also written to ``plot_dir`` as
    ``<title>.tiff``, where the title includes ``model_name_string``.

    Parameters
    ----------
    history_object : object exposing a ``history`` mapping
        The mapping must contain ``'loss'`` and ``'val_loss'`` plus the
        accuracy series under either ``'acc'``/``'val_acc'`` or
        ``'accuracy'``/``'val_accuracy'``.
    model_name_string : str
        Appended to both plot titles and therefore to both file names.

    Raises
    ------
    KeyError
        If a required series is missing from the history mapping.

    Notes
    -----
    Uses the module-level names ``plt`` and ``plot_dir``.
    NOTE(review): no ``matplotlib.pyplot`` import is visible in this
    notebook's import cell - confirm ``plt`` is in scope before calling.
    """
    history = history_object.history

    # The accuracy metric is recorded as 'acc' by older Keras releases and as
    # 'accuracy' by newer ones; accept whichever is present.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    acc = history[acc_key]
    val_acc = history['val_' + acc_key]
    loss = history['loss']
    val_loss = history['val_loss']

    epochs = range(1, len(acc) + 1)

    def _plot_and_save(train_values, val_values, short_label, title_string):
        # Draw one train-vs-validation figure and save it under its title.
        plt.figure()
        plt.plot(epochs, train_values, 'bo', label='Training ' + short_label)
        plt.plot(epochs, val_values, 'b', label='Validation ' + short_label)
        plt.title(title_string)
        plt.legend()
        plt.savefig(os.path.join(plot_dir, title_string + '.tiff'))

    # Title strings are kept exactly as before so saved file names do not change.
    _plot_and_save(acc, val_acc, 'acc',
                   'Training and validation accuracy =' + model_name_string)
    _plot_and_save(loss, val_loss, 'loss',
                   'Training and validation loss' + model_name_string)

In [16]:
def save_model(model_object):
    """Save ``model_object`` to a timestamped ``.h5`` file inside ``model_dir``.

    The file name is ``'medical_v1_fine_tunned'`` immediately followed by the
    current local time formatted as ``%Y-%m-%d %H:%M`` and the ``.h5``
    extension. The full path is printed after saving.

    Returns
    -------
    str
        The generated file name only, without the ``model_dir`` prefix.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    file_name = f'medical_v1_fine_tunned{timestamp}.h5'
    target_path = os.path.join(model_dir, file_name)

    model_object.save(target_path)
    print('Fined tunened Model saved, model name =', target_path)

    return file_name

In [25]:
def make_folder_structure(base_dir, main_classification_types_dirs):
    """Create ``base_dir/{train,validation,test}/<class>`` for every class.

    Directories that already exist are left untouched, so the call is safe
    to repeat. Missing parent directories of ``base_dir`` are created too.

    Parameters
    ----------
    base_dir : str
        Root directory of the folder tree.
    main_classification_types_dirs : iterable of str
        Class folder names to create inside each split folder.

    Returns
    -------
    tuple of (list, list, list)
        The class directory paths of the train, validation and test splits,
        in that order; each list follows the order of the given class names.

    Raises
    ------
    FileExistsError
        If one of the target paths exists but is not a directory.
    """
    # Materialise once so a one-shot iterable is usable for all three splits.
    class_names = list(main_classification_types_dirs)

    split_names = ('train', 'validation', 'test')
    dirs_by_split = {split_name: [] for split_name in split_names}

    # exist_ok=True replaces the exists()-then-mkdir() pattern, which can race
    # and which failed when parents of base_dir were missing.
    os.makedirs(base_dir, exist_ok=True)

    for split_name in split_names:
        split_dir = os.path.join(base_dir, split_name)
        # Created explicitly so the split folder exists even with no classes.
        os.makedirs(split_dir, exist_ok=True)

        for class_name in class_names:
            type_dir = os.path.join(split_dir, class_name)
            os.makedirs(type_dir, exist_ok=True)
            dirs_by_split[split_name].append(type_dir)

    return (dirs_by_split['train'],
            dirs_by_split['validation'],
            dirs_by_split['test'])


In [41]:
def save_training_history(history_object, output_dir=None):
    """Pickle ``history_object.history`` to a timestamped file.

    The file name is ``'history_of_medical_v1'`` immediately followed by the
    current local time formatted as ``%Y-%m-%d %H:%M``. The path is printed
    after the file has been written.

    Parameters
    ----------
    history_object : object exposing a ``history`` attribute
        Only ``history_object.history`` is pickled, not the object itself.
    output_dir : str, optional
        Directory to write into. Defaults to the module-level ``history_dir``.

    Returns
    -------
    str
        Path of the written file.
    """
    if output_dir is None:
        output_dir = history_dir

    history_string = 'history_of_medical_v1' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    history_fname = os.path.join(output_dir, history_string)

    # The context manager closes the file even if pickling raises.
    with open(history_fname, "wb") as f:
        pickle.dump(history_object.history, f)

    print('History of the model saved, history model name =', history_fname)
    return history_fname
    

In [26]:
# Create the train/validation/test folder tree with one sub-folder per class.
# The returned directory lists are not stored; they appear as the cell output.
make_folder_structure(base_dir, main_classification_types)

(['data/train/dyed-lifted-polyps',
  'data/train/dyed-resection-margins',
  'data/train/esophagitis',
  'data/train/normal-cecum',
  'data/train/normal-pylorus',
  'data/train/normal-z-line',
  'data/train/polyps',
  'data/train/ulcerative-colitis'],
 ['data/validation/dyed-lifted-polyps',
  'data/validation/dyed-resection-margins',
  'data/validation/esophagitis',
  'data/validation/normal-cecum',
  'data/validation/normal-pylorus',
  'data/validation/normal-z-line',
  'data/validation/polyps',
  'data/validation/ulcerative-colitis'],
 ['data/test/dyed-lifted-polyps',
  'data/test/dyed-resection-margins',
  'data/test/esophagitis',
  'data/test/normal-cecum',
  'data/test/normal-pylorus',
  'data/test/normal-z-line',
  'data/test/polyps',
  'data/test/ulcerative-colitis'])

In [28]:
# Output directories used by save_model, save_training_history and my_plots.
# os.makedirs(..., exist_ok=True) creates a directory only when it is missing,
# without the race of a separate exists() check.

# the model directory (explicit mode kept from the original mkdir call)
model_dir = 'my_models'
os.makedirs(model_dir, mode=0o777, exist_ok=True)

# history saving directory
history_dir = 'history_of_training'
os.makedirs(history_dir, exist_ok=True)

# plot directory
plot_dir = 'plots'
os.makedirs(plot_dir, exist_ok=True)


In [8]:
# Training images: ResNet50 preprocessing plus random shear, zoom and
# horizontal-flip augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation images: preprocessing only, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Class labels are taken from the sub-folder layout and one-hot encoded
# (class_mode='categorical').
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(sz, sz),
    batch_size=batch_size,
    class_mode='categorical',
)

# shuffle=False keeps the validation images in a fixed order.
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    shuffle=False,
    target_size=(sz, sz),
    batch_size=batch_size,
    class_mode='categorical',
)

Found 4000 images belonging to 8 classes.
Found 2000 images belonging to 8 classes.


In [9]:
# ImageNet-pretrained ResNet50 without its classification top.
base_model = ResNet50(include_top=False, weights='imagenet')

# New classification head on the backbone output:
# global average pooling -> Dense(1024, relu) -> Dense(8, softmax).
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(units=1024, activation='relu')(x)
predictions = Dense(units=8, activation='softmax')(x)

In [14]:
# Full network: backbone input through to the new softmax head.
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every layer of the pretrained backbone; the head layers added on top
# are not part of base_model.layers and stay trainable.
for layer in base_model.layers:
    layer.trainable = False

model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [36]:
%%time
# Train the new head. Step counts are derived from each generator's own
# batch size rather than the global `batch_size`, which is assigned in more
# than one cell and may no longer match the value the generators were built
# with (a mismatch makes an "epoch" cover the wrong number of images).
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_generator.n // train_generator.batch_size,
    epochs=3,
    workers=4,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // validation_generator.batch_size,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3
CPU times: user 8min 53s, sys: 11.4 s, total: 9min 5s
Wall time: 1min 59s


In [37]:
# Save the trained model; save_model returns the generated file name
# (without the model_dir prefix).
model_name = save_model(model)

Fined tunened Model saved, model name = my_models/medical_v1_fine_tunned2018-07-10 14:37.h5


In [42]:
# Pickle the metrics recorded during training; the returned value is the
# path of the written file.
history_name = save_training_history(history)

History of the model saved, history model name = history_of_training/history_of_medical_v12018-07-10 14:39


In [43]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, None, None, 6 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [33]:
# Total number of layers in the model (presumably used to pick the
# `split_at` index in the fine-tuning cell - confirm).
len(model.layers)

177

In [44]:
# Index of the first layer to fine-tune.
split_at = 140

# Layers before `split_at` stay frozen; layers from `split_at` onward
# (including the new head) are made trainable.
for layer in model.layers[:split_at]:
    layer.trainable = False
for layer in model.layers[split_at:]:
    layer.trainable = True

# Compile again after changing the trainable flags.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [45]:
%%time
# Fine-tune the unfrozen layers for one epoch. Step counts are derived from
# each generator's own batch size rather than the global `batch_size`, which
# is assigned in more than one cell and may not match the generators.
# The returned History object is left as the cell's displayed value.
model.fit_generator(
    train_generator,
    steps_per_epoch=train_generator.n // train_generator.batch_size,
    epochs=1,
    workers=3,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // validation_generator.batch_size,
)

Epoch 1/1
CPU times: user 2min 50s, sys: 4.93 s, total: 2min 54s
Wall time: 49.2 s


<keras.callbacks.History at 0x7fa2d5bb94e0>