In [45]:
import os
from glob import glob

import numpy as np
import pandas as pd
from sklearn.datasets import load_files

from keras.applications import VGG16, DenseNet121
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ModelCheckpoint
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from keras.layers import Dropout, Flatten, Dense, Activation
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils

In [8]:
# define function to load train, test, and validation datasets
def load_dataset(path, n_classes):
    """Return the file paths and one-hot labels of the images under ``path``.

    ``path`` is scanned with ``sklearn.datasets.load_files``; the class index
    it reports for each file is one-hot encoded with ``to_categorical``.

    Parameters
    ----------
    path : str
        Root folder handed to ``load_files``.
    n_classes : int
        Number of columns of the one-hot encoding.

    Returns
    -------
    chest_files : numpy.ndarray
        The ``filenames`` entry returned by ``load_files``.
    chest_targets : numpy.ndarray
        One-hot encoded ``target`` entry, one row per file.
    """
    # Only the paths and class indices are used below, so do not let
    # load_files read the raw bytes of every image into memory (its default).
    data = load_files(path, load_content=False)
    chest_files = np.array(data['filenames'])
    chest_targets = np_utils.to_categorical(np.array(data['target']), n_classes)
    return chest_files, chest_targets

# Class names: one sub-folder of the training directory per class.
# The folder name is taken with os.path rather than a fixed-width slice so it
# keeps working if the dataset root (and therefore the prefix length) changes.
labels = [os.path.basename(os.path.normpath(item))
          for item in sorted(glob("../imgs/all/train/*/"))]
n_classes = len(labels)

# load the train and test datasets (file paths + one-hot targets)
train_files, train_targets = load_dataset('../imgs/all/train', n_classes)
test_files, test_targets = load_dataset('../imgs/all/test', n_classes)

# Image size fed to the network: width, height, colour channels
img_width, img_height, channels = 224, 224, 3

# Per-class share of each split.  The targets are one-hot rows, so counting
# the non-zero entries of every column counts the images of that class.
train_prop = np.count_nonzero(train_targets, axis=0) / len(train_targets)
test_prop = np.count_nonzero(test_targets, axis=0) / len(test_targets)

print('Proportions: \n')
for index, label in enumerate(labels):
    # values are printed as percentages (fraction * 100)
    print('{} train: {:.4f}'.format(label, train_prop[index]*100))
    print('{} test: {:.4f}'.format(label, test_prop[index]*100))
    print('*********************')

print('\nStatistics about the Dataset:\n')
print('There are %d total chest diseases.' % len(labels))
# Sum the two lengths directly instead of concatenating the arrays first.
print('There are %s total chest images.\n' % (len(train_files) + len(test_files)))
print('There are %d training chest images.' % len(train_files))
print('There are %d test chest images.'% len(test_files))

Proportions: 

Atelectasis train: 15.9895
Atelectasis test: 10.6340
*********************
Cardiomegaly train: 4.0766
Cardiomegaly test: 7.8382
*********************
Consolidation train: 4.0077
Consolidation test: 5.1922
*********************
Edema train: 1.2533
Edema test: 2.3465
*********************
Effusion train: 12.5603
Effusion test: 11.6326
*********************
Emphysema train: 3.4155
Emphysema test: 3.2451
*********************
Fibrosis train: 4.0628
Fibrosis test: 3.7943
*********************
Hernia train: 0.4132
Hernia test: 1.1483
*********************
Infiltration train: 24.4732
Infiltration test: 29.8552
*********************
Mass train: 6.9274
Mass test: 3.4448
*********************
Nodule train: 10.8525
Nodule test: 4.5432
*********************
Pleural_Thickening train: 4.3796
Pleural_Thickening test: 4.9426
*********************
Pneumonia train: 0.9778
Pneumonia test: 1.1982
*********************
Pneumothorax train: 6.6107
Pneumothorax test: 10.1847
*******************

In [33]:
# Backbone: DenseNet121 with ImageNet weights and without its own classifier.
cnn_model = DenseNet121(weights= 'imagenet', include_top=False, input_shape=(img_height, img_width, channels))

# Classification head stacked on the backbone's output.
full_model = cnn_model.output
full_model = Dropout(0.5)(full_model)
full_model = GlobalAveragePooling2D()(full_model)
full_model = Dense(256, activation='relu')(full_model)
full_model = BatchNormalization()(full_model)
# The targets in this notebook are one-hot (to_categorical / class_mode=
# 'categorical'), i.e. exactly one class per image, and the loss is
# categorical_crossentropy.  That loss expects the outputs to form a
# probability distribution over the classes, which softmax provides and
# independent sigmoids do not.
predictions = Dense(n_classes, activation='softmax')(full_model)

model = Model(inputs=cnn_model.input, outputs=predictions)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" to the output.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_15 (InputLayer)           (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_15 (ZeroPadding2 (None, 230, 230, 3)  0           input_15[0][0]                   
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_15[0][0]          
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

In [32]:
# Report how many layers the assembled model contains.
layer_count = len(model.layers)
print("There are {} layers in the model".format(layer_count))

There are 432 layers in the model


In [None]:
# Freeze the pre-trained DenseNet backbone so its weights are not updated.
# Iterating over cnn_model.layers (instead of a hard-coded model.layers[:431])
# keeps the randomly initialised head -- Dense(256), BatchNormalization and the
# output layer -- trainable; the fixed slice froze everything except the very
# last layer.
for layer in cnn_model.layers:
    layer.trainable = False

# Changes to `trainable` are only taken into account when the model is
# compiled, and the model was already compiled when it was built, so compile
# it again with the same settings.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [42]:
# Directories read by the image generators.
# NOTE(review): the validation generator reads the test folder, so reported
# validation metrics are test-set metrics -- confirm this is intended.
train_data_dir = '../imgs/all/train'
validation_data_dir = '../imgs/all/test'
batch_size = 16
epochs = 2
# Number of batches per epoch (despite the "samples" in the names).
# Ceiling division so the final, smaller batch is included; floor division
# would leave the remainder images unvisited every epoch.
nb_train_samples = (len(train_files) + batch_size - 1) // batch_size
nb_validation_samples = (len(test_files) + batch_size - 1) // batch_size

In [43]:
# Training images: rescale pixel values by 1/255 and apply random shear, zoom
# and horizontal flips as augmentation.
# NOTE(review): the backbone was loaded with ImageNet weights; plain 1/255
# rescaling may not match the preprocessing those weights expect -- confirm.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# Evaluation images: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# shuffle=False keeps the evaluation order fixed, so every epoch is validated
# on the same images in the same order and predictions can be matched against
# validation_generator.classes.
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

Found 7261 images belonging to 14 classes.
Found 2003 images belonging to 14 classes.


In [None]:
def lr_schedule(epoch):
    """Return the learning rate to use for ``epoch`` (step-wise decay).

    * ``epoch`` greater than 50 -> 0.0003
    * ``epoch`` greater than 10 -> 0.0005
    * otherwise                 -> 0.001
    """
    # Check the highest threshold first so each branch can return directly.
    if epoch > 50:
        return 0.0003
    if epoch > 10:
        return 0.0005
    return 0.001

# checkpointer = ModelCheckpoint(filepath='weights.best.DENSNET121.hdf5', verbose=1, save_best_only=True)
# Train with the learning-rate schedule defined above.
# `steps_per_epoch` / `validation_steps` are the Keras 2 names for the number
# of batches drawn per epoch; the Keras 1 names `samples_per_epoch` /
# `nb_val_samples` are only accepted through a deprecated compatibility layer
# that emits warnings.  The values passed are already batch counts.
# The returned History object is kept so the training curves can be inspected.
history = model.fit_generator(train_generator, steps_per_epoch=nb_train_samples, epochs=epochs,
                              validation_data=validation_generator, validation_steps=nb_validation_samples,
                              callbacks=[LearningRateScheduler(lr_schedule)])

  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


Epoch 1/2