In [15]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
import pandas as pd
from glob import glob
from IPython.core.display import Image, display
import cv2                
import matplotlib.pyplot as plt                        
%matplotlib inline
from collections import Counter

from sklearn.metrics import classification_report

from keras import models, layers, optimizers
from keras.preprocessing import image
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.callbacks import ModelCheckpoint
from tqdm import tqdm
import math
import pickle
import os
from PIL import ImageFile
import time

from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense, Activation
from keras.models import Sequential
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard
# from keras.callbacks import TensorBoard

In [41]:
# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=2):
    """Return the image file paths and one-hot labels found under ``path``.

    ``path`` is handed to scikit-learn's ``load_files``, which expects one
    sub-folder per class.

    Args:
        path: root directory containing one sub-folder per class.
        num_classes: number of columns of the one-hot label matrix. Defaults
            to 2, the value that was previously hard-coded.

    Returns:
        A tuple ``(chest_files, chest_targets)``: a NumPy array of file names
        and the matching one-hot encoded targets.
    """
    # Only file names and targets are used, so skip reading every image's raw
    # bytes into memory (load_files does that by default).
    data = load_files(path, load_content=False)
    chest_files = np.array(data['filenames'])
    chest_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return chest_files, chest_targets

# load train and test datasets (file paths + one-hot targets)
train_files, train_targets = load_dataset('../imgs/images/train')
test_files, test_targets = load_dataset('../imgs/images/test')

# Class labels are the names of the sub-folders of the training directory.
# The folder name is taken with os.path instead of a fixed character offset,
# so it no longer depends on the exact length of the path prefix.
# NOTE(review): sorted order is assumed to match the column order of the
# one-hot targets built by load_dataset -- confirm.
labels = [os.path.basename(os.path.normpath(item))
          for item in sorted(glob("../imgs/images/train/*/"))]
CLASSES = len(labels)

# Share of samples per class: count the non-zero entries of each one-hot column.
train_prop = np.count_nonzero(train_targets, axis=0) / len(train_targets)
test_prop = np.count_nonzero(test_targets, axis=0) / len(test_targets)
print('Proportions: \n')
for index, label in enumerate(labels):
    print('{} train: {:.4f}'.format(label, train_prop[index]*100))
    print('{} test: {:.4f}'.format(label, test_prop[index]*100))

print('\nStatistics about the Dataset:\n')
print('There are %d total chest deseases.' % len(labels))
print('There are %s total chest images.\n' % (len(train_files) + len(test_files)))
print('There are %d training chest images.' % len(train_files))
print('There are %d test chest images.'% len(test_files))

Proportions

Finding train: 39.8562
Finding test: 65.9772
Proportions

No Finding train: 60.1438
No Finding test: 34.0228
Statistics about the Dataset:

There are 2 total chest deseases.
There are 4999 total chest images.

There are 4032 training chest images.
There are 967 test chest images.


In [40]:
# Percentage of training samples per class, from the non-zero count of each
# column of the one-hot training targets.
prop_label = np.count_nonzero(train_targets, axis=0) / len(train_targets)
row_template = '{} train: {:.4f}'
for position, class_name in enumerate(labels):
    print(row_template.format(class_name, prop_label[position]*100))

Finding train: 39.8562
No Finding train: 60.1438


In [37]:
# Unpack the per-class training shares into one scalar per class
# (the unpacking requires exactly two target columns).
finding_prop_train, no_finding_prop_train = (
    np.count_nonzero(train_targets, axis=0) / len(train_targets)
)
# Bare expression so the notebook shows this value as the cell output.
no_finding_prop_train

0.6014384920634921

# Create Generators

In [3]:
# Input geometry fed to the network: 224 x 224 pixels, 3 channels.
img_width = 224
img_height = 224
channels = 3

# Weights file name (not referenced by the cells visible here) and the
# directories the image generators read from.
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = '../imgs/images/train'
test_data_dir = '../imgs/images/test'

# Batch sizes for the train and test generators.
train_batch_size = 30
test_batch_size = 10

# ### Load ResNet50 model
# model = ResNet50(weights= 'imagenet', include_top=False, input_shape=(img_height, img_width, channels))

# ### Freeze some layers
# for layer in model.layers:
#     layer.trainable = False

# ### Check the trainable status of the individual layers
# for layer in model.layers:
#     print(layer, layer.trainable)

print('**********************TRAIN GENERATOR**********************')
### Train Generator
# Augmenting generator: pixels are rescaled to [0, 1], then each image is
# centred and scaled by its own mean / std before the random flips, shifts,
# rotation, shear and zoom below are applied.
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   samplewise_center=True,
                                   samplewise_std_normalization=True,
                                   horizontal_flip=True,
                                   vertical_flip=False,
                                   height_shift_range=0.05,
                                   width_shift_range=0.1,
                                   rotation_range=5,
                                   shear_range=0.1,
                                   fill_mode='reflect',
                                   zoom_range=0.15)

# target_size is (height, width). Training batches must be shuffled:
# flow_from_directory lists files class by class, so with shuffle=False each
# batch would contain images of a single class only.
train_generator = train_datagen.flow_from_directory(train_data_dir,
                                                    target_size=(img_height, img_width),
                                                    batch_size=train_batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True)
                                                    #color_mode = 'grayscale'
    

print('**********************TEST GENERATOR**********************')
### Test Generator
# No augmentation at test time, but the same per-image normalisation as the
# training generator (rescale + sample-wise centring / std scaling), so the
# network sees evaluation inputs on the scale it was trained on.
test_datagen = ImageDataGenerator(rescale=1. / 255,
                                  samplewise_center=True,
                                  samplewise_std_normalization=True)
# target_size is (height, width). shuffle=False keeps the file order fixed so
# predictions can be matched against test_generator.classes.
test_generator = test_datagen.flow_from_directory(test_data_dir,
                                                  target_size=(img_height, img_width),
                                                  batch_size=test_batch_size,
                                                  class_mode='categorical',
                                                  shuffle=False)

**********************TRAIN GENERATOR**********************
Found 4032 images belonging to 2 classes.
**********************TEST GENERATOR**********************
Found 967 images belonging to 2 classes.


# CNN
## Architecture

In [4]:
# Dataset sizes, equal to the "Found N images" counts printed by the generators.
nb_train_samples = 4032
nb_test_samples = 967

# Define the architecture: ImageNet-pretrained ResNet50 without its top
# classifier, followed by a small fully connected head.
model = ResNet50(weights= 'imagenet', include_top=False, input_shape=(img_height, img_width, channels)) # or weights=None
ChestRESN50_model = Sequential()
ChestRESN50_model.add(model)

ChestRESN50_model.add(layers.Flatten())

# Hidden block: Dense -> BatchNorm -> ReLU -> Dropout
ChestRESN50_model.add(layers.Dense(256))
ChestRESN50_model.add(layers.BatchNormalization())
ChestRESN50_model.add(layers.Activation('relu'))
ChestRESN50_model.add(layers.Dropout(0.5))

# Output block: two-way softmax. No Dropout after the softmax: zeroing and
# rescaling the predicted probabilities during training corrupts the loss.
ChestRESN50_model.add(layers.Dense(2))
ChestRESN50_model.add(layers.BatchNormalization())
ChestRESN50_model.add(layers.Activation('softmax'))

# categorical_crossentropy matches the softmax output and the one-hot labels
# produced by the generators (class_mode='categorical').
adam = optimizers.Adam(lr=0.0001)
ChestRESN50_model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

ChestRESN50_model.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 7, 7, 2048)        23587712  
_________________________________________________________________
flatten_1 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               25690368  
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1024      
_________________________________________________________________
activation_50 (Activation)   (None, 256)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 514       
__________

## Fit

In [5]:
# Save the weights only when the monitored quantity improves
# (save_best_only=True; ModelCheckpoint monitors val_loss by default).
checkpointer = ModelCheckpoint(filepath='weights.best.RESN50.hdf5', verbose=1, save_best_only=True)

# Step counts must be whole numbers: round up so the last, partial batch of
# each generator is still consumed once per epoch.
train_steps = math.ceil(train_generator.samples / train_generator.batch_size)
val_steps = math.ceil(test_generator.samples / test_generator.batch_size)

History = ChestRESN50_model.fit_generator(train_generator,
                                          epochs=1,
                                          validation_data=test_generator,
                                          validation_steps=val_steps,
                                          steps_per_epoch=train_steps,
                                          callbacks=[checkpointer], verbose=1, shuffle=False)
#use_multiprocessing=True

Epoch 1/2

KeyboardInterrupt: 

In [None]:
### Load the model weights with the best validation loss.
ChestRESN50_model.load_weights('weights.best.RESN50.hdf5')

### Calculate classification accuracy on the test dataset.
# Run every test image through the network via the test generator. The
# generator is created with shuffle=False, so row i of the output corresponds
# to test_generator.classes[i].
test_generator.reset()
test_steps = math.ceil(test_generator.samples / test_generator.batch_size)
test_probabilities = ChestRESN50_model.predict_generator(test_generator, steps=test_steps)

# get index of the predicted class for each image in the test set
predictions = np.argmax(test_probabilities, axis=1)

# report test accuracy over all test images (no hard-coded sample count)
test_true = np.asarray(test_generator.classes)
test_accuracy = 100*np.mean(predictions == test_true)
print('Test accuracy: %.4f%%' % test_accuracy)

In [None]:
# Older Keras releases log accuracy as 'acc' / 'val_acc', newer ones as
# 'accuracy' / 'val_accuracy'; pick whichever key is present in the history.
acc_key = 'acc' if 'acc' in History.history else 'accuracy'

fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(10, 10))

# summarize history for accuracy
ax_acc.plot(History.history[acc_key])
ax_acc.plot(History.history['val_' + acc_key])
ax_acc.set_title('Model Accuracy')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['train', 'val'], loc='upper left')

# summarize history for loss
ax_loss.plot(History.history['loss'])
ax_loss.plot(History.history['val_loss'])
ax_loss.set_title('Model Loss')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Classification Report
# Ground truth is taken from the test generator (created with shuffle=False),
# so it is in the generator's file order; this assumes `predictions` were
# produced in that same order. Truncating to len(predictions) replaces the
# former hard-coded sample count.
y_true = np.asarray(test_generator.classes)[:len(predictions)]
print(classification_report(y_true, predictions, target_names=labels))