In [1]:
import numpy as np
from PIL import Image
import glob

from keras import layers, optimizers
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.models import Model

Using TensorFlow backend.


In [2]:
def image_preprocessing(image_path):
    image_list = []
    for filename in glob.glob(image_path + '/*.jpeg'):   # create the path of image directory
        img = Image.open(filename)
        if img.mode == 'L':
            img = img.convert('RGB')
        img = img.resize((128,128))
        im = np.asarray(img, dtype=np.float32)/255
        image_list.append(im)
    return np.array(image_list)

In [3]:
test_normal = image_preprocessing('./chest_xray/test/NORMAL')
test_sick = image_preprocessing('./chest_xray/test/PNEUMONIA')

train_normal = image_preprocessing('./chest_xray/train/NORMAL')
train_sick = image_preprocessing('./chest_xray/train/PNEUMONIA')

val_normal = image_preprocessing('./chest_xray/val/NORMAL')
val_sick = image_preprocessing('./chest_xray/val/PNEUMONIA')

print("test_normal's shape is {}".format(test_normal.shape))
print("test_sick's shape is {}".format(test_sick.shape))
print("train_normal's shape is {}".format(train_normal.shape))
print("train_sick's shape is {}".format(train_sick.shape))
print("val_normal's shape is {}".format(val_normal.shape))
print("val_sick's shape is {}".format(val_sick.shape))

test_normal's shape is (234, 128, 128, 3)
test_sick's shape is (390, 128, 128, 3)
train_normal's shape is (1341, 128, 128, 3)
train_sick's shape is (3875, 128, 128, 3)
val_normal's shape is (8, 128, 128, 3)
val_sick's shape is (8, 128, 128, 3)


In [4]:
test_normal_y = np.zeros((test_normal.shape[0],1))
train_normal_y = np.zeros((train_normal.shape[0],1))
val_normal_y = np.zeros((val_normal.shape[0],1))
test_sick_y = np.ones((test_sick.shape[0],1))
train_sick_y = np.ones((train_sick.shape[0],1))
val_sick_y = np.ones((val_sick.shape[0],1))

print("test_normal_y's shape is {}".format(test_normal_y.shape))
print("test_sick_y's shape is {}".format(test_sick_y.shape))
print("train_normal_y's shape is {}".format(train_normal_y.shape))
print("train_sick_y's shape is {}".format(train_sick_y.shape))
print("val_normal_y's shape is {}".format(val_normal_y.shape))
print("val_sick_y's shape is {}".format(val_sick_y.shape))
print(test_normal_y[12])

test_normal_y's shape is (234, 1)
test_sick_y's shape is (390, 1)
train_normal_y's shape is (1341, 1)
train_sick_y's shape is (3875, 1)
val_normal_y's shape is (8, 1)
val_sick_y's shape is (8, 1)
[0.]


In [5]:
test_X = np.concatenate((test_normal, test_sick))
test_Y = np.concatenate((test_normal_y, test_sick_y))
train_X = np.concatenate((train_normal, train_sick))
train_Y = np.concatenate((train_normal_y, train_sick_y))
val_X = np.concatenate((val_normal, val_sick))
val_Y = np.concatenate((val_normal_y, val_sick_y))

print(train_X.shape)
print(train_Y.shape)
print(test_X.shape)
print(test_Y.shape)
print(val_X.shape)
print(val_Y.shape)
print(train_X[0])

(5216, 128, 128, 3)
(5216, 1)
(624, 128, 128, 3)
(624, 1)
(16, 128, 128, 3)
(16, 1)
[[[0.10588235 0.10588235 0.10588235]
  [0.10588235 0.10588235 0.10588235]
  [0.11372549 0.11372549 0.11372549]
  ...
  [0.10196079 0.10196079 0.10196079]
  [0.11764706 0.11764706 0.11764706]
  [0.10980392 0.10980392 0.10980392]]

 [[0.10588235 0.10588235 0.10588235]
  [0.11764706 0.11764706 0.11764706]
  [0.11372549 0.11372549 0.11372549]
  ...
  [0.10980392 0.10980392 0.10980392]
  [0.10980392 0.10980392 0.10980392]
  [0.11372549 0.11372549 0.11372549]]

 [[0.10588235 0.10588235 0.10588235]
  [0.11764706 0.11764706 0.11764706]
  [0.10588235 0.10588235 0.10588235]
  ...
  [0.10980392 0.10980392 0.10980392]
  [0.11372549 0.11372549 0.11372549]
  [0.11372549 0.11372549 0.11372549]]

 ...

 [[0.         0.         0.        ]
  [0.         0.         0.        ]
  [0.09411765 0.09411765 0.09411765]
  ...
  [0.09019608 0.09019608 0.09019608]
  [0.09411765 0.09411765 0.09411765]
  [0.09411765 0.09411765 0.09

In [6]:
# shuffle the data sets
def data_shuffle(X,Y):
    s = np.arange(X.shape[0])
    np.random.shuffle(s)
    return X[s], Y[s]

In [7]:
seed = 100
np.random.seed(seed)
train_X_shuffle, train_Y_shuffle = data_shuffle(train_X, train_Y)
test_X_shuffle, test_Y_shuffle = data_shuffle(test_X, test_Y)
val_X_shuffle, val_Y_shuffle = data_shuffle(val_X, val_Y)

In [8]:
# create a simple CNN model
def XrayModel(input_shape):
    """
    Implementation of the XrayModel.
    
    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in Keras
    """
    
    X_input = Input(input_shape)
    
#     X = ZeroPadding2D((3,3))(X_input)
    X = Conv2D(32, (5,5), strides=(1,1), padding='same')(X_input)
    X = BatchNormalization(axis=3)(X)    
    X = Activation('relu')(X)
    X = MaxPooling2D((2,2))(X)
    
    X = Conv2D(64, (5,5), padding='same')(X)
    X = BatchNormalization(axis=3)(X)    
    X = Activation('relu')(X)
    X = MaxPooling2D((2,2))(X)
    
    X = Conv2D(128, (5,5), padding='same')(X)
    X = BatchNormalization(axis=3)(X)    
    X = Activation('relu')(X)
    X = MaxPooling2D((2,2))(X)
    
    X = Conv2D(256, (7,7))(X)
    X = BatchNormalization(axis=3)(X)    
    X = Activation('relu')(X)
    X = MaxPooling2D((2,2))(X)
    
    X = Flatten()(X)

    X = Dense(1, activation='sigmoid')(X)
    
    model = Model(inputs=X_input, outputs=X)
    
    
    return model

In [9]:
model = XrayModel((128,128,3))

In [10]:
opt = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)

In [11]:
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=["accuracy"])

In [12]:
model.fit(x=train_X_shuffle, y=train_Y_shuffle, epochs=20, batch_size=16)

val_preds = model.evaluate(x=val_X_shuffle, y=val_Y_shuffle)

print()
print ("Loss = " + str(val_preds[0]))
print ("Test Accuracy = " + str(val_preds[1]))

test_preds = model.evaluate(x=test_X_shuffle, y=test_Y_shuffle)
### END CODE HERE ###
print()
print ("Loss = " + str(test_preds[0]))
print ("Test Accuracy = " + str(test_preds[1]))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
()
Loss = 0.3433024287223816
Test Accuracy = 0.875
()
Loss = 3.0093768132038607
Test Accuracy = 0.7227564102564102
