In [116]:
#imports 

import numpy as np
import os, shutil

import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing.image import load_img
 
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split, cross_val_score

import time
import scipy
from scipy import ndimage
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from PIL import Image
from sklearn.metrics import plot_confusion_matrix, confusion_matrix, classification_report, recall_score, ConfusionMatrixDisplay
from sklearn.dummy import DummyClassifier
from sklearn.metrics import plot_confusion_matrix

## Image Load In

In [117]:
# load in the train data

#starting from main project directory

train_normal_dir = "./data/chest_xray/train/NORMAL"
train_pneumonia_dir = "./data/chest_xray/train/PNEUMONIA"

imgs_train_normal = [file for file in os.listdir(train_normal_dir) if file.endswith('.jpeg')]
imgs_train_pneumonia = [file for file in os.listdir(train_pneumonia_dir) if file.endswith('.jpeg')]

In [118]:
test_normal_dir = "./data/chest_xray/test/NORMAL"
test_pneumonia_dir = "./data/chest_xray/test/PNEUMONIA"

imgs_test_normal = [file for file in os.listdir(test_normal_dir) if file.endswith('.jpeg')]
imgs_test_pneumonia = [file for file in os.listdir(test_pneumonia_dir) if file.endswith('.jpeg')]

In [119]:
len(imgs_train_normal)

1341

In [120]:
len(imgs_train_pneumonia)

3875

In [121]:
imgs_train_normal[0]

'IM-0115-0001.jpeg'

In [122]:
imgs_train_pneumonia[0]

'person1000_bacteria_2931.jpeg'

In [123]:
len(imgs_test_normal)

234

In [124]:
len(imgs_test_pneumonia)

390

In [125]:
train_folder = "./data/chest_xray/train"
train_reshape = ImageDataGenerator(rescale=1./255).flow_from_directory(
        train_folder, 
        target_size=(64, 64), batch_size=5216)

test_folder = "./data/chest_xray/test"
test_reshape = ImageDataGenerator(rescale=1./255).flow_from_directory(
        test_folder, 
        target_size=(64, 64), batch_size=468)

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [126]:
train_images, train_labels = next(train_reshape)
test_images, test_labels = next(test_reshape)

In [127]:
train_images[0]

array([[[0.28235295, 0.28235295, 0.28235295],
        [0.41176474, 0.41176474, 0.41176474],
        [0.47450984, 0.47450984, 0.47450984],
        ...,
        [0.32156864, 0.32156864, 0.32156864],
        [0.3372549 , 0.3372549 , 0.3372549 ],
        [0.28627452, 0.28627452, 0.28627452]],

       [[0.37647063, 0.37647063, 0.37647063],
        [0.43137258, 0.43137258, 0.43137258],
        [0.5372549 , 0.5372549 , 0.5372549 ],
        ...,
        [0.34509805, 0.34509805, 0.34509805],
        [0.3254902 , 0.3254902 , 0.3254902 ],
        [0.30980393, 0.30980393, 0.30980393]],

       [[0.43137258, 0.43137258, 0.43137258],
        [0.4901961 , 0.4901961 , 0.4901961 ],
        [0.6       , 0.6       , 0.6       ],
        ...,
        [0.29411766, 0.29411766, 0.29411766],
        [0.25882354, 0.25882354, 0.25882354],
        [0.28235295, 0.28235295, 0.28235295]],

       ...,

       [[0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0

In [128]:
train_images.shape

(5216, 64, 64, 3)

In [129]:
train_images[0].shape

(64, 64, 3)

In [130]:
train_labels[0]

array([1., 0.], dtype=float32)

In [131]:
train_labels.shape

(5216, 2)

In [132]:
#split training set into a validation set for metrics during model fit
train_images, validation_images, train_labels, validation_labels = train_test_split(train_images, train_labels, random_state=42)

In [133]:
train_images.shape

(3912, 64, 64, 3)

In [134]:
validation_images.shape

(1304, 64, 64, 3)

In [135]:
train_img = train_images.reshape(train_images.shape[0], -1)
validation_img = validation_images.reshape(validation_images.shape[0], -1)
test_img = test_images.reshape(test_images.shape[0], -1)

print(train_img.shape)
print(validation_img.shape)
print(test_img.shape)

(3912, 12288)
(1304, 12288)
(468, 12288)


In [136]:
train_labels.shape

(3912, 2)

In [137]:
validation_labels.shape

(1304, 2)

In [138]:
test_labels.shape

(468, 2)

In [139]:
train_y = np.reshape(train_labels[:,0], (3912,1))
validation_y = np.reshape(validation_labels[:,0], (1304,1))
test_y = np.reshape(test_labels[:,0], (468,1))

print(train_y.shape)
print(validation_y.shape)
print(test_y.shape)

(3912, 1)
(1304, 1)
(468, 1)


## Dummy Model

In [140]:
dummy = DummyClassifier(strategy='most_frequent')
dummy.fit(train_images, train_labels)
dummy_score = dummy.score(validation_images, validation_labels)
dummy_score

0.7392638036809815

In [143]:
plot_confusion_matrix(dummy, validation_images, validation_labels)

ValueError: multilabel-indicator is not supported

## Baseline Model

In [None]:
baseline_model = models.Sequential()

In [None]:
baseline_model.add(layers.Dense(20, activation='relu', input_shape=(12_288,)))
baseline_model.add(layers.Dense(7, activation='relu'))
baseline_model.add(layers.Dense(5, activation='relu'))
baseline_model.add(layers.Dense(1, activation='sigmoid'))

In [None]:
baseline_model.summary()

In [None]:
baseline_model.compile(optimizer='sgd', 
              loss='binary_crossentropy', 
              metrics=['accuracy'])

In [144]:
baseline_model.fit(train_img, 
                   train_y, 
                   epochs=50, 
                   batch_size=32, 
                   validation_data=(validation_img, validation_y))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1d51b3bce20>

In [145]:
results_train = baseline_model.evaluate(train_img, train_y)
results_train



[0.5687029361724854, 0.7441206574440002]

In [146]:
results_test = baseline_model.evaluate(test_img, test_y)
results_test



[0.6903402209281921, 0.6303418874740601]

## CNN Model

In [147]:
cnn = models.Sequential()
cnn.add(layers.Conv2D(filters=64, kernel_size=(3,3), activation='relu', 
          input_shape=(64, 64, 1)))
cnn.add(layers.MaxPooling2D(pool_size=(2,2)))
cnn.add(layers.Conv2D(filters=64, kernel_size=(3,3), activation='relu'))
cnn.add(layers.Flatten())
cnn.add(layers.Dense(1, activation='sigmoid'))
cnn.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 62, 62, 64)        640       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 31, 31, 64)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 29, 29, 64)        36928     
_________________________________________________________________
flatten_4 (Flatten)          (None, 53824)             0         
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 53825     
Total params: 91,393
Trainable params: 91,393
Non-trainable params: 0
_________________________________________________________________


In [148]:
cnn_test = cnn.fit(train_img, train_y,
                    epochs=15,
                    batch_size=50,
                    validation_data=(validation_img, validation_y))

RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.

## Prebuilt Model(VVG19)

In [74]:
from keras.applications import VGG19
cnn_base = VGG19(weights='imagenet', 
                 include_top=False, 
                 input_shape=(64, 64, 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [75]:
cnn_base.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 64, 64, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0     

In [111]:
import datetime
start = datetime.datetime.now()
import os, shutil
import time
import matplotlib.pyplot as plt
%matplotlib inline
import scipy
import numpy as np
from PIL import Image
from scipy import ndimage
from keras.preprocessing.image import ImageDataGenerator, array_to_img
np.random.seed(123)

In [112]:
train_folder = './data/chest_xray/train'
test_folder = './data/chest_xray/test'
val_folder = './data/chest_xray/val'
datagen = ImageDataGenerator(rescale=1./255) 
batch_size = 10

In [113]:
from keras import models
from keras import layers
from keras import optimizers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=10*2*2*512))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer=optimizers.RMSprop(lr=1e-4),
              loss='binary_crossentropy',
              metrics=['acc'])

history = model.fit(train_img, 
                   train_y, 
                   epochs=50, 
                   batch_size=32, 
                   validation_data=(validation_img, validation_y))

Epoch 1/50


ValueError: in user code:

    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step  **
        outputs = model.train_step(data)
    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\keras\engine\training.py:747 train_step
        y_pred = self(x, training=True)
    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:975 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs,
    C:\Users\Peter\anaconda3\envs\learn-env\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:212 assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer sequential_10 is incompatible with the layer: expected axis -1 of input shape to have value 20480 but received input with shape [None, 12288]
