In [1]:
#imports 

import numpy as np
import os, shutil

import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.utils import image_dataset_from_directory 
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split, cross_val_score

import time
import scipy
from scipy import ndimage
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from PIL import Image

## Load Images

In [2]:
!ls

README.md
Yuhkai_notebook_scratch.ipynb
data
data.zip
notebooks_scratch


In [3]:
# NOTE: `!cd` runs in a throwaway subshell, so this does not change the kernel's
# working directory (the `!ls` cells after it still list the same files).
# The relative "./data/..." paths used below rely on the kernel staying in the
# main project directory; use the `%cd` magic only if the working directory
# really has to change.
!cd ..

In [4]:
!ls

README.md
Yuhkai_notebook_scratch.ipynb
data
data.zip
notebooks_scratch


In [5]:
!ls

README.md
Yuhkai_notebook_scratch.ipynb
data
data.zip
notebooks_scratch


In [6]:
# Training split: collect the .jpeg file names found in each class folder.
# Paths are relative to the main project directory.
train_normal_dir = "./data/chest_xray/train/NORMAL"
train_pneumonia_dir = "./data/chest_xray/train/PNEUMONIA"

imgs_train_normal = list(
    filter(lambda name: name.endswith('.jpeg'), os.listdir(train_normal_dir))
)
imgs_train_pneumonia = list(
    filter(lambda name: name.endswith('.jpeg'), os.listdir(train_pneumonia_dir))
)

In [7]:
# Test split: collect the .jpeg file names found in each class folder.
test_normal_dir = "./data/chest_xray/test/NORMAL"
test_pneumonia_dir = "./data/chest_xray/test/PNEUMONIA"

imgs_test_normal = list(
    filter(lambda name: name.endswith('.jpeg'), os.listdir(test_normal_dir))
)
imgs_test_pneumonia = list(
    filter(lambda name: name.endswith('.jpeg'), os.listdir(test_pneumonia_dir))
)

In [8]:
len(imgs_train_normal)

1341

In [9]:
len(imgs_train_pneumonia)

3875

In [10]:
imgs_train_normal[0]

'IM-0115-0001.jpeg'

In [11]:
imgs_train_pneumonia[0]

'person1000_bacteria_2931.jpeg'

In [12]:
len(imgs_test_normal)

234

In [13]:
len(imgs_test_pneumonia)

390

In [14]:
# Directory iterators that rescale pixel values by 1/255 and resize every image
# to 64x64. batch_size is set large so that a single next() call returns a whole
# array of images at once.
# NOTE(review): the test iterator reports 624 images but uses batch_size=468, so
# one next() call yields only 468 of them. Raising it also requires updating the
# hard-coded (468, 1) reshape of test_labels further down.
train_folder = "./data/chest_xray/train"
test_folder = "./data/chest_xray/test"

train_datagen = ImageDataGenerator(rescale=1./255)
train_reshape = train_datagen.flow_from_directory(
    directory=train_folder,
    target_size=(64, 64),
    batch_size=5216,
)

test_datagen = ImageDataGenerator(rescale=1./255)
test_reshape = test_datagen.flow_from_directory(
    directory=test_folder,
    target_size=(64, 64),
    batch_size=468,
)

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [15]:
# Pull one batch from each iterator (train first, then test) and unpack the
# (images, labels) pairs into arrays.
(train_images, train_labels), (test_images, test_labels) = (
    next(train_reshape),
    next(test_reshape),
)

In [16]:
train_images[0]

array([[[0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        ...,
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ]],

       [[0.        , 0.        , 0.        ],
        [0.01568628, 0.01568628, 0.01568628],
        [0.        , 0.        , 0.        ],
        ...,
        [0.00784314, 0.00784314, 0.00784314],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ]],

       [[0.        , 0.        , 0.        ],
        [1.        , 1.        , 1.        ],
        [0.00392157, 0.00392157, 0.00392157],
        ...,
        [0.00392157, 0.00392157, 0.00392157],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ]],

       ...,

       [[0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0

In [17]:
train_images.shape

(5216, 64, 64, 3)

In [18]:
train_images[0].shape

(64, 64, 3)

In [19]:
train_labels[0]

array([0., 1.], dtype=float32)

In [20]:
train_labels.shape

(5216, 2)

In [21]:
# Hold out 25% of the training images as a validation set for metrics during model fit.
# The classes are imbalanced (1341 NORMAL vs 3875 PNEUMONIA files), so the split is
# stratified on the labels to keep the same class ratio in both parts.
# NOTE: this cell overwrites train_images/train_labels; re-running it without first
# re-running the loading cell splits the already-reduced training set again.
train_images, validation_images, train_labels, validation_labels = train_test_split(
    train_images,
    train_labels,
    test_size=0.25,
    stratify=train_labels,
    random_state=42,
)

In [22]:
train_images.shape

(3912, 64, 64, 3)

In [23]:
validation_images.shape

(1304, 64, 64, 3)

In [24]:
# Flatten every image into one row vector (all remaining axes collapsed) so the
# arrays can be fed to fully connected layers.
train_img = train_images.reshape(len(train_images), -1)
validation_img = validation_images.reshape(len(validation_images), -1)
test_img = test_images.reshape(len(test_images), -1)

# One shape per line: train, validation, test.
print(train_img.shape, validation_img.shape, test_img.shape, sep="\n")

(3912, 12288)
(1304, 12288)
(468, 12288)


In [25]:
train_labels.shape

(3912, 2)

In [26]:
validation_labels.shape

(1304, 2)

In [27]:
test_labels.shape

(468, 2)

In [29]:
# Build (n, 1) binary target columns from the first column of the one-hot labels.
# The row count is inferred with -1 instead of hard-coding 3912/1304/468, so this
# cell keeps working if the split ratio or the number of loaded images changes.
# NOTE(review): column 0 is presumably the NORMAL class (Keras assigns class indices
# in alphanumeric order of the folder names), i.e. y == 1 would mean NORMAL —
# confirm with train_reshape.class_indices.
train_y = np.reshape(train_labels[:, 0], (-1, 1))
validation_y = np.reshape(validation_labels[:, 0], (-1, 1))
test_y = np.reshape(test_labels[:, 0], (-1, 1))

print(train_y.shape)
print(validation_y.shape)
print(test_y.shape)

(3912, 1)
(1304, 1)
(468, 1)


## Baseline Model

In [30]:
# Start the baseline as an empty Sequential model.
baseline_model = Sequential()

In [31]:
# Baseline: a small fully connected network on the flattened pixels, ending in a
# single sigmoid unit for the binary target.
# The model is (re)built in one expression so that re-running this cell yields the
# same four layers instead of stacking four more onto the existing model, and the
# input width is read from train_img rather than hard-coded as 12_288.
baseline_model = models.Sequential([
    layers.Dense(20, activation='relu', input_shape=(train_img.shape[1],)),
    layers.Dense(7, activation='relu'),
    layers.Dense(5, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [32]:
baseline_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                245780    
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 147       
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 40        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 6         
Total params: 245,973
Trainable params: 245,973
Non-trainable params: 0
_________________________________________________________________


In [33]:
# Training configuration for the baseline: SGD optimizer, binary cross-entropy
# loss (matches the single sigmoid output), accuracy reported as the metric.
baseline_compile_kwargs = dict(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
baseline_model.compile(**baseline_compile_kwargs)

In [34]:
# Train for 50 epochs in mini-batches of 32, reporting metrics on the held-out
# validation arrays after every epoch. The returned History object is the cell output.
baseline_model.fit(
    x=train_img,
    y=train_y,
    batch_size=32,
    epochs=50,
    validation_data=(validation_img, validation_y),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1aa74f4e370>

In [36]:
# Loss and accuracy of the fitted baseline on the training arrays
# (evaluate returns [loss, accuracy] given the compiled metric).
results_train = baseline_model.evaluate(x=train_img, y=train_y)
results_train



[0.13757158815860748, 0.9465746283531189]

In [37]:
# Loss and accuracy of the fitted baseline on the test arrays.
# NOTE(review): test_img holds 468 rows although 624 test images were found, so
# this score covers only part of the test set.
results_test = baseline_model.evaluate(x=test_img, y=test_y)
results_test



[1.4075759649276733, 0.7072649598121643]