In [71]:
# Standard library
import glob
import os
import random
from typing import List, Optional

# Helper libraries
import cv2
import numpy as np

import tensorflow as tf
from tensorflow import keras

In [22]:
# Largest value of an 8-bit pixel (pixels range from 0 to 255).
# Dividing an image by this value rescales its pixels into the range 0 to 1.
PIXEL_RANGE = 255

# Height, in pixels, that every image is resized to.
RESIZE_IMAGE_HEIGHT = 100

# Width, in pixels, that every image is resized to.
RESIZE_IMAGE_WIDTH = 100

# All test and training images are JPEG files (extension written without the leading ".").
JPEG_FILE_EXTENSION = 'jpeg'

In [23]:
# Root directory of the chest X-ray data set. Set the CHEST_XRAY_DIR environment variable to
# point the notebook at another copy of the data; the default is the original local location.
CHEST_XRAY_ROOT_DIR_PATH = os.environ.get('CHEST_XRAY_DIR',
                                          '/Users/williambernard/Downloads/chest_xray')

# Directories holding the training and testing images, one directory per class.
# The trailing '' makes os.path.join keep a trailing path separator on each directory.
TRAIN_NORMAL_LUNGS_DIR_PATH = os.path.join(CHEST_XRAY_ROOT_DIR_PATH, 'train', 'NORMAL', '')

TRAIN_PNEUMONIA_LUNGS_DIR_PATH = os.path.join(CHEST_XRAY_ROOT_DIR_PATH, 'train', 'PNEUMONIA', '')

TEST_NORMAL_LUNGS_DIR_PATH = os.path.join(CHEST_XRAY_ROOT_DIR_PATH, 'test', 'NORMAL', '')

TEST_PNEUMONIA_LUNGS_DIR_PATH = os.path.join(CHEST_XRAY_ROOT_DIR_PATH, 'test', 'PNEUMONIA', '')

# Original (inconsistently named) constant, kept so existing cells keep working.
TEST_PNEUMONIA_LUNGS_PATH_DIR_PATH = TEST_PNEUMONIA_LUNGS_DIR_PATH

In [24]:
def prepare_paths_to_images(path_to_lungs_with_pneumonia: str,
                            path_to_lungs_without_pneumonia: str,
                            file_extension: str,
                            seed: Optional[int] = None) -> List[str]:
    """
    Collects the paths of every image with the given extension in both class directories.

    The returned paths are whatever glob produces, i.e. the given directory joined with the
    file name; they are absolute only if the directories passed in are absolute.

    :param path_to_lungs_with_pneumonia: should be path to a directory
    :param path_to_lungs_without_pneumonia: should be path to a directory
    :param file_extension: jpg, png, etc. Do not include the "." in the extension
    :param seed: optional seed for the shuffle. When given, a private random.Random(seed) is
                 used so the order is reproducible; when None the global random state is used.
    :return image_paths: Python list containing shuffled list of image paths
    """
    file_extension_wildcard = '*.' + file_extension

    lungs_with_pneumonia_image_paths = glob.glob(os.path.join(path_to_lungs_with_pneumonia,
                                                              file_extension_wildcard))
    lungs_without_pneumonia_image_paths = glob.glob(os.path.join(path_to_lungs_without_pneumonia,
                                                                 file_extension_wildcard))

    # glob returns files in arbitrary order, so sort first: the shuffle result then depends
    # only on the random state, not on the file system.
    image_paths = sorted(lungs_with_pneumonia_image_paths + lungs_without_pneumonia_image_paths)

    # shuffle() works IN PLACE and returns None, so its result must not be assigned.
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(image_paths)

    return image_paths

In [38]:
'''
TODO: Could the results of this function be cached? See https://ring-cache.readthedocs.io/en/stable/why.html#common-problems-of-cache
'''
def prepare_labels(image_paths: List[str]) -> np.ndarray:
    """
    Assigns labels to training or testing data. Labels are the integers 1 (has pneumonia)
    or 0 (does not).

    An image is labelled 1 when its file name contains 'bacteria' or 'virus'. Only the file
    name is inspected, so a directory whose name happens to contain one of those words does
    not affect the labels.

    :param image_paths: Python list of file paths.
    :return labels: integer NumPy array with one label per path, in the same order as image_paths.
    """
    labels = []

    for image_path in image_paths:
        # Look at the file name only, never at the directories leading to it.
        file_name = os.path.basename(image_path)
        has_pneumonia = 'bacteria' in file_name or 'virus' in file_name
        labels.append(1 if has_pneumonia else 0)

    # An explicit dtype keeps the array integer-typed even when image_paths is empty.
    return np.array(labels, dtype=int)

In [42]:
'''
TODO: Could the loaded images be cached? See https://ring-cache.readthedocs.io/en/stable/why.html#common-problems-of-cache
'''
def prepare_images_in_opencv_format(image_paths: List[str], num_rows: int, num_columns: int):
    """
    Loads every image as grayscale, resizes it, and scales its pixel values by PIXEL_RANGE.

    :param image_paths: Python list of image paths
    :param num_rows: height, in pixels, of each resized image
    :param num_columns: width, in pixels, of each resized image
    :return opencv_images: single NumPy array stacking all resized images (one
                           num_rows x num_columns matrix per path), divided by PIXEL_RANGE
    :raises ValueError: if an image cannot be read from one of the paths
    """
    opencv_images = []

    for image_path in image_paths:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals a missing or unreadable file by returning None, not by raising.
            raise ValueError('Could not read image: ' + image_path)
        # cv2.resize expects its size argument as (width, height), i.e. (columns, rows).
        opencv_images.append(cv2.resize(img, (num_columns, num_rows)))

    opencv_images = np.array(opencv_images)
    opencv_images = opencv_images / PIXEL_RANGE
    return opencv_images


In [124]:
# Shuffled paths of all training images (both classes).
train_image_paths = prepare_paths_to_images(
    path_to_lungs_with_pneumonia=TRAIN_PNEUMONIA_LUNGS_DIR_PATH,
    path_to_lungs_without_pneumonia=TRAIN_NORMAL_LUNGS_DIR_PATH,
    file_extension=JPEG_FILE_EXTENSION,
)

In [125]:
# Load the training images as resized, rescaled grayscale matrices.
train_images = prepare_images_in_opencv_format(
    image_paths=train_image_paths,
    num_rows=RESIZE_IMAGE_HEIGHT,
    num_columns=RESIZE_IMAGE_WIDTH,
)

In [126]:
# One label per training image, in the same order as train_image_paths.
train_labels = prepare_labels(image_paths=train_image_paths)

In [127]:
# Add a trailing channel axis (1 = grayscale) so the images match the model's input shape.
# The size comes from the resize constants so it cannot drift out of sync with them.
train_images = train_images.reshape((-1, RESIZE_IMAGE_HEIGHT, RESIZE_IMAGE_WIDTH, 1))

In [134]:
# Shuffled paths of all testing images (both classes).
test_image_paths = prepare_paths_to_images(
    path_to_lungs_with_pneumonia=TEST_PNEUMONIA_LUNGS_PATH_DIR_PATH,
    path_to_lungs_without_pneumonia=TEST_NORMAL_LUNGS_DIR_PATH,
    file_extension=JPEG_FILE_EXTENSION,
)

In [135]:
# Load the testing images as resized, rescaled grayscale matrices.
test_images = prepare_images_in_opencv_format(
    image_paths=test_image_paths,
    num_rows=RESIZE_IMAGE_HEIGHT,
    num_columns=RESIZE_IMAGE_WIDTH,
)

In [136]:
# One label per testing image, in the same order as test_image_paths.
test_labels = prepare_labels(image_paths=test_image_paths)

In [137]:
# Add a trailing channel axis (1 = grayscale) so the images match the model's input shape.
# The size comes from the resize constants so it cannot drift out of sync with them.
test_images = test_images.reshape((-1, RESIZE_IMAGE_HEIGHT, RESIZE_IMAGE_WIDTH, 1))

In [138]:
# Small CNN for binary classification: one convolution + pooling stage followed by a dense head.
model = keras.Sequential([
    # 32 filters with a 3x3 kernel. The kernel size must be passed as ONE argument: the older
    # `Convolution2D(32, 3, 3)` spelling is read by tf.keras as kernel_size=3, strides=3.
    keras.layers.Conv2D(32, (3, 3),
                        input_shape=(RESIZE_IMAGE_HEIGHT, RESIZE_IMAGE_WIDTH, 1),
                        activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # Flatten the feature maps into a 1d vector for the dense layers.
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    # Single sigmoid unit: outputs a value between 0 and 1 for the binary label.
    keras.layers.Dense(1, activation='sigmoid')
]
)

In [139]:
# train_images = train_images.reshape(-1, RESIZE_IMAGE_HEIGHT, RESIZE_IMAGE_WIDTH, 1)

In [140]:
# from tensorflow.keras.preprocessing.image import ImageDataGenerator

# training_images2 = np.array(training_images, copy=True)
# training_labels2 = np.array(training_labels, copy=True)

# datagen = ImageDataGenerator(
#     featurewise_center=True,
#     featurewise_std_normalization=True,
#     rotation_range=20
#     )

# datagen.fit(training_images)

# result_training = np.concatenate((training_images, training_images2), axis=0)
# result_labels = np.concatenate((training_labels, training_labels2), axis=0)

In [142]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the full training set for 30 passes over the data.
model.fit(x=train_images, y=train_labels, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30

In [None]:
# Score the trained model on the test set; the result unpacks into the loss and the accuracy.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)

print('\nTest accuracy:', test_acc)


# # model.fit(training_images, training_labels, epochs=10)
# # TODO: how do I get this to work?
# history = model.fit_generator(datagen.flow(result_training, result_labels, batch_size = 35),
#                                           steps_per_epoch=len(training_images) / 32, epochs = 12)