In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub

from keras_preprocessing.image import ImageDataGenerator
from keras.utils.vis_utils import plot_model

In [None]:
# Root directory under which both datasets used by this notebook are located.
input_dir = '/kaggle/input'

# csv_dir holds the train/test label CSVs; images_dir is the folder handed to
# the Keras generators as the place to look up image files.
csv_dir = os.path.join(input_dir, 'nihsample')
sample_dir = os.path.join(input_dir, 'sample/')
images_dir = os.path.join(sample_dir, 'sample/sample/images')

In [None]:
# Paths of the label tables for the two splits, both located in csv_dir.
training_file, testing_file = (
    os.path.join(csv_dir, csv_name)
    for csv_name in ('train_data.csv', 'test_data.csv')
)

In [None]:
# Load both label tables; index_col=0 uses the first CSV column as the index
# instead of exposing it as a data column.
training_df, testing_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (training_file, testing_file)
)

In [None]:
# Store the 'pneumonia' label column as strings in both splits so it can be
# remapped to class names by string key.
for split_df in (training_df, testing_df):
    split_df['pneumonia'] = split_df['pneumonia'].astype(str)

In [None]:
# Mapping from the stringified 0/1 flag to the class names that the data
# generators are configured with.
di = {'0': "NORMAL", '1': "PNEUMONIA"}

# Assign the remapped column back explicitly. Calling replace(inplace=True) on
# the Series returned by df[...] is a chained in-place operation: it raises a
# FutureWarning on recent pandas and silently leaves the DataFrame unchanged
# once Copy-on-Write is active.
training_df['pneumonia'] = training_df['pneumonia'].replace(di)
testing_df['pneumonia'] = testing_df['pneumonia'].replace(di)

In [None]:
# Number of images per mini-batch produced by each data generator.
batch_size = 64

# Square target size that every image is resized to when loaded.
image_side = 224
image_size = (image_side, image_side)

In [None]:
# Pixel values are scaled by 1/255 in every generator.
pixel_scale = 1.0 / 255.0

# Random augmentations applied by the training-side generator only.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
)

# validation_split=0.2 reserves a fifth of the training frame for the
# 'validation' subset requested when the generators are created.
train_datagen = ImageDataGenerator(
    rescale=pixel_scale,
    validation_split=0.2,
    **augmentation_options,
)

# Test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Keyword arguments shared by the training, validation and test generators.
# The explicit `classes` order fixes NORMAL -> index 0 and PNEUMONIA -> index 1.
common_flow_args = dict(
    directory=images_dir,
    x_col="image_index",
    y_col='pneumonia',
    batch_size=batch_size,
    seed=42,
    class_mode='categorical',
    target_size=image_size,
    color_mode='rgb',
    classes=['NORMAL', 'PNEUMONIA'],
)

# Augmented, shuffled batches from the 'training' part of the split.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=training_df,
    subset='training',
    shuffle=True,
    **common_flow_args,
)

# Validation images must not be randomly flipped/rotated/zoomed, otherwise
# val_loss and val_accuracy are measured on distorted inputs. A rescale-only
# generator with the SAME validation_split as train_datagen (0.2) selects the
# same rows of training_df for the 'validation' subset.
valid_datagen = ImageDataGenerator(rescale=1./255., validation_split=0.2)

valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=training_df,
    subset='validation',
    shuffle=False,
    **common_flow_args,
)

# Held-out test set: rescale only, fixed order so that predictions line up
# with test_generator.classes.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=testing_df,
    shuffle=False,
    **common_flow_args,
)

In [None]:
# Width of the softmax output layer. Derived from the generator's class map
# (NORMAL / PNEUMONIA -> 2) so it cannot drift from the `classes` list the
# generators were built with.
num_classes = len(train_generator.class_indices)

In [None]:
# TF-Hub handle of the ResNet-v2-50 ImageNet feature-vector module.
URL = "https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4"

# image_size is the generators' target_size, which Keras interprets as
# (height, width); unpack in that order so the names match their meaning.
# The resulting input shape is the same as before.
IMG_HEIGHT, IMG_WIDTH = image_size

feature_extractor = hub.KerasLayer(URL, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Keep the pretrained backbone frozen; only the layers stacked on top train.
feature_extractor.trainable = False

In [None]:
# Classification head on top of the feature extractor: three Dropout -> Dense
# (ReLU) stages with growing dropout rate and shrinking width, followed by a
# final Dropout and a softmax layer with one unit per class.
model = tf.keras.Sequential([feature_extractor])

for drop_rate, hidden_units in ((0.2, 2156), (0.3, 1280), (0.4, 540)):
    model.add(tf.keras.layers.Dropout(drop_rate))
    model.add(tf.keras.layers.Dense(hidden_units, activation='relu'))

model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

model.summary()

In [None]:
# The network ends in a softmax over one-hot (class_mode='categorical')
# labels, so categorical cross-entropy is the matching loss. For exactly two
# classes it yields the same value as the previous binary cross-entropy, but
# it remains correct if the number of classes ever changes.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Baseline: score the untrained head on the test set for later comparison.
loss, accuracy = model.evaluate(test_generator)

print(
    '\nLoss before training: {:,.3f}'.format(loss),
    'Accuracy before training: {:.3%}'.format(accuracy),
    sep='\n',
)

In [None]:
# Upper bound on training epochs; early stopping normally ends the run sooner.
EPOCHS = 100

# Checkpoint file, written to the current working directory.
model_file = 'model.h5'

# Stop after 5 epochs without val_loss improvement and roll the in-memory
# model back to the best-val_loss weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# NOTE(review): the checkpoint tracks val_accuracy while early stopping tracks
# val_loss, so the saved file and the restored in-memory weights can come from
# different epochs - confirm this is intended.
save_best = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_file,
    monitor='val_accuracy',
    save_best_only=True,
)

history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=EPOCHS,
    callbacks=[early_stopping, save_best],
)

In [None]:
# Score the trained model on the held-out test set.
loss, accuracy = model.evaluate(test_generator)

print(
    '\nLoss after training: {:,.3f}'.format(loss),
    'Accuracy after training: {:.3%}'.format(accuracy),
    sep='\n',
)

In [None]:
# Per-epoch metrics recorded by model.fit.
training_accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']
training_loss = history.history['loss']
validation_loss = history.history['val_loss']

# One x position per completed epoch.
epochs_range = range(len(training_accuracy))

# Side-by-side panels: accuracy on the left, loss on the right.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(20, 10))

accuracy_ax.plot(epochs_range, training_accuracy, label='Training Accuracy')
accuracy_ax.plot(epochs_range, validation_accuracy, label='Validation Accuracy')
accuracy_ax.legend(loc='lower right')
accuracy_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epochs_range, training_loss, label='Training Loss')
loss_ax.plot(epochs_range, validation_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')

plt.show()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve

In [None]:
# Read both values from the test generator instead of hard-coding them, so
# they always match what the generator actually yields (e.g. if rows were
# dropped because an image file was missing).
batch_size = test_generator.batch_size
num_of_test_samples = test_generator.samples

In [None]:
# Rewind the generator so batch order starts from the first test sample and
# predictions line up with test_generator.classes (it was built with
# shuffle=False).
test_generator.reset()

# Model.predict consumes the generator exactly once. This replaces the
# deprecated predict_generator call and its hand-computed step count, which
# produced one batch too many whenever the sample count was an exact multiple
# of the batch size.
Y_pred = model.predict(test_generator, verbose=1)

# Hard class decision per sample: index of the largest softmax output.
y_pred = np.argmax(Y_pred, axis=1)

In [None]:
# Rows are true classes, columns are predicted classes, both in label order
# 0 (NORMAL), 1 (PNEUMONIA).
test_confusion = confusion_matrix(test_generator.classes, y_pred, labels=[0, 1])
print('Confusion Matrix', test_confusion, sep='\n')

In [None]:
# Display names for class indices 0 and 1 in the per-class metrics table.
target_names = ['Normal', 'Pneumonia']

report_text = classification_report(
    test_generator.classes,
    y_pred,
    target_names=target_names,
)
print('Classification Report', report_text, sep='\n')

In [None]:
# ROC AUC needs a continuous score per sample. Use the predicted probability
# of the positive class (column 1 = PNEUMONIA) rather than the arg-maxed hard
# labels, which would reduce the ranking to a single threshold.
roc_auc_score(test_generator.classes, Y_pred[:, 1])

In [None]:
# Build the ROC curve from the positive-class probability (column 1 =
# PNEUMONIA). Feeding hard 0/1 predictions yields only one non-trivial
# operating point instead of a curve over all thresholds.
fpr, tpr, thresholds = roc_curve(test_generator.classes, Y_pred[:, 1], pos_label=1)

fpr, tpr, thresholds