In [1]:
import keras
import tensorflow as tf
from keras import layers
import matplotlib.pyplot as plt
from keras.preprocessing import image, image_dataset_from_directory
from keras.models import Sequential, Model
from keras.optimizers import Adam
from focal_loss import BinaryFocalLoss
from keras.layers import Dense, MaxPooling2D, Dropout, Flatten
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.callbacks import CSVLogger
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from tensorflow.keras.utils import to_categorical
import numpy as np
from sklearn.utils import shuffle, class_weight
from sklearn.metrics import classification_report, confusion_matrix
from focal_loss import BinaryFocalLoss
from tensorflow_addons.losses import SigmoidFocalCrossEntropy
import pandas as pd
import os

In [2]:
# Load the CheXpert training labels and build a class-balanced frame by
# randomly undersampling the "abnormal" rows ("No Finding" == 0) down to the
# number of "normal" rows ("No Finding" == 1).
train = pd.read_csv('/home/jupyter-zaiman/data/DataCenter/CheXpert-v1.0/train.csv')
train = train.filter(["Path", "No Finding"], axis=1)
# Missing labels are treated as 0, i.e. the row counts as abnormal.
train = train.fillna(0)

# Divide by class first and count each class explicitly, instead of relying on
# the (frequency-sorted) order in which value_counts() returns the counts.
df_class_0 = train[train['No Finding'] == 0]
df_class_1 = train[train['No Finding'] == 1]
abnormal, normal = len(df_class_0), len(df_class_1)

print("Before Undersampling...")
print("Abnormal: " + str(abnormal))
print("Normal: " + str(normal))

# Seeded so that the balanced subset (and every split derived from it) is the
# same on each run; min() keeps the call valid if class 0 is not the majority.
df_class_0_under = df_class_0.sample(n=min(abnormal, normal), random_state=1)
train = pd.concat([df_class_0_under, df_class_1], axis=0)
# The label column is converted to strings before being handed to the
# binary-mode data generators further down.
train["No Finding"] = train["No Finding"].astype(str)
print('After Undersampling:')
print(train["No Finding"].value_counts())


Before Undersampling...
Abnormal: 201033
Normal: 22381
After Undersampling:
0.0    22381
1.0    22381
Name: No Finding, dtype: int64


In [3]:
# NOTE: disabled experiment, kept for reference only -- nothing in this cell runs.
# It sketches an alternative label construction: drop rows whose 4th column
# contains "Lateral", add "Cardiomegaly" and "Enlarged Cardiomediastinum"
# (with -1 mapped to 1) into "No Finding", and keep only the first 2000 rows.
# NOTE(review): a sum of 3 is not remapped by `.replace(2, 1)` -- confirm the
# intended label semantics before re-enabling.
# train=pd.read_csv('/home/jupyter-zaiman/data/DataCenter/CheXpert-v1.0/train.csv')
# train = train.fillna(0)
# # import valid data info from valid.csv
# valid=pd.read_csv('/home/jupyter-zaiman/data/DataCenter/CheXpert-v1.0/valid.csv')
# valid = valid.fillna(0)

# valid = valid.append(valid)
# #pre-process data: remove Lateral images
# train = train[~train[train.columns[3]].str.contains("Lateral")]

# #pre-process data: drop selected features - only images as inputs
# train = train.filter(["Path", "No Finding", "Cardiomegaly", "Enlarged Cardiomediastinum"], axis=1)
# abnormal, normal = train["No Finding"].value_counts()
# print("Pre_Abnormal: " + str(abnormal))
# print("Pre_Normal: " + str(normal))
# train["Cardiomegaly"] = train["Cardiomegaly"].replace(-1, 1)
# train["Enlarged Cardiomediastinum"] = train["Enlarged Cardiomediastinum"].replace(-1, 1)
# train["No Finding"] = train["No Finding"] + train["Cardiomegaly"] + train["Enlarged Cardiomediastinum"]
# train["No Finding"] = train["No Finding"].replace(2, 1)
# abnormal, normal = train["No Finding"].value_counts()
# print("Post_Abnormal: " + str(abnormal))
# print("Post_Normal: " + str(normal))
# train['No Finding'] = train['No Finding'].astype(str)
# train = train.drop(["Cardiomegaly", "Enlarged Cardiomediastinum"], axis=1)
# train = train.iloc[:2000]
# train.shape

In [4]:
from sklearn.model_selection import train_test_split

# 60 / 20 / 20 split into train / validation / test.
# Both splits are stratified on the label so that the class balance created by
# the undersampling step is carried into every subset.
# NOTE: `train` is rebound here, so re-running this cell on its own splits the
# already-reduced frame again; re-run from the data-loading cell instead.
# NOTE(review): rows are split per image. If several rows belong to the same
# patient they can end up in different subsets -- consider a group-aware split.
train, holdout = train_test_split(
    train, test_size=0.4, random_state=1, stratify=train["No Finding"]
)
valid, test = train_test_split(
    holdout, test_size=.5, random_state=1, stratify=holdout["No Finding"]
)


In [5]:
# Training configuration shared by the cells below.

# Spatial size requested from the data generators, and the matching
# 3-channel input shape.
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
image_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, 3)

# Optimisation settings.
EPOCHS = 50
BATCH_SIZE = 128
learning = 0.001  # initial learning rate passed to the optimizer

In [6]:
# Input pipelines for the train / validation / test frames.
#
# The backbone is a ResNet50 initialised with ImageNet weights, so images are
# passed through the matching `resnet50.preprocess_input` instead of a plain
# 1/255 rescale (which yields 0..1 values, not what those weights were
# trained on).
IMAGE_ROOT = '/home/jupyter-zaiman/data/DataCenter/'
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input

train_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)
valid_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)


def make_generator(datagen, dataframe, shuffle=True):
    """Build a binary-label image iterator over `dataframe`.

    Args:
        datagen: the ImageDataGenerator that supplies the preprocessing.
        dataframe: frame with a "Path" column (relative to IMAGE_ROOT) and a
            string "No Finding" label column.
        shuffle: whether the iterator shuffles rows. Must be False when the
            predictions are later compared against `generator.classes`.

    Returns:
        The iterator returned by `datagen.flow_from_dataframe`.
    """
    return datagen.flow_from_dataframe(
        dataframe=dataframe,
        directory=IMAGE_ROOT,
        x_col="Path", y_col="No Finding",
        class_mode="binary",
        target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
        shuffle=shuffle,
        batch_size=BATCH_SIZE)


train_generator = make_generator(train_datagen, train)
valid_generator = make_generator(valid_datagen, valid, shuffle=True)
# Unshuffled so that predictions line up with test_generator.classes.
test_generator = make_generator(valid_datagen, test, shuffle=False)

Found 26857 validated image filenames belonging to 2 classes.
Found 8952 validated image filenames belonging to 2 classes.
Found 8953 validated image filenames belonging to 2 classes.


In [7]:
# Build the classifier: an ImageNet-initialised ResNet50 backbone without its
# top, using max pooling, followed by a small dense head with one sigmoid output.
base = tf.keras.applications.ResNet50(
    weights="imagenet",
    include_top=False,
    pooling='max',
)

# Only layers whose name contains 'conv5' stay trainable; every other
# backbone layer is frozen.
for backbone_layer in base.layers:
    backbone_layer.trainable = 'conv5' in backbone_layer.name

head = Dense(512, activation='relu')(base.output)
prediction = Dense(1, activation='sigmoid')(head)
model = Model(inputs=base.input, outputs=prediction)
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, None, None, 6 256         conv1_conv[0][0]                 
_______________________________________________________________________________________

In [9]:
# Compile and train the model with checkpointing, LR scheduling, early
# stopping and CSV logging.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
model.compile(
    optimizer=Adam(learning_rate=learning),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Keep only the weights of the epoch with the best validation accuracy.
# NOTE(review): the target is a directory-style path (trailing '/'), not a
# file name -- confirm the resulting checkpoint files are what is expected.
save = ModelCheckpoint(
    '/home/jupyter-zaiman/COVID-19 Classification/ResNet50-UnderSampling/',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
    mode='max',
    verbose=1
)
# Stop after 6 epochs without val_loss improvement and roll the model back to
# the best epoch, so the evaluation cell does not score last-epoch weights.
stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=6,
    restore_best_weights=True,
)
# Multiply the learning rate by 0.01 after 3 epochs without val_loss improvement.
scheduler = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.01,
    patience=3,
    verbose=1
    )
logger = CSVLogger('/home/jupyter-zaiman/COVID-19 Classification/ResNet50-UnderSampling/training.log')
history = model.fit(train_generator,
    epochs=EPOCHS,
    validation_data=valid_generator,
    verbose=1,
    callbacks=[save, scheduler, stop, logger]
)



Epoch 1/50
 14/210 [=>............................] - ETA: 27:30 - loss: 13.1279 - accuracy: 0.5061

KeyboardInterrupt: 

In [None]:
# Plot the training curves and evaluate the model on the held-out test set.
# NOTE: `history` only exists if the training cell ran to completion.
print(history.history.keys())

# One figure per metric; the second curve is the validation split, not the
# test split, so it is labelled accordingly.
for metric in ('accuracy', 'loss'):
    fig, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'validation'], loc='lower right')
    plt.show()

# The generator already defines the batching, so no batch_size/steps argument
# is passed; it iterates over the whole test set once.
Y_pred = model.predict(test_generator)
# The network has a single sigmoid output of shape (N, 1): threshold it at 0.5.
# (argmax over that single column would always return class 0.)
y_pred = (Y_pred.ravel() > 0.5).astype(int)

print('Confusion Matrix')
print(confusion_matrix(test_generator.classes, y_pred))
# Assumes class index 0 is label "0.0" (abnormal) and index 1 is "1.0"
# (normal) -- verify with test_generator.class_indices.
target_names = ['Abnormal', 'Normal']
print('Classification Report')
print(classification_report(test_generator.classes, y_pred, target_names=target_names))
    