In [11]:
# Goal: train the models on the training data (no preprocessing beyond resizing
# and rescaling the images), then evaluate them on the test data.

# import libraries
import os
import pickle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom
import seaborn as sns
from keras.applications.inception_v3 import InceptionV3
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg16 import VGG16
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Flatten, Dropout
from keras.models import Model, load_model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix, classification_report




# ---------------------------------------------------------------------------
# Load the annotation tables, read the cropped DICOM images they point to and
# build the train / validation / test arrays used by the model cells below.
# ---------------------------------------------------------------------------

IMAGE_SIZE = (224, 224)  # spatial size every image is resized to
VAL_SIZE = 100           # leading training rows held out as internal validation
BENIGN_CLASSES = ('BENIGN', 'BENIGN_WITHOUT_CALLBACK')  # both map to class 0


def load_images(paths, size=IMAGE_SIZE):
    """Read DICOM files and return them as one float32 image array.

    Each image is resized to ``size``, rescaled by the maximum value its stored
    bit depth allows, and replicated onto three identical channels.

    Parameters
    ----------
    paths : iterable of str
        DICOM file paths, read in iteration order.
    size : tuple of int
        Target size passed to ``cv2.resize``.

    Returns
    -------
    numpy.ndarray
        Stacked images, one entry per path.
    """
    images = []
    for path in paths:
        dicom = pydicom.dcmread(path)
        pixels = cv2.resize(dicom.pixel_array, size)
        # Scale by the file's own bit depth instead of a fixed 255: 8-bit files
        # behave as before, while 16-bit files no longer end up far above 1.
        # Assumes unsigned pixel data -- TODO confirm for this dataset.
        bits = int(getattr(dicom, 'BitsStored', pixels.dtype.itemsize * 8))
        pixels = pixels.astype(np.float32) / float(2 ** bits - 1)
        # Grey-scale -> 3 identical channels to match the (224, 224, 3) model input.
        images.append(np.stack((pixels,) * 3, axis=-1))
    return np.array(images)


def encode_labels(pathology):
    """Return 0 for benign pathologies and 1 for every other value.

    Works on a copy, so the annotation DataFrame column is left untouched
    (mutating ``Series.values`` in place would overwrite the column itself).
    """
    is_benign = pd.Series(pathology).isin(BENIGN_CLASSES)
    return np.where(is_benign, 0, 1)


# load data
train = pd.read_csv('train_calc_annotations.csv')
test = pd.read_csv('test_calc_annotations.csv')

print('Loading data...')
train_data = load_images(train['cropped_file'])
test_data = load_images(test['cropped_file'])
print('Data loaded...')

# list possible labels (index in this list == integer class id)
labels = ['BENIGN', 'MALIGNANT']

# One-hot encode; num_classes is fixed so a split that happens to contain a
# single class still produces two columns.
train_labels = to_categorical(encode_labels(train['pathology']), num_classes=len(labels))
test_labels = to_categorical(encode_labels(test['pathology']), num_classes=len(labels))

print("labels converted...")

# Internal validation data: the first VAL_SIZE training rows, in file order.
# NOTE(review): the split is not shuffled -- confirm the CSV order is not
# correlated with the label.
val_data = train_data[:VAL_SIZE]
val_labels = train_labels[:VAL_SIZE]

# Remaining rows are the actual training data.
train_data = train_data[VAL_SIZE:]
train_labels = train_labels[VAL_SIZE:]

print('Data Format:')
# print shapes
print(train_data.shape)
print(train_labels.shape)
print(val_data.shape)
print(val_labels.shape)
print(test_data.shape)
print(test_labels.shape)


Loading data...
Data loaded...
labels converted...
Data Format:
(1446, 224, 224, 3)
(1446, 2)
(100, 224, 224, 3)
(100, 2)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20

KeyboardInterrupt: 

In [None]:
# ---------------------------------------------------------------------------
# ResNet50 transfer learning: frozen ImageNet backbone + small trainable head,
# trained with augmentation, then evaluated on the test data.
# ---------------------------------------------------------------------------

# NOTE(review): the input arrays were only rescaled by a constant when loaded;
# Keras ships a model-specific `preprocess_input` for ResNet50 -- confirm that
# skipping it is intended when using the ImageNet weights.
resnet50 = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Freeze the backbone so only the new head is trained.
for layer in resnet50.layers:
    layer.trainable = False

# Classification head on top of the backbone's feature maps.
x = resnet50.output
x = Flatten()(x)                        # feature maps -> 1-D vector
x = Dense(512, activation='relu')(x)    # fully connected layer, 512 units
x = Dropout(0.5)(x)                     # regularisation against overfitting
# Output layer: 2 units with softmax, i.e. one probability per class; the
# class with the highest probability is the prediction.
output = Dense(2, activation='softmax')(x)

# create model
model = Model(inputs=resnet50.input, outputs=output)

# compile model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.003), metrics=['accuracy'])

# create callbacks
# restore_best_weights makes the saved / evaluated model the one with the
# lowest validation loss rather than the one from the last epoch run.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1,
                               restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1)

# create data generator (random flips, rotations, zooms, shifts and shears)
datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=30, zoom_range=0.2,
                             width_shift_range=0.1, height_shift_range=0.1, shear_range=0.1)

# fit model
history = model.fit(datagen.flow(train_data, train_labels, batch_size=32), epochs=20,
                    validation_data=(val_data, val_labels), callbacks=[early_stopping, reduce_lr])
# save model
model.save('resnet50.h5')

# plot accuracy and loss; the second curve comes from the validation data,
# so it is labelled 'Validation' (not 'Test').
for metric, title, ylabel in (('accuracy', 'Model Accuracy', 'Accuracy'),
                              ('loss', 'Model Loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend(['Train', 'Validation'])
    plt.show()

# predict test data: one-hot / probability rows -> integer class ids
predictions = model.predict(test_data)
predictions = np.argmax(predictions, axis=1)
true_classes = np.argmax(test_labels, axis=1)

# print classification report
print(classification_report(true_classes, predictions))

# print confusion matrix
print(confusion_matrix(true_classes, predictions))

In [None]:
# ---------------------------------------------------------------------------
# VGG16 transfer learning: frozen ImageNet backbone + small trainable head.
# If a previously saved 'vgg16.h5' exists, training resumes from it.
# ---------------------------------------------------------------------------

VGG16_CHECKPOINT = 'vgg16.h5'

# load vgg16 model
vgg16 = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Freeze the backbone so only the new head is trained.
for layer in vgg16.layers:
    layer.trainable = False

# Classification head.
x = vgg16.output
x = Flatten()(x)                        # feature maps -> 1-D vector
x = Dense(512, activation='relu')(x)    # fully connected layer
x = Dropout(0.5)(x)                     # regularisation against overfitting
output = Dense(2, activation='softmax')(x)  # one probability per class

# create model
model = Model(inputs=vgg16.input, outputs=output)

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Resume from the saved model when one is available. The original loaded it
# unconditionally (and without importing load_model), which fails on a fresh
# run before 'vgg16.h5' has ever been written.
if os.path.exists(VGG16_CHECKPOINT):
    model = load_model(VGG16_CHECKPOINT)

# Callbacks are created here so the cell does not depend on another cell
# having been run; restore_best_weights keeps the lowest-val_loss weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1,
                               restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1)

# create data generator (random flips, rotations, zooms, shifts and shears)
datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=30, zoom_range=0.2,
                             width_shift_range=0.1, height_shift_range=0.1, shear_range=0.1)

# fit model
history = model.fit(datagen.flow(train_data, train_labels, batch_size=32), epochs=20,
                    validation_data=(val_data, val_labels), callbacks=[early_stopping, reduce_lr])

# save model
model.save(VGG16_CHECKPOINT)

# plot accuracy and loss; the second curve comes from the validation data,
# so it is labelled 'Validation' (not 'Test').
for metric, title, ylabel in (('accuracy', 'Model Accuracy', 'Accuracy'),
                              ('loss', 'Model Loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend(['Train', 'Validation'])
    plt.show()

# predict test data: one-hot / probability rows -> integer class ids
predictions = model.predict(test_data)
predictions = np.argmax(predictions, axis=1)
true_classes = np.argmax(test_labels, axis=1)

# print classification report
print(classification_report(true_classes, predictions))

# print confusion matrix
print(confusion_matrix(true_classes, predictions))







In [None]:
# ---------------------------------------------------------------------------
# InceptionV3 transfer learning: frozen ImageNet backbone + small trainable
# head, trained with augmentation, then evaluated on the test data.
# ---------------------------------------------------------------------------

# load inceptionv3 model
inceptionv3 = InceptionV3(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Freeze the backbone so only the new head is trained.
for layer in inceptionv3.layers:
    layer.trainable = False

# Classification head.
x = inceptionv3.output
x = Flatten()(x)                        # feature maps -> 1-D vector
x = Dense(512, activation='relu')(x)    # fully connected layer
x = Dropout(0.5)(x)                     # regularisation against overfitting
output = Dense(2, activation='softmax')(x)  # one probability per class

# create model
model = Model(inputs=inceptionv3.input, outputs=output)

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Callbacks are created here so the cell does not depend on another cell
# having been run; restore_best_weights keeps the lowest-val_loss weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1,
                               restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1)

# create data generator (random flips, rotations, zooms, shifts and shears)
datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=30, zoom_range=0.2,
                             width_shift_range=0.1, height_shift_range=0.1, shear_range=0.1)

# fit model
history = model.fit(datagen.flow(train_data, train_labels, batch_size=32), epochs=20,
                    validation_data=(val_data, val_labels), callbacks=[early_stopping, reduce_lr])

# save model
model.save('inceptionv3.h5')

# plot accuracy and loss; the second curve comes from the validation data,
# so it is labelled 'Validation' (not 'Test').
for metric, title, ylabel in (('accuracy', 'Model Accuracy', 'Accuracy'),
                              ('loss', 'Model Loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend(['Train', 'Validation'])
    plt.show()

# predict test data: one-hot / probability rows -> integer class ids
predictions = model.predict(test_data)
predictions = np.argmax(predictions, axis=1)
true_classes = np.argmax(test_labels, axis=1)

# print classification report
print(classification_report(true_classes, predictions))

# print confusion matrix
print(confusion_matrix(true_classes, predictions))
