# Neural Network Project

In [1]:
import pickle
import timeit

import matplotlib.pyplot as plt
import numpy as np
from keras import layers
from keras import models
from keras.applications import inception_v3
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from PIL import Image
from sklearn.metrics import confusion_matrix, f1_score
# from sklearn.model_selection import train_test_split
# from keras.models import load_model
# from keras.utils import to_categorical

# Seed NumPy's global random number generator so NumPy-driven randomness is repeatable across runs.
np.random.seed(123)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

Using TensorFlow backend.


## Data Acquisition
- 277,524 histopathology slides divided into two classes: 198,738 Negative (0) and 78,786 Positive (1)
- Class imbalance: 71.6% Negative, 28.4% Positive

In [2]:
# Locations of the pre-split image folders (one per dataset partition).
train_folder, val_folder, test_folder = (
    '../data/split/train',
    '../data/split/validation',
    '../data/split/test',
)

In [3]:
def _directory_flow(folder, batch_size):
    """Build a directory iterator over `folder`.

    Pixel values are multiplied by 1/255 and every image is resized to 50x50.
    Each call to `next()` on the result yields `batch_size` images with labels.
    """
    rescaler = ImageDataGenerator(rescale=1./255)
    return rescaler.flow_from_directory(folder,
                                        target_size=(50, 50),
                                        batch_size=batch_size)


# Train Data
train_generator = _directory_flow(train_folder, 600)

# Validation Data
val_generator = _directory_flow(val_folder, 200)

# Test Data
test_generator = _directory_flow(test_folder, 200)

Found 166514 images belonging to 2 classes.
Found 55505 images belonging to 2 classes.


KeyboardInterrupt: 

In [None]:
# Create datasets: draw exactly one batch from each generator (train, then test, then validation)
# and split every batch into its image array and its label array.
train_batch = next(train_generator)
test_batch = next(test_generator)
val_batch = next(val_generator)

train_images, train_labels = train_batch
test_images, test_labels = test_batch
val_images, val_labels = val_batch

In [None]:
# Sample counts per split (first axis) and the size of the second image axis.
m_train = len(train_images)
num_px = train_images.shape[1]
m_test = len(test_images)
m_val = len(val_images)

# (caption, value) pairs printed one per line, in this order.
shape_report = [
    ("Number of training samples: ", m_train),
    ("Number of testing samples: ", m_test),
    ("Number of validation samples: ", m_val),
    ("train_images shape: ", train_images.shape),
    ("train_labels shape: ", train_labels.shape),
    ("test_images shape: ", test_images.shape),
    ("test_labels shape: ", test_labels.shape),
    ("val_images shape: ", val_images.shape),
    ("val_labels shape: ", val_labels.shape),
]
for caption, value in shape_report:
    print(caption + str(value))

In [None]:
# Flatten every image into a single feature row: (n_samples, ...) -> (n_samples, n_features).
train_img = np.reshape(train_images, (len(train_images), -1))
test_img = np.reshape(test_images, (len(test_images), -1))
val_img = np.reshape(val_images, (len(val_images), -1))

for flattened in (train_img, test_img, val_img):
    print(flattened.shape)

In [None]:
# Collapse the one-hot labels produced by flow_from_directory into a single 0/1 column
# of shape (n_samples, 1).
#
# flow_from_directory orders classes alphanumerically, so column 0 of the one-hot matrix
# is the indicator for the FIRST class and column 1 for the SECOND. Taking column 0 (as
# before) yields 1 for the first class, which inverts the intended "Negative = 0,
# Positive = 1" encoding and makes f1_score report on the wrong class. Column 1 gives the
# class index directly.
# NOTE(review): assumes the positive class sorts second among the class directories —
# confirm with `train_generator.class_indices`.
train_y = train_labels[:, 1].reshape(-1, 1)
test_y = test_labels[:, 1].reshape(-1, 1)
val_y = val_labels[:, 1].reshape(-1, 1)

In [None]:
# Baseline fully connected network for binary classification.
# The input is a flattened 50x50 RGB image: 50 * 50 * 3 = 7500 features.
model = models.Sequential()
model.add(layers.Dense(20, activation='relu', input_shape=(7500,)))
model.add(layers.Dense(7, activation='relu'))
model.add(layers.Dense(5, activation='relu'))
# A single output unit needs a sigmoid. Softmax normalises across units, so with one unit
# it always outputs exactly 1.0: the model could never predict class 0 and would receive
# no gradient from the loss.
model.add(layers.Dense(1, activation='sigmoid'))

## Modeling

In [None]:
# Standard Neural Net
# Compile with plain SGD and binary cross-entropy, tracking accuracy.
model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Train on the flattened images; the validation split is evaluated after every epoch.
dense_fit_options = dict(epochs=50,
                         batch_size=25,
                         validation_data=(val_img, val_y))
history = model.fit(train_img, train_y, **dense_fit_options)

In [None]:
# Loss and metric values of the dense model on the training batch.
results_train = model.evaluate(x=train_img, y=train_y)

In [None]:
# Loss and metric values of the dense model on the test batch.
results_test = model.evaluate(x=test_img, y=test_y)

In [None]:
# Index 0 of each result holds the loss, index 1 the accuracy metric.
dense_summary = (
    ('Training Loss:', results_train[0]),
    ('Training Accuracy:', results_train[1]),
    ('Testing Loss:', results_test[0]),
    ('Testing Accuracy:', results_test[1]),
)
for caption, value in dense_summary:
    print(caption, value)

In [None]:
# Convolutional NN
# Three Conv2D + MaxPooling2D stages on 50x50 RGB inputs, followed by a dense head.
cnn_model = models.Sequential()
cnn_model.add(layers.Conv2D(50, (3, 3), activation='relu', input_shape=(50, 50, 3)))
cnn_model.add(layers.MaxPooling2D((2, 2)))
cnn_model.add(layers.Conv2D(25, (4, 4), activation='relu'))
cnn_model.add(layers.MaxPooling2D((2, 2)))
cnn_model.add(layers.Conv2D(50, (3, 3), activation='relu'))
cnn_model.add(layers.MaxPooling2D((2, 2)))
cnn_model.add(layers.Flatten())
cnn_model.add(layers.Dense(50, activation='relu'))
# A single output unit needs a sigmoid. Softmax over one unit is constantly 1.0, which
# would make every prediction "class 1" and leave the network with nothing to learn.
cnn_model.add(layers.Dense(1, activation='sigmoid'))

cnn_model.compile(loss='binary_crossentropy',
                  optimizer="sgd",
                  metrics=['acc'])

In [None]:
# Train the CNN on the unflattened image tensors; validation runs after every epoch.
cnn_fit_options = dict(epochs=30,
                       batch_size=25,
                       validation_data=(val_images, val_y))
cnn_history = cnn_model.fit(train_images, train_y, **cnn_fit_options)

In [None]:
# Loss and metric values of the CNN on the training batch, then on the test batch.
cnn_results_train = cnn_model.evaluate(x=train_images, y=train_y)
cnn_results_test = cnn_model.evaluate(x=test_images, y=test_y)

In [None]:
# Index 0 of each result holds the loss, index 1 the accuracy metric.
cnn_summary = (
    ('Training Loss:', cnn_results_train[0]),
    ('Training Accuracy:', cnn_results_train[1]),
    ('Testing Loss:', cnn_results_test[0]),
    ('Testing Accuracy:', cnn_results_test[1]),
)
for caption, value in cnn_summary:
    print(caption, value)

In [None]:
# Transfer learning: ImageNet-pretrained InceptionV3 convolutional base (no classifier top)
# followed by a new fully connected head for binary classification.
imagenet = inception_v3.InceptionV3(weights='imagenet', include_top=False)
imagenet_new = imagenet.output  # not used by this cell; kept so the name stays defined

new_model = models.Sequential()
# InceptionV3 needs inputs of at least 75x75: with 50x50 images the feature map shrinks to
# 1x1 before the network's last stride-2 convolution, and the forward pass fails.
# Upsampling by 2 turns the 50x50 inputs into 100x100 without touching the data pipeline.
new_model.add(layers.UpSampling2D(size=(2, 2), input_shape=(50, 50, 3)))
new_model.add(imagenet)
new_model.add(layers.GlobalAveragePooling2D())
new_model.add(layers.Dense(1024,activation='relu'))
new_model.add(layers.Dense(1024,activation='relu'))
new_model.add(layers.Dense(512,activation='relu'))
# A single output unit needs a sigmoid; softmax over one unit always outputs 1.0.
new_model.add(layers.Dense(1,activation='sigmoid'))
# NOTE(review): the base model's weights are left trainable, and inputs are scaled to
# [0, 1] rather than with InceptionV3's own preprocess_input — confirm both are intended.

In [None]:
# Compile the transfer-learning model with Adam and binary cross-entropy, tracking accuracy.
new_model.compile(loss='binary_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])

# Train on the image tensors; the fit call is the cell's last expression so its
# return value is displayed.
transfer_fit_options = dict(epochs=10,
                            batch_size=50,
                            validation_data=(val_images, val_y))
new_model.fit(train_images, train_y, **transfer_fit_options)

In [None]:
# Loss and metric values of the transfer-learning model on the training batch (displayed as cell output).
new_model.evaluate(x=train_images, y=train_y)

In [None]:
# Round the model's test-set outputs to the nearest integer to obtain hard class
# predictions, then score them against the test labels with F1 (displayed as cell output).
predictions_transfer = np.around(new_model.predict(test_images))
f1_score(test_y, predictions_transfer)