In [1]:
import tensorflow as tf
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
os.environ['CUDA_VISIBLE_DEVICES'] = '0' 
from keras.layers import Conv2D, Input, ZeroPadding2D, BatchNormalization, Activation, MaxPooling2D, Flatten, Dense,Dropout
from keras.models import Model, load_model
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.utils import shuffle
import cv2
import imutils
import numpy as np
import matplotlib.pyplot as plt
import time
from os import listdir

# Render matplotlib figures inline in the notebook.
# `InteractiveShell.magic()` is deprecated; `run_line_magic()` is the supported equivalent.
get_ipython().run_line_magic('matplotlib', 'inline')

  get_ipython().magic(u'matplotlib inline')


Preprocess the data
- Gaussian blur to smooth out salt-and-pepper noise
- Thresholding to separate the brain from the dark background
- Erosion to remove small specks of noise
- Dilation to restore the regions shrunk by the erosion
- Contour detection to find the outline of the brain (the largest contour)
- Crop the image to that contour to remove the background

In [2]:
def crop_contour_brain_img(image, plot=False, verbose=True):
    """Crop an image to the extreme points of its largest bright contour.

    The image is converted to grayscale, blurred, thresholded, eroded and
    dilated; the largest external contour of the resulting mask defines the
    crop rectangle.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image in OpenCV's BGR channel order (as returned by
        ``cv2.imread``).
    plot : bool, optional
        When True, show the original and the cropped image side by side.
    verbose : bool, optional
        When True (the default, matching the historical behaviour), print the
        shape of the grayscale image.

    Returns
    -------
    numpy.ndarray
        The cropped region of ``image``. If no contour is found, or the
        contour is degenerate so the crop would be empty, ``image`` itself is
        returned unchanged.

    Raises
    ------
    ValueError
        If ``image`` is None, which is what ``cv2.imread`` returns when a file
        cannot be read.
    """
    if image is None:
        raise ValueError('image is None - cv2.imread could not read the file')

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if verbose:
        print(grayscale.shape)
    grayscale = cv2.GaussianBlur(grayscale, (5, 5), 0)
    threshold_image = cv2.threshold(grayscale, 45, 255, cv2.THRESH_BINARY)[1]
    threshold_image = cv2.erode(threshold_image, None, iterations=2)
    threshold_image = cv2.dilate(threshold_image, None, iterations=2)

    contours = cv2.findContours(threshold_image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)

    new_image = image
    # An all-dark image yields no contours; max() on an empty sequence would raise.
    if len(contours) > 0:
        c = max(contours, key=cv2.contourArea)

        # Contour points are stored as (x, y); the extremes give the crop rectangle.
        x_min, x_max = c[:, :, 0].min(), c[:, :, 0].max()
        y_min, y_max = c[:, :, 1].min(), c[:, :, 1].max()

        cropped = image[y_min:y_max, x_min:x_max]
        # A one-pixel-wide or one-pixel-high contour produces an empty slice,
        # which cannot be displayed or written to disk.
        if cropped.size > 0:
            new_image = cropped

    if plot:
        plt.figure()
        panels = (('original image', image), ('Cropped image', new_image))
        for position, (title, panel) in enumerate(panels, start=1):
            plt.subplot(1, 2, position)
            # matplotlib expects RGB while OpenCV images are BGR.
            plt.imshow(cv2.cvtColor(panel, cv2.COLOR_BGR2RGB))
            plt.tick_params(axis='both', which='both',
                            top=False, bottom=False, left=False, right=False, labelbottom=False,
                            labeltop=False, labelleft=False, labelright=False)
            plt.title(title)
        plt.show()

    return new_image

Example of how a preprocessed image looks:

In [3]:
# Load one sample image and crop it to the brain region.
example_path = '22_malignant.jpg'
example_image = cv2.imread(example_path)
# cv2.imread returns None instead of raising when the file is missing or unreadable.
if example_image is None:
    raise FileNotFoundError('Could not read example image: ' + example_path)
example_new_image = crop_contour_brain_img(example_image, False)
# save the cropped image
# cv2.imwrite('preprocessed/22_malignant_new.jpg', example_new_image)

(380, 294)


We preprocess our original dataset

In [7]:
# data_dir = 'data_new/train/'

# for label in os.listdir(data_dir):
#     for file in os.listdir(os.path.join(data_dir, label)):
#         img = cv2.imread(os.path.join(data_dir, label, file))
#         processed_img = crop_contour_brain_img(img, False)
#         # save to preprocessed/ label folder
#         cv2.imwrite('preprocessed/' + label + '/' + file, processed_img)
#         # processed_images.append(processed_img)
#         # true_labels.append(label)
#         # file_names.append(file)

# The code hung, so I had to do the preprocessing class by class

# data_label = 'data_new/train/malignant/'
# for file in os.listdir(data_label):
#     img = cv2.imread(os.path.join(data_label, file))
#     processed_img = crop_contour_brain_img(img, True)
#     print(file)
#     # save to preprocessed/ label folder
#     cv2.imwrite('preprocessed/malignant/' + file, processed_img)
    
# Crop every validation image and save it under preprocessed/valid/<label>/.
test_dir = 'data_new/valid/'
valid_output_dir = 'preprocessed/valid/'

for label in os.listdir(test_dir):
    label_dir = os.path.join(test_dir, label)
    # Skip stray files (e.g. OS metadata) sitting next to the class folders.
    if not os.path.isdir(label_dir):
        continue

    # cv2.imwrite does not create missing folders; it just returns False.
    output_label_dir = os.path.join(valid_output_dir, label)
    os.makedirs(output_label_dir, exist_ok=True)

    for file in os.listdir(label_dir):
        img = cv2.imread(os.path.join(label_dir, file))
        # cv2.imread returns None for files it cannot decode.
        if img is None:
            print('Skipping unreadable file:', os.path.join(label_dir, file))
            continue
        processed_img = crop_contour_brain_img(img, False)
        output_path = os.path.join(output_label_dir, file)
        if not cv2.imwrite(output_path, processed_img):
            print('Failed to write:', output_path)

(201, 173)
(168, 300)
(168, 300)
(225, 225)
(250, 201)
(192, 192)
(417, 428)
(201, 173)
(218, 180)
(360, 319)
(369, 400)
(500, 377)
(325, 254)
(380, 310)
(360, 313)
(354, 279)
(359, 297)
(212, 209)
(300, 240)
(380, 294)
(225, 225)
(938, 911)
(456, 374)
(630, 630)


Split the dataset into train and test (20%)

In [8]:
data_dir = 'preprocessed'
test_fraction = 0.2
# Fixed seed so a fresh run holds out the same files every time.
split_rng = np.random.RandomState(42)

# Hold out 20% of each class for testing by moving files to preprocessed/test/<label>/.
for label in sorted(os.listdir(data_dir)):
    label_dir = os.path.join(data_dir, label)
    # 'test' and 'valid' live next to the class folders but are splits, not classes.
    if label in ('test', 'valid') or not os.path.isdir(label_dir):
        continue

    # os.rename fails if the destination folder does not exist yet.
    test_label_dir = os.path.join(data_dir, 'test', label)
    os.makedirs(test_label_dir, exist_ok=True)
    if os.listdir(test_label_dir):
        # This class was already split; re-running must not move another 20%.
        continue

    # Sort because os.listdir order is arbitrary and would defeat the seed.
    files = sorted(f for f in listdir(label_dir) if os.path.isfile(os.path.join(label_dir, f)))
    n_test = int(len(files) * test_fraction)
    if n_test == 0:
        continue

    test_files = split_rng.choice(files, n_test, replace=False)  # replace=False means no duplicates
    for file in test_files:
        os.rename(os.path.join(label_dir, file), os.path.join(test_label_dir, file))

In [10]:
# Folders holding the preprocessed images of each class
benign_dir = 'preprocessed/benign'
malignant = 'preprocessed/malignant'

# Tally how many entries each class folder currently contains
benign_count, malignant_count = (
    len(listdir(class_dir)) for class_dir in (benign_dir, malignant)
)

print('Number of benign images:', benign_count)
print('Number of malignant images:', malignant_count)

Number of benign images: 62
Number of malignant images: 124


In [11]:
# Balance the classes by adding a horizontally flipped copy of every benign image
# (this doubles the benign set, which brings it level with the malignant set).
for file in listdir(benign_dir):
    # splitext handles extensions of any length, unlike slicing off 4 characters.
    stem, _ = os.path.splitext(file)
    if stem.endswith('_flipped'):
        # Already an augmented copy; flipping it again on a re-run would duplicate data.
        continue

    flipped_path = os.path.join(benign_dir, stem + '_flipped.jpg')
    if os.path.exists(flipped_path):
        # The flipped copy was written by an earlier run.
        continue

    img = cv2.imread(os.path.join(benign_dir, file))
    # cv2.imread returns None for files it cannot decode.
    if img is None:
        print('Skipping unreadable file:', os.path.join(benign_dir, file))
        continue

    flipped_img = cv2.flip(img, 1)  # flip code 1 = mirror around the vertical axis
    cv2.imwrite(flipped_path, flipped_img)