## Import Libraries

In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report,accuracy_score
from sklearn import svm
from skimage.io import imread, imshow
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Conv1D, MaxPool1D, Flatten, InputLayer, BatchNormalization
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Activation, MaxPooling2D
from keras.optimizers import Adam




import os 

print("All imports done")

All imports done


## Data preprocessing


In [2]:
basedir = './dataset'
images_dir = os.path.join(basedir,'image')
labels_filename = 'label.csv'

def categorical_labelling(): 
    labels_file = open(os.path.join(basedir, labels_filename), 'r')
    lines = labels_file.readlines()
    tumor_labels = {line.split(',')[0] : (line.split(',')[1].strip()) for line in lines[1:]}

    for i in tumor_labels: 
        if tumor_labels[i] == 'no_tumor': 
            tumor_labels[i] = 0
        elif tumor_labels[i] == "meningioma_tumor":
            tumor_labels[i] = 1
        elif tumor_labels[i] == "glioma_tumor":
            tumor_labels[i] = 2
        elif tumor_labels[i] == "pituitary_tumor":
            tumor_labels[i] = 3
    return(tumor_labels)


In [3]:
def extract_features():
    all_features = []
    all_labels = []
    
    labels = categorical_labelling()

    image_paths = [os.path.join(images_dir, l) for l in os.listdir(images_dir)]
    if os.path.isdir(images_dir):
        all_features = []
        all_labels = []
        for img_path in image_paths:
            filename = img_path.split('/')[-1]
            features = imread(img_path, as_gray=True)
            features.shape, features
            all_features.append(features)
            all_labels.append(labels[filename])
    np_features = np.array(all_features)
    np_labels = np.array(all_labels)
    
    np_features =np_features.reshape(-1,512, 512,1)
#     np_labels = np_labels.reshape(-1,512, 512,1)
    
    
    return np_features, np_labels

In [4]:
def get_data(): 

    X, Y = extract_features()

    tr_X = X[:2400]
    tr_Y = Y[:2400]
    te_X = X[2400:]
    te_Y = Y[2400:]
    
    return tr_X, tr_Y, te_X, te_Y

## Testing data preprocessing

In [30]:
## Preparing testing dataset

testdir = './test'
test_images_dir = os.path.join(testdir,'image')
test_labels_filename = 'label.csv'

def categorical_labelling_testset():
    labels_file = open(os.path.join(testdir, test_labels_filename), 'r')
    lines = labels_file.readlines()
    tumor_labels = {line.split(',')[0] : (line.split(',')[1].strip()) for line in lines[1:]}

    for i in tumor_labels: 
        if tumor_labels[i] == 'no_tumor': 
            tumor_labels[i] = 0
        elif tumor_labels[i] == "meningioma_tumor":
            tumor_labels[i] = 1
        elif tumor_labels[i] == "glioma_tumor":
            tumor_labels[i] = 2
        elif tumor_labels[i] == "pituitary_tumor":
            tumor_labels[i] = 3
    return(tumor_labels)

def extract_features_with_conv_testset():
    all_features = []
    all_labels = []
    
    labels = categorical_labelling_testset()

    image_paths = [os.path.join(test_images_dir, l) for l in os.listdir(test_images_dir)]
    if os.path.isdir(images_dir):
        all_features = []
        all_labels = []
        for img_path in image_paths:
            filename = img_path.split('/')[-1]
            features = imread(img_path, as_gray=True)
            all_features.append(features)
            all_labels.append(labels[filename])
    np_features = np.array(all_features)
    np_labels = np.array(all_labels)
    
    np_features = np_features.reshape(-1,512, 512,1)
#     np_labels = np_labels.reshape(-1,512, 512,1)
    
    return np_features, np_labels

def get_test_data(): 

    X, Y = extract_features_with_conv_testset()
    
    te_X = X
    te_Y = Y
    
    return te_X, te_Y



## Using One-encoding to avoid shape errors on the output

In [8]:

x_train, y_train, x_test, y_test = get_data()



In [21]:
model_CNN_plus = Sequential()
model_CNN_plus.add(Conv2D(32, (3, 3), input_shape=(512, 512, 1)))
model_CNN_plus.add(Activation('relu'))
model_CNN_plus.add(MaxPooling2D(pool_size=(2, 2)))

model_CNN_plus.add(Conv2D(32, (3, 3)))
model_CNN_plus.add(Activation('relu'))
model_CNN_plus.add(MaxPooling2D(pool_size=(2, 2)))

model_CNN_plus.add(Conv2D(64, (3, 3)))
model_CNN_plus.add(Activation('relu'))
model_CNN_plus.add(MaxPooling2D(pool_size=(2, 2)))

model_CNN_plus.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model_CNN_plus.add(Dense(64))
model_CNN_plus.add(Activation('relu'))
model_CNN_plus.add(Dropout(0.5))
model_CNN_plus.add(Dense(4))
model_CNN_plus.add(Activation('sigmoid'))

model_CNN_plus.summary()

model_CNN_plus.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0001),
              metrics=['accuracy'])




Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 510, 510, 32)      320       
_________________________________________________________________
activation_22 (Activation)   (None, 510, 510, 32)      0         
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 255, 255, 32)      0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 253, 253, 32)      9248      
_________________________________________________________________
activation_23 (Activation)   (None, 253, 253, 32)      0         
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 126, 126, 32)      0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 124, 124, 64)     

### The difference is made here: the label inputs (train and test) are one-hot encoded

Note: fitting this model takes more than 10hours to run on a CPU. 

In [20]:
optimizer = Adam(lr = 10e-5)

model_CNN_plus.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer="adam")



In [13]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes=4)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=4)

In [22]:
x_train.shape

(2400, 512, 512, 1)

In [23]:
# model_CNN_plus.fit(x_train, tf.one_hot(y_train, 4) ,steps_per_epoch = 30, epochs = 15 ,validation_data=(x_test, tf.one_hot(y_test, 4)), validation_steps = 10)
model_CNN_plus.fit(x_train, y_train,batch_size = 64, epochs = 20 ,validation_data=(x_test, y_test))


Train on 2400 samples, validate on 600 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x7f8eab69c490>

## Saving the model

In [24]:
model_CNN_plus.save('CNN_Model_Cat_one_hot')

## Loading One-Hot Encoded model

In [41]:
model = tf.keras.models.load_model('CNN_Model_Cat_one_hot')

## Evaluating using testing dataset

In [38]:
# X, y = extract_features_with_conv_testset()


x_TEST, y_TEST = get_test_data()
y_TEST = tf.keras.utils.to_categorical(y_TEST, num_classes=4)

In [39]:
y_TEST.shape

(200, 4)

In [40]:
result_one_hot = model_CNN_plus.evaluate(x_TEST, y_TEST)

print("test loss, test acc:", result_one_hot)

test loss, test acc: [0.18194288969039918, 0.9300000071525574]
