## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report,accuracy_score
from skimage.io import imread, imshow
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, InputLayer, BatchNormalization, Dense, Dropout, MaxPooling2D, Activation
from keras.optimizers import Adam
import os 

print("All imports done")

Using TensorFlow backend.


All imports done


## Data preprocessing


In [2]:
basedir = './dataset'
images_dir = os.path.join(basedir,'image')
labels_filename = 'label.csv'

def categorical_labelling(): 
    labels_file = open(os.path.join(basedir, labels_filename), 'r')
    lines = labels_file.readlines()
    tumor_labels = {line.split(',')[0] : (line.split(',')[1].strip()) for line in lines[1:]}

    for i in tumor_labels: 
        if tumor_labels[i] == 'no_tumor': 
            tumor_labels[i] = 0
        elif tumor_labels[i] == "meningioma_tumor":
            tumor_labels[i] = 1
        elif tumor_labels[i] == "glioma_tumor":
            tumor_labels[i] = 2
        elif tumor_labels[i] == "pituitary_tumor":
            tumor_labels[i] = 3
    return(tumor_labels)


In [3]:
def extract_features():
    all_features = []
    all_labels = []
    
    labels = categorical_labelling()

    image_paths = [os.path.join(images_dir, l) for l in os.listdir(images_dir)]
    if os.path.isdir(images_dir):
        all_features = []
        all_labels = []
        for img_path in image_paths:
            filename = img_path.split('/')[-1]
            features = imread(img_path, as_gray=True)
            features.shape, features
            all_features.append(features)
            all_labels.append(labels[filename])
    np_features = np.array(all_features)
    np_labels = np.array(all_labels)
    
    np_features =np_features.reshape(-1,512, 512,1)
#     np_labels = np_labels.reshape(-1,512, 512,1)
    
    
    return np_features, np_labels

In [4]:
def get_data(): 

    X, Y = extract_features()

    tr_X = X[:2400]
    tr_Y = Y[:2400]
    te_X = X[2400:]
    te_Y = Y[2400:]
    
    return tr_X, tr_Y, te_X, te_Y

## Testing data preprocessing

In [5]:
## Preparing testing dataset

testdir = './test'
test_images_dir = os.path.join(testdir,'image')
test_labels_filename = 'label.csv'

def categorical_labelling_testset():
    labels_file = open(os.path.join(testdir, test_labels_filename), 'r')
    lines = labels_file.readlines()
    tumor_labels = {line.split(',')[0] : (line.split(',')[1].strip()) for line in lines[1:]}

    for i in tumor_labels: 
        if tumor_labels[i] == 'no_tumor': 
            tumor_labels[i] = 0
        elif tumor_labels[i] == "meningioma_tumor":
            tumor_labels[i] = 1
        elif tumor_labels[i] == "glioma_tumor":
            tumor_labels[i] = 2
        elif tumor_labels[i] == "pituitary_tumor":
            tumor_labels[i] = 3
    return(tumor_labels)

def extract_features_with_conv_testset():
    all_features = []
    all_labels = []
    
    labels = categorical_labelling_testset()

    image_paths = [os.path.join(test_images_dir, l) for l in os.listdir(test_images_dir)]
    if os.path.isdir(images_dir):
        all_features = []
        all_labels = []
        for img_path in image_paths:
            filename = img_path.split('/')[-1]
            features = imread(img_path, as_gray=True)
            all_features.append(features)
            all_labels.append(labels[filename])
    np_features = np.array(all_features)
    np_labels = np.array(all_labels)
    
    np_features = np_features.reshape(-1,512, 512,1)
    
    return np_features, np_labels

def get_test_data(): 

    X, Y = extract_features_with_conv_testset()
    
    te_X = X
    te_Y = Y
    
    return te_X, te_Y



## Create CNN


In [8]:

x_train, y_train, x_test, y_test = get_data()



In [21]:
model_CNN_plus = Sequential()
model_CNN_plus.add(Conv2D(32, (3, 3), input_shape=(512, 512, 1)))
model_CNN_plus.add(Activation('relu'))
model_CNN_plus.add(MaxPooling2D(pool_size=(2, 2)))

model_CNN_plus.add(Conv2D(32, (3, 3)))
model_CNN_plus.add(Activation('relu'))
model_CNN_plus.add(MaxPooling2D(pool_size=(2, 2)))

model_CNN_plus.add(Conv2D(64, (3, 3)))
model_CNN_plus.add(Activation('relu'))
model_CNN_plus.add(MaxPooling2D(pool_size=(2, 2)))

model_CNN_plus.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model_CNN_plus.add(Dense(64))
model_CNN_plus.add(Activation('relu'))
model_CNN_plus.add(Dropout(0.5))
model_CNN_plus.add(Dense(4))
model_CNN_plus.add(Activation('sigmoid'))

model_CNN_plus.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 510, 510, 32)      320       
_________________________________________________________________
activation_22 (Activation)   (None, 510, 510, 32)      0         
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 255, 255, 32)      0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 253, 253, 32)      9248      
_________________________________________________________________
activation_23 (Activation)   (None, 253, 253, 32)      0         
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 126, 126, 32)      0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 124, 124, 64)     

### Compilation of the model


In [20]:
optimizer = Adam(lr = 10e-5)

model_CNN_plus.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer="adam")



### One-hot-encoding of inputs

In [13]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes=4)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=4)

In [22]:
x_train.shape

(2400, 512, 512, 1)

### Training of the model
Note: fitting this model takes more than 10hours to run on a CPU. 

In [23]:
model_CNN_plus.fit(x_train, y_train,batch_size = 64, epochs = 20 ,validation_data=(x_test, y_test))


Train on 2400 samples, validate on 600 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x7f8eab69c490>

## Saving the model

In [24]:
model_CNN_plus.save('CNN_Model_Cat_one_hot')

## Loading One-Hot Encoded model

In [6]:
model = tf.keras.models.load_model('CNN_Model_Cat_one_hot')

2022-01-05 12:41:53.609804: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-05 12:41:53.610120: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.


## Evaluating using testing dataset

In [7]:
x_TEST, y_TEST = get_test_data()
y_TEST = tf.keras.utils.to_categorical(y_TEST, num_classes=4)

y_TEST.shape

In [10]:
result_one_hot = model.evaluate(x_TEST, y_TEST)

print("test loss, test acc:", result_one_hot)

test loss, test acc: [0.18194288969039918, 0.93]


In [31]:
pred = model.predict_classes(x_TEST, verbose = 0)

In [38]:
y_TEST
rounded_testing_labels = np.argmax(y_TEST, axis = 1)
print(rounded_testing_labels)

[1 2 3 1 1 0 1 1 1 1 0 1 2 0 1 0 1 1 3 2 0 2 1 1 1 0 1 1 0 3 1 1 0 3 1 3 2
 2 1 2 0 2 3 2 0 1 3 0 3 1 1 2 3 3 1 2 0 3 3 2 3 3 1 2 3 3 1 0 3 2 1 1 3 1
 3 2 3 2 3 0 2 1 3 3 0 1 3 1 0 1 2 2 2 0 3 2 1 1 1 3 0 3 0 1 3 2 1 0 1 3 1
 1 0 3 2 3 2 2 0 3 2 3 0 2 1 0 3 2 3 3 1 3 1 1 1 0 1 0 1 1 1 2 0 1 0 2 3 2
 1 0 1 1 3 3 0 0 2 3 0 1 3 3 3 2 1 2 0 1 2 1 2 0 0 3 3 1 2 2 1 1 1 1 3 0 0
 2 1 1 2 2 3 2 1 3 2 3 1 1 3 3]


In [40]:
c_report_testing = classification_report(rounded_testing_labels, pred, target_names = ["no tumour", "meningioma_tumor", "glioma_tumor", "pituitary_tumor"])
print(c_report_testing)

                  precision    recall  f1-score   support

       no tumour       0.88      0.81      0.85        37
meningioma_tumor       0.85      0.82      0.84        68
    glioma_tumor       0.88      0.86      0.87        43
 pituitary_tumor       0.90      1.00      0.95        52

        accuracy                           0.88       200
       macro avg       0.88      0.87      0.87       200
    weighted avg       0.87      0.88      0.87       200

