## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report,accuracy_score
from skimage.io import imread, imshow
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten, Activation, InputLayer, BatchNormalization
import os 

# Reaching this line means every import above resolved without raising.
print("All imports done")

All imports done


Using TensorFlow backend.


## Define Directories


In [2]:
# Training-data locations, all relative to the notebook's working directory.
labels_filename = 'label.csv'                 # label CSV, looked up inside `basedir`
basedir = './dataset'                         # root folder of the training dataset
images_dir = os.path.join(basedir, 'image')   # folder holding the image files

## Data Preprocessing

In [3]:
def binary_labelling(labels_path=None):
    """Map every image file name in the label CSV to a binary tumour label.

    The first line of the CSV is treated as a header and skipped. In each
    remaining row the first comma-separated field is used as the key (the
    image file name) and the second field is the textual label.

    Parameters
    ----------
    labels_path : str, optional
        Path of the label CSV. When omitted, the file
        ``os.path.join(basedir, labels_filename)`` is read.

    Returns
    -------
    dict
        ``{file_name: 0}`` for rows labelled ``'no_tumor'`` and
        ``{file_name: 1}`` for every other label.
    """
    if labels_path is None:
        labels_path = os.path.join(basedir, labels_filename)

    tumor_labels = {}
    # The context manager guarantees the file handle is closed, even on error.
    with open(labels_path, 'r') as labels_file:
        next(labels_file, None)  # skip the header row (no-op on an empty file)
        for line in labels_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = line.split(',')
            tumor_labels[fields[0]] = 0 if fields[1].strip() == 'no_tumor' else 1
    return tumor_labels

In [4]:
def extract_features_with_conv():
    """Load every training image as a grayscale array together with its label.

    Each entry of ``images_dir`` is read with ``imread(..., as_gray=True)`` and
    paired with the binary label returned by ``binary_labelling()`` for the
    same file name.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The images reshaped to ``(-1, 512, 512, 1)`` and the matching labels.
        NOTE(review): the reshape assumes every image is 512x512 - confirm
        against the dataset.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``images_dir`` is missing or is
        not a directory.
    KeyError
        When an image file has no row in the label CSV.
    """
    labels = binary_labelling()

    all_features = []
    all_labels = []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any positional train/validation split) reproducible.
    for filename in sorted(os.listdir(images_dir)):
        # Use the directory entry itself as the label key instead of splitting
        # the joined path on '/', which fails where the separator is '\\'.
        img_path = os.path.join(images_dir, filename)
        all_features.append(imread(img_path, as_gray=True))
        all_labels.append(labels[filename])

    np_features = np.array(all_features).reshape(-1, 512, 512, 1)
    np_labels = np.array(all_labels)

    return np_features, np_labels
        

In [5]:
def get_data_with_conv(n_train=2400):
    """Load the dataset and split it positionally into training and held-out parts.

    Parameters
    ----------
    n_train : int, optional
        Number of leading samples assigned to the training split; the rest
        form the held-out split. Defaults to 2400, the value previously
        hard-coded here.

    Returns
    -------
    tuple
        ``(tr_X, tr_Y, te_X, te_Y)`` - training images, training labels,
        held-out images, held-out labels.
    """
    X, Y = extract_features_with_conv()

    # Plain positional split: no shuffling is applied here.
    tr_X, te_X = X[:n_train], X[n_train:]
    tr_Y, te_Y = Y[:n_train], Y[n_train:]

    return tr_X, tr_Y, te_X, te_Y

## Create Convolutional Neural Net


In [6]:
# Test-set locations, all relative to the notebook's working directory.
test_labels_filename = 'label.csv'                 # label CSV, looked up inside `testdir`
testdir = './test'                                 # root folder of the test dataset
test_images_dir = os.path.join(testdir, 'image')   # folder holding the test images

def binary_labelling_testset(labels_path=None):
    """Map every test image file name in the label CSV to a binary tumour label.

    The first line of the CSV is treated as a header and skipped. In each
    remaining row the first comma-separated field is used as the key (the
    image file name) and the second field is the textual label.

    Parameters
    ----------
    labels_path : str, optional
        Path of the label CSV. When omitted, the file
        ``os.path.join(testdir, test_labels_filename)`` is read.

    Returns
    -------
    dict
        ``{file_name: 0}`` for rows labelled ``'no_tumor'`` and
        ``{file_name: 1}`` for every other label.
    """
    if labels_path is None:
        labels_path = os.path.join(testdir, test_labels_filename)

    tumor_labels = {}
    # The context manager guarantees the file handle is closed, even on error.
    with open(labels_path, 'r') as labels_file:
        next(labels_file, None)  # skip the header row (no-op on an empty file)
        for line in labels_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = line.split(',')
            tumor_labels[fields[0]] = 0 if fields[1].strip() == 'no_tumor' else 1
    return tumor_labels

In [7]:
def extract_features_with_conv_testset():
    """Load every test image as a grayscale array together with its label.

    Each entry of ``test_images_dir`` is read with ``imread(..., as_gray=True)``
    and paired with the binary label returned by ``binary_labelling_testset()``
    for the same file name.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The images reshaped to ``(-1, 512, 512, 1)`` and the matching labels.
        NOTE(review): the reshape assumes every image is 512x512 - confirm
        against the dataset.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``test_images_dir`` is missing or
        is not a directory.
    KeyError
        When an image file has no row in the label CSV.
    """
    labels = binary_labelling_testset()

    all_features = []
    all_labels = []
    # The previous guard tested the *training* folder (`images_dir`), so a
    # missing training folder silently produced an empty test set. No guard is
    # needed: os.listdir already raises when `test_images_dir` is unusable.
    # Sorting makes the sample order reproducible (os.listdir order is arbitrary).
    for filename in sorted(os.listdir(test_images_dir)):
        # Use the directory entry itself as the label key instead of splitting
        # the joined path on '/', which fails where the separator is '\\'.
        img_path = os.path.join(test_images_dir, filename)
        all_features.append(imread(img_path, as_gray=True))
        all_labels.append(labels[filename])

    np_features = np.array(all_features).reshape(-1, 512, 512, 1)
    np_labels = np.array(all_labels)
    return np_features, np_labels


In [13]:
# Load all images under `images_dir` and split them positionally into the
# training arrays (tr_*) and the held-out arrays (te_*).
tr_X, tr_Y, te_X, te_Y= get_data_with_conv()

In [19]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D
from keras.layers import Conv1D, MaxPool1D, Flatten, Dense, InputLayer, BatchNormalization, Dropout
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, Activation

# Binary classifier for 512x512 single-channel images: three
# convolution -> ReLU -> 2x2 max-pooling stages, then a small dense head
# ending in one sigmoid unit.
model_CNN_plus = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), input_shape=(512, 512, 1)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Converts the 3D feature maps to 1D feature vectors.
    Flatten(),

    # Dense head with dropout before the single-unit sigmoid output.
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

model_CNN_plus.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 510, 510, 32)      320       
_________________________________________________________________
activation_11 (Activation)   (None, 510, 510, 32)      0         
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 255, 255, 32)      0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 253, 253, 32)      9248      
_________________________________________________________________
activation_12 (Activation)   (None, 253, 253, 32)      0         
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 126, 126, 32)      0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 124, 124, 64)     

In [20]:
# Binary cross-entropy pairs with the single sigmoid output unit of the model;
# accuracy is tracked as the reported metric and Adam is selected by name.
model_CNN_plus.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')


In [21]:
# Train for 20 epochs in mini-batches of 64 samples; (te_X, te_Y) is passed as
# validation data so held-out metrics are reported alongside the training ones.
model_CNN_plus.fit(tr_X, tr_Y,batch_size = 64, epochs = 20 ,validation_data=(te_X, te_Y))


Train on 2400 samples, validate on 600 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x7f9880f7fb10>

## Save model


In [23]:
# Persist the trained model under the path 'CNN_model_binary' (relative to the
# working directory) so it can be reloaded later without retraining.
model_CNN_plus.save('CNN_model_binary')


## Load Model


In [8]:
# Reload the model stored at 'CNN_model_binary'; the cells below use `model`.
# NOTE(review): the model is built with the standalone `keras` package but
# loaded through `tf.keras` - confirm the two are compatible in the target
# environment.
model = tf.keras.models.load_model('CNN_model_binary')


2022-01-05 12:33:05.259356: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-05 12:33:05.260202: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.


## Evaluate model on testing dataset

### Preprocessing of testing data

In [9]:
# Test-set locations, all relative to the notebook's working directory.
test_labels_filename = 'label.csv'                 # label CSV, looked up inside `testdir`
testdir = './test'                                 # root folder of the test dataset
test_images_dir = os.path.join(testdir, 'image')   # folder holding the test images

def binary_labelling_testset(labels_path=None):
    """Map every test image file name in the label CSV to a binary tumour label.

    The first line of the CSV is treated as a header and skipped. In each
    remaining row the first comma-separated field is used as the key (the
    image file name) and the second field is the textual label.

    Parameters
    ----------
    labels_path : str, optional
        Path of the label CSV. When omitted, the file
        ``os.path.join(testdir, test_labels_filename)`` is read.

    Returns
    -------
    dict
        ``{file_name: 0}`` for rows labelled ``'no_tumor'`` and
        ``{file_name: 1}`` for every other label.
    """
    if labels_path is None:
        labels_path = os.path.join(testdir, test_labels_filename)

    tumor_labels = {}
    # The context manager guarantees the file handle is closed, even on error.
    with open(labels_path, 'r') as labels_file:
        next(labels_file, None)  # skip the header row (no-op on an empty file)
        for line in labels_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = line.split(',')
            tumor_labels[fields[0]] = 0 if fields[1].strip() == 'no_tumor' else 1
    return tumor_labels

In [10]:
def extract_features_with_conv_testset():
    """Load every test image as a grayscale array together with its label.

    Each entry of ``test_images_dir`` is read with ``imread(..., as_gray=True)``
    and paired with the binary label returned by ``binary_labelling_testset()``
    for the same file name.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The images reshaped to ``(-1, 512, 512, 1)`` and the matching labels.
        NOTE(review): the reshape assumes every image is 512x512 - confirm
        against the dataset.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``test_images_dir`` is missing or
        is not a directory.
    KeyError
        When an image file has no row in the label CSV.
    """
    labels = binary_labelling_testset()

    all_features = []
    all_labels = []
    # The previous guard tested the *training* folder (`images_dir`), so a
    # missing training folder silently produced an empty test set. No guard is
    # needed: os.listdir already raises when `test_images_dir` is unusable.
    # Sorting makes the sample order reproducible (os.listdir order is arbitrary).
    for filename in sorted(os.listdir(test_images_dir)):
        # Use the directory entry itself as the label key instead of splitting
        # the joined path on '/', which fails where the separator is '\\'.
        img_path = os.path.join(test_images_dir, filename)
        all_features.append(imread(img_path, as_gray=True))
        all_labels.append(labels[filename])

    np_features = np.array(all_features).reshape(-1, 512, 512, 1)
    np_labels = np.array(all_labels)

    return np_features, np_labels

### Testing

In [11]:
# Read the separate test set (images under `test_images_dir` plus their labels).
testset_X, testset_Y = extract_features_with_conv_testset()


In [12]:
# Score the reloaded model on the test set.
# NOTE(review): `results` is presumably [loss, accuracy], as the printed label
# suggests - this depends on how the saved model was compiled.
results = model.evaluate(testset_X, testset_Y)

print("test loss, test acc:", results)


test loss, test acc: [0.0892625638356003, 0.945]


In [23]:
# `Sequential.predict_classes` is deprecated and no longer exists in newer
# TensorFlow releases. For a single sigmoid output it is equivalent to
# thresholding the predicted probability at 0.5, which is done explicitly here.
y_pred = (model.predict(testset_X, verbose=0) > 0.5).astype("int32")

print(y_pred)

[[1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]]

In [28]:
# Per-class precision/recall/F1 on the test set. The order of `target_names`
# follows the label encoding used when loading the data: 0 = no tumour, 1 = tumour.
c_report_testing = classification_report(testset_Y, y_pred, target_names = ["no tumour", "tumour"])
print(c_report_testing)

              precision    recall  f1-score   support

   no tumour       0.86      0.84      0.85        37
      tumour       0.96      0.97      0.97       163

    accuracy                           0.94       200
   macro avg       0.91      0.90      0.91       200
weighted avg       0.94      0.94      0.94       200

