## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report,accuracy_score
from sklearn import svm
from skimage.io import imread, imshow
import os 

print("All imports done")

Using TensorFlow backend.


All imports done


## Data preprocessing


In [2]:
basedir = './dataset'
images_dir = os.path.join(basedir,'image')
labels_filename = 'label.csv'

def categorical_labelling(): 
    labels_file = open(os.path.join(basedir, labels_filename), 'r')
    lines = labels_file.readlines()
    tumor_labels = {line.split(',')[0] : (line.split(',')[1].strip()) for line in lines[1:]}

    for i in tumor_labels: 
        if tumor_labels[i] == 'no_tumor': 
            tumor_labels[i] = 0
        elif tumor_labels[i] == "meningioma_tumor":
            tumor_labels[i] = 1
        elif tumor_labels[i] == "glioma_tumor":
            tumor_labels[i] = 2
        elif tumor_labels[i] == "pituitary_tumor":
            tumor_labels[i] = 3
    return(tumor_labels)


In [3]:
def extract_features():
    all_features = []
    all_labels = []
    
    labels = categorical_labelling()

    image_paths = [os.path.join(images_dir, l) for l in os.listdir(images_dir)]
    print(images_dir)
    counter = 0
    if os.path.isdir(images_dir):
        all_features = []
        all_labels = []
        for img_path in image_paths:
            filename = img_path.split('/')[-1]
            img = imread(img_path, as_gray=True)
            features = np.reshape(img, (512*512))
            features.shape, features
            all_features.append(features)
            all_labels.append(labels[filename])
    np_features = np.array(all_features)
    np_labels = np.array(all_labels)
    return np_features, np_labels
        

In [4]:
def get_data(): 

    X, y = extract_features()

    Y = np.array([y, -(y - 1)]).T
    tr_X = X[:2400]
    tr_Y = Y[:2400]
    te_X = X[2400:]
    te_Y = Y[2400:]
    
    return tr_X, tr_Y, te_X, te_Y

In [5]:
tr_X, tr_Y, te_X, te_Y= get_data()

classifier = svm.SVC(kernel='linear')
classifier.fit(tr_X, list(zip(*tr_Y))[0])

pred = classifier.predict(te_X)

print(pred)

print("Accuracy:", accuracy_score(list(zip(*te_Y))[0], pred))

./dataset/image
[3 3 0 2 1 3 2 3 3 2 2 2 2 2 3 1 3 1 1 1 3 2 2 2 3 3 2 3 3 3 3 3 1 3 3 2 2
 2 2 2 3 2 3 2 0 2 3 0 0 2 3 2 2 3 3 3 2 0 1 1 2 2 2 3 2 0 2 3 2 3 3 0 1 3
 1 2 1 1 1 3 2 2 2 3 1 1 3 3 3 3 3 3 1 1 0 2 2 0 1 0 2 3 1 2 3 1 1 1 1 0 3
 3 1 2 0 1 2 1 3 2 3 3 2 2 1 3 2 3 2 1 1 1 3 3 0 1 0 2 3 3 2 2 1 3 1 3 3 1
 3 2 3 0 0 1 3 2 2 3 3 2 0 3 3 2 1 2 2 3 1 0 0 2 1 1 2 2 2 2 2 3 3 2 3 3 2
 1 2 0 0 3 1 0 1 3 3 2 2 3 2 1 3 1 3 3 1 2 1 1 2 3 3 2 2 1 3 3 2 1 3 0 1 3
 0 1 1 3 1 2 2 1 2 1 0 2 2 2 1 2 1 2 2 1 2 1 1 1 1 0 1 0 3 1 1 2 2 1 1 2 3
 3 2 3 2 1 2 3 2 2 2 2 1 1 3 1 2 1 1 2 0 0 0 1 3 0 2 2 2 1 2 2 1 3 2 1 0 1
 3 1 0 2 1 1 0 1 1 3 2 3 2 2 1 1 0 1 1 0 2 1 2 1 0 3 1 3 1 3 2 1 1 1 0 2 1
 3 3 2 3 1 1 0 1 1 3 1 2 1 3 3 3 3 1 1 1 0 1 3 3 3 2 3 3 2 0 1 1 2 0 2 1 2
 1 3 2 1 3 3 2 1 2 1 1 3 2 3 1 2 1 1 1 3 1 0 2 2 3 0 3 1 0 3 3 0 1 1 1 2 3
 2 2 3 0 2 3 1 2 3 3 2 3 2 1 1 1 2 2 3 0 3 3 2 3 3 3 2 2 1 0 2 0 2 1 1 3 3
 2 3 2 1 3 2 3 3 2 2 1 2 2 0 0 0 1 3 1 2 3 1 1 2 1 3 3 2 1 0 1 3 3 1 2 0 2
 3 2 1 2 

In [8]:
rounded_labels = list(zip(*te_Y))[0]
cm = classification_report(rounded_labels, pred, target_names = ["no tumour", "meningioma_tumor", "glioma_tumor", "pituitary_tumor"])
print(cm)

                  precision    recall  f1-score   support

       no tumour       0.80      0.70      0.74        79
meningioma_tumor       0.73      0.72      0.72       174
    glioma_tumor       0.74      0.77      0.76       184
 pituitary_tumor       0.96      0.99      0.97       163

        accuracy                           0.81       600
       macro avg       0.81      0.79      0.80       600
    weighted avg       0.80      0.81      0.80       600



## Testing dataset


### Testing data pre-processing


In [9]:
testdir = './test'
test_images_dir = os.path.join(testdir,'image')
test_labels_filename = 'label.csv'

def categorical_labelling_testset(): 
    labels_file = open(os.path.join(testdir, test_labels_filename), 'r')
    lines = labels_file.readlines()
    tumor_labels = {line.split(',')[0] : (line.split(',')[1].strip()) for line in lines[1:]}

    for i in tumor_labels: 
        if tumor_labels[i] == 'no_tumor': 
            tumor_labels[i] = 0
        elif tumor_labels[i] == "meningioma_tumor":
            tumor_labels[i] = 1
        elif tumor_labels[i] == "glioma_tumor":
            tumor_labels[i] = 2
        elif tumor_labels[i] == "pituitary_tumor":
            tumor_labels[i] = 3
    return(tumor_labels)

In [10]:
def extract_features_testset():
    all_features = []
    all_labels = []
    
    labels = categorical_labelling_testset()

    image_paths = [os.path.join(test_images_dir, l) for l in os.listdir(test_images_dir)]
    if os.path.isdir(images_dir):
        all_features = []
        all_labels = []
        for img_path in image_paths:
            filename = img_path.split('/')[-1]
            img = imread(img_path, as_gray=True)
            features = np.reshape(img, (512*512))
            features.shape, features
            all_features.append(features)
            all_labels.append(labels[filename])
    np_features = np.array(all_features)
    np_labels = np.array(all_labels)
    return np_features, np_labels

### Testing


In [12]:
testing_x, testing_y = extract_features_testset()
testing_Y = np.array([testing_y, -(testing_y - 1)]).T

pred_testing = classifier.predict(testing_x)

print("Accuracy:", accuracy_score(list(zip(*testing_Y))[0], pred_testing))

rounded_testing_labels = list(zip(*testing_Y))[0]
c_report_testing = classification_report(rounded_testing_labels, pred_testing, target_names = ["no tumour", "meningioma_tumor", "glioma_tumor", "pituitary_tumor"])
print(c_report_testing)

Accuracy: 0.81
                  precision    recall  f1-score   support

       no tumour       0.79      0.73      0.76        37
meningioma_tumor       0.80      0.71      0.75        68
    glioma_tumor       0.73      0.86      0.79        43
 pituitary_tumor       0.91      0.96      0.93        52

        accuracy                           0.81       200
       macro avg       0.81      0.81      0.81       200
    weighted avg       0.81      0.81      0.81       200

