In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import cv2
import random

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Root folder of the image dataset (relative path); the loading cells read
# its 'Training' and 'Testing' sub-folders.
dataset_loc = "dataset"

# Every image is resized to IMG_SIZE x IMG_SIZE pixels before being stored.
IMG_SIZE = 250

In [3]:
# Class names. Each name is also the sub-folder that holds that class's
# images, and a name's position in this list is used as its integer label.
categories = [
    'glioma_tumor',
    'meningioma_tumor',
    'no_tumor',
    'pituitary_tumor',
]

In [4]:
# Build the training set: one [image, label] pair for every readable file
# under <dataset_loc>/Training/<category>/.
training_data = []
training_path = os.path.join(dataset_loc, 'Training')
skipped_training_files = []  # paths OpenCV could not decode as an image

# enumerate() yields each category together with its list position, which
# serves as the integer class label.
for category_idx, category in enumerate(categories):
    cancer_path = os.path.join(training_path, category)
    for img_file in os.listdir(cancer_path):
        img_path = os.path.join(cancer_path, img_file)

        # cv2.imread reports an unreadable / non-image file by returning
        # None rather than raising, so check for it explicitly instead of
        # hiding every possible error behind a bare `except`.
        img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # load as gray scale
        if img_array is None:
            skipped_training_files.append(img_path)
            continue

        img_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # all images same size
        # NOTE(review): with its default arguments tf.keras.utils.normalize
        # appears to L2-normalise along the last axis (i.e. per image row),
        # not rescale pixels to [0, 1] -- confirm this is the intended
        # preprocessing.
        img_array = tf.keras.utils.normalize(img_array)

        training_data.append([img_array, category_idx])

# Skipping is still best-effort, but no longer silent.
if skipped_training_files:
    print(f"Skipped {len(skipped_training_files)} unreadable training file(s), "
          f"e.g. {skipped_training_files[:3]}")

In [5]:
# Build the validation set: one [image, label] pair for every readable file
# under <dataset_loc>/Testing/<category>/.
validation_data = []
validation_path = os.path.join(dataset_loc, 'Testing')
skipped_validation_files = []  # paths OpenCV could not decode as an image

# enumerate() yields each category together with its list position, which
# serves as the integer class label.
for category_idx, category in enumerate(categories):
    cancer_path = os.path.join(validation_path, category)
    for img_file in os.listdir(cancer_path):
        img_path = os.path.join(cancer_path, img_file)

        # cv2.imread reports an unreadable / non-image file by returning
        # None rather than raising, so check for it explicitly instead of
        # hiding every possible error behind a bare `except`.
        img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # load as gray scale
        if img_array is None:
            skipped_validation_files.append(img_path)
            continue

        img_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # all images same size
        # NOTE(review): with its default arguments tf.keras.utils.normalize
        # appears to L2-normalise along the last axis (i.e. per image row),
        # not rescale pixels to [0, 1] -- confirm this is the intended
        # preprocessing.
        img_array = tf.keras.utils.normalize(img_array)

        validation_data.append([img_array, category_idx])

# Skipping is still best-effort, but no longer silent.
if skipped_validation_files:
    print(f"Skipped {len(skipped_validation_files)} unreadable validation file(s), "
          f"e.g. {skipped_validation_files[:3]}")

In [6]:
# Shuffle both sets in place so that samples are no longer grouped by class
# (the loading loops append one category after another).
# A dedicated, seeded generator makes the resulting order identical on every
# "Restart & Run All" without altering the global `random` module state.
shuffle_rng = random.Random(42)
shuffle_rng.shuffle(training_data)
shuffle_rng.shuffle(validation_data)

In [7]:
# Split each [image, label] pair into parallel lists: X_* holds the images,
# y_* holds the matching integer labels, in the same order.
X_train = [image for image, _ in training_data]
y_train = [label for _, label in training_data]

X_validation = [image for image, _ in validation_data]
y_validation = [label for _, label in validation_data]

In [8]:
# Turn the image and label lists into NumPy arrays; the reshape and the
# model fitting further down operate on arrays, not Python lists.
X_train, y_train = np.array(X_train), np.array(y_train)
X_validation, y_validation = np.array(X_validation), np.array(y_validation)

In [9]:
# Flatten every sample into a single feature row: the first axis (one entry
# per image) is kept and all remaining axes are collapsed into one.
X_train = X_train.reshape((len(X_train), -1))
X_validation = X_validation.reshape((len(X_validation), -1))

In [10]:
# Sanity check: show the shapes of the training features and training labels.
print(X_train.shape)
print(y_train.shape)

(2870, 62500)
(2870,)


In [11]:
# Create a scikit-learn random forest classifier with 200 trees (this is not
# a Keras model) and fit it on the flattened training images.
# random_state fixes the forest's internal randomness so that the reported
# validation score can be reproduced from run to run.
model = RandomForestClassifier(n_estimators=200, random_state=42)

model.fit(X_train, y_train)

In [12]:
# Evaluate the fitted model on the validation images; for scikit-learn
# classifiers `score` is documented as the mean accuracy.
score = model.score(X=X_validation, y=y_validation)

In [13]:
# Display the validation score returned by model.score().
print(score)

0.682741116751269
