<a href="https://colab.research.google.com/github/tukun29/BrainMRI_Classifier/blob/main/Brain_MRI_Classification_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#IMPORTS
'''
  # Deep learning framework (e.g., TensorFlow/Keras or PyTorch)
   # Image processing libraries
   # Data handling (e.g., NumPy, pandas)
'''
import numpy as np # used for numeral operations on the images as arrays
import os #allows the use of files
import pandas as pd #matched images to their labels
import tensorflow as tf #source of all AI/neural network methods

from tensorflow.keras.preprocessing.image import ImageDataGenerator #for preprcessing
from tensorflow.keras.models import Sequential #stacks layers of the CNN linearly
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout #used to build the model
from google.colab import drive #allows me to bring in the images from my drive
drive.mount('/content/drive')
import cv2 # does image processing tasks such as resizing
import matplotlib.pyplot as plt # plots the models accuracy and other parameters
import random # allows us to shufffle the data so it gets randomized


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#PREPROCESSING - load every image, resize it to one common size, and pair it
# with a numeric class label so the CNN always receives same-shaped input.

# Directories - stored in variables so the full Drive path is written only once.
train_dir = '/content/drive/MyDrive/Brain_CNN_Data/Training/'
test_dir = '/content/drive/MyDrive/Brain_CNN_Data/Testing/'
categories = ['notumor', 'glioma', 'meningioma', 'pituitary']


def load_labeled_images(data_dir, class_names, image_size=(128, 128)):
    """Load and resize every image found under ``data_dir/<class_name>/``.

    Args:
        data_dir: Folder that contains one sub-folder per class.
        class_names: Sub-folder names; the position of a name in this list
            is used as the numeric label (e.g. index 0 -> label 0).
        image_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        A list of ``[resized_image, label]`` pairs, one per readable file.
    """
    samples = []
    for label, class_name in enumerate(class_names):
        class_path = os.path.join(data_dir, class_name)
        # sorted() gives a stable pre-shuffle order; os.listdir order is arbitrary.
        for file_name in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, file_name)
            image = cv2.imread(file_path)  # pulls the image from Drive
            if image is None:
                # cv2.imread returns None (it does not raise) when a file is
                # missing, corrupt, or not an image; cv2.resize would crash on it.
                print(f"Skipping unreadable file: {file_path}")
                continue
            samples.append([cv2.resize(image, image_size), label])
    return samples


# Training images come from train_dir and testing images from test_dir.
# The two sets must stay separate: if the model is trained on the same files
# it is evaluated on, the reported validation accuracy is meaningless.
training_data = load_labeled_images(train_dir, categories)
testing_data = load_labeled_images(test_dir, categories)

# Shuffle both sets on every run so the images are not grouped by class.
random.shuffle(training_data)
random.shuffle(testing_data)





In [None]:
#BUILD_CNN - using the imports from keras.layers to build the model

# Sequential stacks the layers linearly: the output of each layer is the
# input of the next one.
model = Sequential([
    # Declare the input shape with an explicit Input object instead of passing
    # input_shape= to the first Conv2D; Keras warns about the latter for
    # Sequential models. Images are 128x128 with 3 colour channels.
    tf.keras.Input(shape=(128, 128, 3)),
    # Conv2D(number of filters, filter dimensions, activation function)
    #   each filter is associated with a feature/trait that it looks for as it
    #   slides across the picture; its responses form a "feature map"
    #   "relu" is the rectified linear unit, a commonly used activation function
    # MaxPooling2D(pool size, stride)
    #   keeps only the largest value in each 2x2 window, which shrinks the
    #   feature maps (roughly half the width and height) while keeping the
    #   strongest responses
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),  # flattens the stacked 2D feature maps into one 1D vector
    Dense(128, activation='relu'),  # fully connected layer that combines all extracted features
    Dropout(0.5),  # randomly zeroes half of the inputs during training to reduce overfitting
    Dense(4, activation='softmax')  # one output per category; softmax turns them into probabilities
])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
#COMPILE_CNN
from tensorflow.keras.utils import to_categorical

# Stack the images into one array per split, since the model takes arrays
# rather than Python lists. Building the array as float32 and then dividing
# by 255 scales every pixel from the 0-255 range into the 0-1 range without
# the float64 copy that dividing a uint8 array by 255.0 would create.
X_train = np.array([sample[0] for sample in training_data], dtype='float32') / 255.0
X_test = np.array([sample[0] for sample in testing_data], dtype='float32') / 255.0

# Integer class labels (the index of each image's category)
train_labels = np.array([sample[1] for sample in training_data])
test_labels = np.array([sample[1] for sample in testing_data])

# Sanity checks: fail early with a clear message if a split is empty or the
# images do not match the input shape the model was built for.
assert X_train.shape[1:] == (128, 128, 3), f"unexpected training array shape {X_train.shape}"
assert X_test.shape[1:] == (128, 128, 3), f"unexpected testing array shape {X_test.shape}"

# One-hot encode labels, which is the format categorical_crossentropy expects:
# each label becomes a vector of 0s with a single 1 at the class index
# (ex. label 2 -> [0, 0, 1, 0]). The vector length follows the category list.
num_classes = len(categories)
y_train = to_categorical(train_labels, num_classes=num_classes)
y_test = to_categorical(test_labels, num_classes=num_classes)

# Compile the model defined earlier:
#   optimizer - the algorithm that adjusts the weights after each batch
#   loss      - measures how wrong the model's predictions are overall
#   metrics   - accuracy is tracked and reported for every epoch
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 25 epochs (25 full passes over the training data) and evaluate on
# the testing arrays after each epoch.
# NOTE: the test set doubles as validation data here, so the val_* numbers
# should not also be used to tune the model if an unbiased test score is needed.
# The returned History object is kept so the per-epoch loss/accuracy curves
# can be plotted later (history.history).
history = model.fit(X_train, y_train, epochs=25, validation_data=(X_test, y_test))

Epoch 1/25
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 1s/step - accuracy: 0.3974 - loss: 1.3147 - val_accuracy: 0.6631 - val_loss: 0.8213
Epoch 2/25
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1s/step - accuracy: 0.6639 - loss: 0.8084 - val_accuracy: 0.7525 - val_loss: 0.5932
Epoch 3/25
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 1s/step - accuracy: 0.7485 - loss: 0.6407 - val_accuracy: 0.8002 - val_loss: 0.5065
Epoch 4/25
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 1s/step - accuracy: 0.8063 - loss: 0.4932 - val_accuracy: 0.8728 - val_loss: 0.3532
Epoch 5/25
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 1s/step - accuracy: 0.8287 - loss: 0.4572 - val_accuracy: 0.8834 - val_loss: 0.3311
Epoch 6/25
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 1s/step - accuracy: 0.8390 - loss: 0.3722 - val_accuracy: 0.9281 - val_loss: 0.2441
Epoch 7/25
[1m42/42[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7e972775d990>

In [None]:
#SAVE_CNN