**Author :** Vaibhav Thakur

In [3]:
import os
import warnings

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Suppress all warnings
# NOTE(review): this is process-wide, so library warnings raised by later cells are hidden too.
warnings.filterwarnings("ignore")

# Dataset location. Set the MRI_DATASET_DIR environment variable to point at a
# different copy of the data; the fallback is the original local path.
DATA_DIR = os.environ.get('MRI_DATASET_DIR', '/Users/sundaramvaibhav/Documents/MRI_Dataset')
train_dir = os.path.join(DATA_DIR, 'train')
test_dir = os.path.join(DATA_DIR, 'test')

# Shared loader settings
IMAGE_SIZE = (128, 128)   # every image is resized to 128 x 128
BATCH_SIZE = 32           # same value Keras uses when batch_size is omitted
SEED = 42                 # makes the training shuffle / augmentation stream repeatable

# Training images: rescale pixel values to [0, 1] and apply random augmentation.
# NOTE(review): rotation_range = 90 and shear_range = 0.4 are aggressive settings;
# confirm they are appropriate for these MRI slices.
train_datagen = ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.4,
    zoom_range = 0.2,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    rotation_range = 90,
    horizontal_flip = True,
    fill_mode = 'nearest'
)

# Test images: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale = 1./255)

# Load the images and labels, ensuring grayscale (1 color channel).
# Class labels are inferred from the sub-directory names and one-hot encoded.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size = IMAGE_SIZE,
    color_mode = 'grayscale',
    class_mode = 'categorical',
    batch_size = BATCH_SIZE,
    seed = SEED
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size = IMAGE_SIZE,
    color_mode = 'grayscale',
    class_mode = 'categorical',
    batch_size = BATCH_SIZE,
    shuffle = False           # keep file order fixed so predictions line up with test_generator.classes
)

    

Found 10240 images belonging to 4 classes.
Found 1279 images belonging to 4 classes.


**CNN MODEL**

In [2]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import Flatten
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Dropout
import numpy as np

In [3]:
# Baseline CNN: five Conv2D -> MaxPooling2D stages followed by a small dense head.
# The layers are handed to Sequential as a list, in the order they are applied.
CNN1 = Sequential([
    # --- Feature extractor: 3x3 convolutions with ReLU, each followed by 2x2 max-pooling ---
    Conv2D(256, (3, 3), activation='relu', input_shape=(128, 128, 1)),  # 128 x 128 single-channel input
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # --- Classifier head ---
    Flatten(),                       # collapse the feature maps into a vector for the dense layers
    Dense(128, activation='relu'),
    Dropout(0.2),                    # dropout to reduce overfitting
    Dense(64, activation='relu'),

    # One output unit per class (4 classes assumed from the dataset), softmax probabilities
    Dense(4, activation='softmax'),
])

# Adam optimizer with categorical cross-entropy; accuracy is tracked as the metric
CNN1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
CNN1.summary()

2026-02-10 20:04:47.294911: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2026-02-10 20:04:47.294962: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2026-02-10 20:04:47.294977: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2026-02-10 20:04:47.295005: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2026-02-10 20:04:47.295025: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [4]:
# Train the baseline CNN for 50 epochs. The returned History object is kept so the
# per-epoch loss/accuracy can be inspected afterwards (history_baseline.history),
# and so its repr is not echoed as the cell output.
# The batch size is determined by train_generator: batch_size should not be passed to
# fit() for generator input, because the generator already yields whole batches.
history_baseline = CNN1.fit(train_generator, epochs = 50)

Epoch 1/50


2026-02-10 20:04:59.283021: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 248ms/step - accuracy: 0.2542 - loss: 1.3877
Epoch 2/50
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 245ms/step - accuracy: 0.2476 - loss: 1.3870
Epoch 3/50
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 245ms/step - accuracy: 0.2460 - loss: 1.3867
Epoch 4/50
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 249ms/step - accuracy: 0.2528 - loss: 1.3867
Epoch 5/50
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 252ms/step - accuracy: 0.2514 - loss: 1.3867
Epoch 6/50
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 252ms/step - accuracy: 0.2507 - loss: 1.3866
Epoch 7/50
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 248ms/step - accuracy: 0.2412 - loss: 1.3866
Epoch 8/50
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 250ms/step - accuracy: 0.2527 - loss: 1.3864
Epoch 9/50
[1m320/320[0m 

<keras.src.callbacks.history.History at 0x319729f30>

In [6]:
# Score the trained network on the test batches
test_metrics = CNN1.evaluate(test_generator)

# The result unpacks as (loss, accuracy)
loss, accuracy1 = test_metrics

# Report the accuracy
print(f"Test accuracy of CNN1: {accuracy1}")
         

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53ms/step - accuracy: 0.5004 - loss: 1.3803
Test accuracy of CNN1: 0.5003909468650818


**With Batch Normalization**


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input

# Number of filters in each of the five convolutional blocks, in order
CONV_FILTERS = (256, 64, 256, 128, 256)

# Define the CNN model
CNN1 = Sequential()

# The input shape is declared once, up front (128 x 128, 1 channel). Only the first
# layer of a Sequential model determines the input shape, so the later convolutions
# must not repeat it.
CNN1.add(Input(shape = (128, 128, 1)))

# Convolutional blocks 1-5: Conv2D (ReLU) -> BatchNormalization -> MaxPooling2D
for n_filters in CONV_FILTERS:
    CNN1.add(Conv2D(n_filters, (3,3), activation = 'relu'))
    CNN1.add(BatchNormalization()) # Added Batch Normalization
    CNN1.add(MaxPooling2D(pool_size = (2,2)))

# Flatten before Dense layers
CNN1.add(Flatten())

# Fully connected layers: Dense (ReLU) -> BatchNormalization -> Dropout
for n_units in (128, 64):
    CNN1.add(Dense(n_units, activation = 'relu'))
    CNN1.add(BatchNormalization())  # Optional BN before dropout
    CNN1.add(Dropout(0.2))

# Output Layer
CNN1.add(Dense(4, activation = 'softmax')) # Assuming 4 classes

# Compile the model
CNN1.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Model summary
CNN1.summary()


In [9]:
# Train the batch-normalized CNN.
# Only 2 epochs to save time and compute.
# The History object is kept so the per-epoch metrics stay available
# (history_bn.history) and its repr is not echoed as the cell output.
history_bn = CNN1.fit(train_generator, epochs = 2)

Epoch 1/2
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 575ms/step - accuracy: 0.4096 - loss: 1.3547
Epoch 2/2
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 595ms/step - accuracy: 0.5792 - loss: 0.9806


<keras.src.callbacks.history.History at 0x371f69090>

In [10]:
# Score the batch-normalized network on the test batches
test_metrics = CNN1.evaluate(test_generator)

# The result unpacks as (loss, accuracy)
loss, accuracy1 = test_metrics

# Report the accuracy
print(f"Test accuracy of CNN1 with batch normalization: {accuracy1}")
         

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 92ms/step - accuracy: 0.3503 - loss: 1.6848
Test accuracy of CNN1 with batch normalization: 0.35027363896369934


**With L1 regularization**

In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras import regularizers

# L1 penalty coefficient applied to the kernel of every regularized layer
L1_STRENGTH = 0.001

# Number of filters in each of the five convolutional blocks, in order
CONV_FILTERS = (256, 64, 256, 128, 256)

# Define the CNN model
CNN1 = Sequential()

# The input shape is declared once, up front (128 x 128, 1 channel). Only the first
# layer of a Sequential model determines the input shape, so the later convolutions
# must not repeat it.
CNN1.add(Input(shape = (128, 128, 1)))

# Convolutional layers with L1 regularization, each followed by 2x2 max-pooling
for n_filters in CONV_FILTERS:
    CNN1.add(Conv2D(n_filters, (3, 3), activation = 'relu',
                    kernel_regularizer = regularizers.l1(L1_STRENGTH)))
    CNN1.add(MaxPooling2D(pool_size = (2,2)))

# Flatten before Dense layers
CNN1.add(Flatten())

# Fully Connected Layers with L1 regularization, each followed by dropout
for n_units in (128, 64):
    CNN1.add(Dense(n_units, activation = 'relu',
                   kernel_regularizer = regularizers.l1(L1_STRENGTH)))
    CNN1.add(Dropout(0.2))

# Output Layer (not regularized)
CNN1.add(Dense(4, activation = 'softmax')) # Assuming 4 classes

# Compile the model
CNN1.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Model summary
CNN1.summary()


2026-02-10 22:41:24.149721: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2026-02-10 22:41:24.149743: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2026-02-10 22:41:24.149748: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2026-02-10 22:41:24.149767: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2026-02-10 22:41:24.149776: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:
# Train the L1-regularized CNN for 2 epochs. The History object is kept so the
# per-epoch metrics stay available (history_l1.history) and its repr is not
# echoed as the cell output.
history_l1 = CNN1.fit(train_generator, epochs = 2)

Epoch 1/2


2026-02-10 22:42:11.506549: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 244ms/step - accuracy: 0.2485 - loss: 3.0018
Epoch 2/2
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 253ms/step - accuracy: 0.2433 - loss: 1.5129


<keras.src.callbacks.history.History at 0x30d16c700>

In [5]:
# Score the L1-regularized network on the test batches
test_metrics = CNN1.evaluate(test_generator)

# The result unpacks as (loss, accuracy)
loss, accuracy1 = test_metrics

# Report the accuracy
print(f"Test accuracy of CNN1 with L1 regularization: {accuracy1}")
         

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 49ms/step - accuracy: 0.0094 - loss: 1.5146
Test accuracy of CNN1 with L1 regularization: 0.009382329881191254


To use L2 regularization instead, replace `regularizers.l1` with `regularizers.l2` in the code above.

**With Batch Normalization, L2 Regularization, and Dropout**

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Activation, Input
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam

# Hyperparameters for this variant
L2_STRENGTH = 0.001                       # L2 penalty on every regularized kernel
CONV_FILTERS = (256, 64, 256, 128, 256)   # filters per convolutional block, in order
CONV_DROPOUT = 0.25                       # small dropout after each conv/pool block
DENSE_DROPOUT = 0.5                       # higher dropout on the dense layers

CNN1 = Sequential()

# The input shape is declared once, up front (128 x 128, 1 channel). Only the first
# layer of a Sequential model determines the input shape, so the later convolutions
# must not repeat it.
CNN1.add(Input(shape = (128, 128, 1)))

# - - - - Blocks 1-5 - - - -
# Conv2D (L2) -> BatchNormalization -> ReLU -> MaxPooling2D -> Dropout.
# The activation is a separate layer so that normalization is applied before the ReLU.
for n_filters in CONV_FILTERS:
    CNN1.add(Conv2D(n_filters, (3, 3),
                    kernel_regularizer = regularizers.l2(L2_STRENGTH)))
    CNN1.add(BatchNormalization())
    CNN1.add(Activation('relu'))
    CNN1.add(MaxPooling2D(pool_size = (2,2)))
    CNN1.add(Dropout(CONV_DROPOUT))

# - - - - Classifier - - - -
CNN1.add(Flatten())

# Dense (L2) -> BatchNormalization -> ReLU -> Dropout, twice
for n_units in (128, 64):
    CNN1.add(Dense(n_units, kernel_regularizer = regularizers.l2(L2_STRENGTH)))
    CNN1.add(BatchNormalization())
    CNN1.add(Activation('relu'))
    CNN1.add(Dropout(DENSE_DROPOUT))

# Output layer: 4 softmax units (not regularized)
CNN1.add(Dense(4, activation = 'softmax'))

# Compile with a lower learning rate than Adam's default (pairs well with dropout/BN)
CNN1.compile(optimizer = Adam(learning_rate = 0.0004),
             loss = 'categorical_crossentropy',
             metrics = ['accuracy'])

CNN1.summary()

In [10]:
# Train the BN + L2 + dropout CNN for 2 epochs. The History object is kept so the
# per-epoch metrics stay available (history_combined.history) and its repr is not
# echoed as the cell output.
history_combined = CNN1.fit(train_generator, epochs = 2)

Epoch 1/2
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 618ms/step - accuracy: 0.2828 - loss: 2.5458
Epoch 2/2
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 632ms/step - accuracy: 0.3534 - loss: 2.2521


<keras.src.callbacks.history.History at 0x36cf2bc70>

In [11]:
# Score the BN + L2 + dropout network on the test batches
test_metrics = CNN1.evaluate(test_generator)

# The result unpacks as (loss, accuracy)
loss, accuracy1 = test_metrics

# Report the accuracy
print(f"Test accuracy of CNN1 model: {accuracy1}")

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 95ms/step - accuracy: 0.5059 - loss: 1.8347
Test accuracy of CNN1 model: 0.5058639645576477
