# Brain Tumor Detection
## Single Task Validation - Building multi-label binary Classifier
Description
This dataset was originally created by Yousef Ghanem. To see the current project, which may have been updated since this version, please go here: https://universe.roboflow.com/yousef-ghanem-jzj4y/brain-tumor-detection-fpf1f.

This dataset is part of RF100, an Intel-sponsored initiative to create a new object detection benchmark for model generalizability.

Access the RF100 Github repo: https://github.com/roboflow-ai/roboflow-100-benchmark

## Imports

In [None]:
# Go to project root folder: move the working directory one level up so the
# relative paths used by later cells are resolved from there.
import os
os.chdir("../")
# IPython magic: show the resulting working directory as the cell output.
%pwd

In [None]:
from tqdm.notebook import tqdm
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()

import tensorflow as tf
tf.random.set_seed(42)

import matplotlib.pyplot as plt

In [None]:
# Fail fast when TensorFlow cannot see any GPU device.
found_gpu = tf.config.list_physical_devices('GPU')
if not found_gpu:
    # RuntimeError (still an Exception subclass) signals an environment
    # problem more precisely than a bare Exception.
    raise RuntimeError("No GPU found")
# Cell output: the detected GPU devices and the TensorFlow version.
found_gpu, tf.__version__

In [None]:
from src.data_handler.data_loader import DataLoader
from src.data_handler.annotation_processor import AnnotationProcessor
from src.data_handler.preprocessor import Preprocessor

In [None]:
# auto reload dotenv 
%load_ext dotenv
%dotenv

# auto reload libs
%load_ext autoreload
%autoreload 2

## Paths Setup

In [None]:
from hydra import compose, initialize

# Reference for using Hydra's compose API inside a notebook:
# https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248

# Compose the "config" configuration found under ../conf and keep it in `cfg`
# for the cells below; print the training directory as a quick sanity check.
with initialize(config_path="../conf", version_base=None):
    cfg = compose(config_name="config")
    print(cfg.DATASET_DIRS.TRAIN_DIR)

In [None]:
# Display the DATASET_DIRS section of the composed config.
cfg.DATASET_DIRS

In [None]:
# --- Dataset locations (from the Hydra config) ---
# NOTE(review): this line reads cfg.DATASET.DATASET_DIR while the split
# directories below come from cfg.DATASET_DIRS -- confirm the DATASET key
# exists in the config.
DATASET_DIRS = Path(cfg.DATASET.DATASET_DIR)
TRAIN_DIR = Path(cfg.DATASET_DIRS.TRAIN_DIR)
VALIDATION_DIR = Path(cfg.DATASET_DIRS.VALIDATION_DIR)
TEST_DIR = Path(cfg.DATASET_DIRS.TEST_DIR)

# --- Training hyper-parameters ---
IMG_SIZE = cfg.TRAIN.IMG_SIZE
BATCH_SIZE = cfg.TRAIN.BATCH_SIZE
NUM_EPOCHS = cfg.TRAIN.NUM_EPOCHS
LEARNING_RATE = cfg.TRAIN.LEARNING_RATE

# --- Output locations ---
LOG_DIR = cfg.OUTPUTS.LOG_DIR
CHECK_POINT_DIR = Path(cfg.OUTPUTS.CHECKPOINT_PATH)

# --- Label vocabulary ---
CLASS_NAME = ['label0', 'label1', 'label2']
NUM_CLASSES = len(CLASS_NAME)
# Index -> class name lookup.
class_map = dict(enumerate(CLASS_NAME))


## Dataset Download from Roboflow

In [None]:
# Download the dataset only when the training directory is not on disk yet.
if not TRAIN_DIR.exists():
    # Imported here so the roboflow package is only needed when a download
    # actually happens.
    from roboflow import Roboflow
    rf = Roboflow()
    project = rf.workspace("roboflow-100").project("brain-tumor-m2pbp")
    version = project.version(2)
    # Fetch version 2 of the project using the "tensorflow" export format.
    dataset = version.download("tensorflow")      

## Load images from directory

### Load Training datasets

In [None]:
# Parse the annotation CSV that ships with the training split.
prepare_train_dataset = AnnotationProcessor(
    annotation_file=str(TRAIN_DIR / '_annotations.csv'),
)
# Class name -> index in CLASS_NAME.
_class_map = {name: idx for idx, name in enumerate(CLASS_NAME)}
train_images, train_class_ids, train_bboxes = prepare_train_dataset.process_annotations(
    image_dir=TRAIN_DIR,
    class_id_map=_class_map,
)

# Cell output: the three returned lists should have matching lengths.
len(train_images), len(train_class_ids), len(train_bboxes)

In [None]:
# Peek at the first entry of each list returned by process_annotations.
train_images[0],train_class_ids[0], train_bboxes[0]

In [None]:
import keras
from tensorflow.keras import layers

class DataLoader:
    """Build ``tf.data`` pipelines from image paths and their annotations.

    Class-id and bounding-box lists are padded with ``pad_sequences`` so they
    can be sliced together with the image paths. The pipelines emit
    ``(image, bbox)`` pairs; the class ids are sliced but not returned by
    ``load_dataset``.
    """

    def __init__(self, img_list:list[str], cls_id_list:list[list], bbx_list:list[list], num_classes=3):
        """Store the inputs and create the colour-augmentation stack.

        Args:
            img_list: Image file paths.
            cls_id_list: One list of class ids per image.
            bbx_list: One list of bounding boxes per image.
            num_classes: Number of classes; stored on the instance but not
                used by the pipelines in this class.
        """
        self.img_list = img_list
        self.cls_id_list = cls_id_list
        self.bbx_list = bbx_list
        self.num_classes = num_classes

        # Colour-only augmentations, each constructed with a factor of 0.1.
        augmentation_layers = [
            layers.RandomBrightness(0.1),
            layers.RandomContrast(0.1),
            layers.RandomSaturation(0.1),
            layers.RandomHue(0.1),
        ]
        self.data_augmentation = tf.keras.Sequential(augmentation_layers)

    def load_image(self, image_path) -> tf.Tensor:
        """Read a JPEG file and return it as a 3-channel float32 tensor."""
        raw_bytes = tf.io.read_file(image_path)
        decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
        return tf.cast(decoded, tf.float32)

    def load_dataset(self, image, class_ids, bbox):
        """Map one sliced element to an ``(image_tensor, bbox)`` pair.

        ``class_ids`` is accepted because it is part of the sliced element,
        but it is not included in the returned pair.
        """
        return self.load_image(image), bbox

    def _common_loader(self)->tf.data.Dataset:
        """Slice paths with padded annotations and decode the images in parallel."""
        class_ids, boxes = self.pad_cls_id_bbx()
        sliced = tf.data.Dataset.from_tensor_slices((self.img_list, class_ids, boxes))
        return sliced.map(self.load_dataset, num_parallel_calls=tf.data.AUTOTUNE)

    def load_train_dataset(self)->tf.data.Dataset:
        """Return the augmented dataset, shuffled with a buffer spanning the whole dataset."""

        def _augment(img, target):
            # Only the image is augmented; the target is passed through as-is.
            return self.data_augmentation(img), target

        augmented = self._common_loader().map(_augment, num_parallel_calls=tf.data.AUTOTUNE)
        buffer_size = augmented.cardinality().numpy()
        return augmented.shuffle(buffer_size=buffer_size)

    def load_val_dataset(self) ->tf.data.Dataset:
        """Return the dataset without augmentation or shuffling."""
        return self._common_loader()

    def pad_cls_id_bbx(self):
        """Pad the stored class-id and bounding-box lists to a common length.

        Both lists are padded at the end (``padding='post'``) with
        ``keras.preprocessing.sequence.pad_sequences``; class ids are emitted
        as ``int32`` and boxes as ``float32``.

        Returns:
            tuple: ``(padded_class_ids, padded_bbx)``.
        """
        pad = keras.preprocessing.sequence.pad_sequences
        padded_class_ids = pad(self.cls_id_list, padding='post', dtype='int32')
        padded_bbx = pad(self.bbx_list, padding='post', dtype='float32')
        return padded_class_ids, padded_bbx

In [None]:
# Build the training pipeline: load + augment + shuffle, run the project's
# Preprocessor, then repeat the data three times, batch and prefetch.
train_dl = DataLoader(train_images, train_class_ids, train_bboxes)
train_ds = Preprocessor(train_dl.load_train_dataset()).preprocess()
train_ds = (
    train_ds
    .repeat(3)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Sanity-check a single training batch: target/image shapes and the value
# range of the second image in the batch.
# NOTE(review): `cls` is the second element of the batch; DataLoader.load_dataset
# emits bounding boxes there -- confirm what Preprocessor yields.
for batch in train_ds.take(1):
    image, cls = batch
    print(cls.shape)
    print(image.shape)
    second_image = image[1].numpy()
    print(second_image.min(), second_image.max())


### Validation datasets 

In [None]:
# Parse the annotation CSV of the validation split, reusing the class-name ->
# index mapping (_class_map) built for the training split.
prepare_valid_dataset = AnnotationProcessor(
    annotation_file=str(VALIDATION_DIR / '_annotations.csv'),
)

valid_image_paths, valid_class_ids, valid_bboxes = prepare_valid_dataset.process_annotations(
    image_dir=VALIDATION_DIR,
    class_id_map=_class_map,
)
# Cell output: the three returned lists should have matching lengths.
len(valid_image_paths), len(valid_class_ids), len(valid_bboxes)

In [None]:
# Validation pipeline: no augmentation or shuffling, just preprocess, batch
# and prefetch. Despite its name, valid_dl holds the tf.data.Dataset returned
# by load_val_dataset().
valid_dl = DataLoader(valid_image_paths, valid_class_ids, valid_bboxes).load_val_dataset()
valid_ds = (
    Preprocessor(valid_dl)
    .preprocess()
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Sanity-check a single validation batch: target/image shapes and the value
# range of the second image in the batch.
for batch in valid_ds.take(1):
    image, cls = batch
    print(cls.shape)
    print(image.shape)
    second_image = image[1].numpy()
    print(second_image.min(), second_image.max())

## Training Setup

In [None]:
import tensorflow_addons as tfa
# Metrics reported next to the loss during training and evaluation.
# NOTE(review): confirm that `tfa.image.iou` exists in the installed
# TensorFlow Addons release and has a Keras-compatible (y_true, y_pred)
# signature.
METRICS = [
        tfa.image.iou,
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.MeanAbsoluteError()
]

### Define  Callbacks

In [None]:
import os

# All callbacks watch the validation loss, where lower is better.
to_monitor = 'val_loss'
mode = 'min'

# Keras fills in {epoch} when a checkpoint is written.
checkpoint_path = os.path.join(str(CHECK_POINT_DIR), "regressor_ckpt_{epoch}.keras")

# Cut the learning rate by 10x after 5 epochs without improvement (floor 1e-6).
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor=to_monitor,
    mode=mode,
    factor=0.1,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

# Save the full model (not only weights) whenever the monitored value improves.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor=to_monitor,
    mode=mode,
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop after 10 epochs without improvement and restore the best weights.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor=to_monitor,
    mode=mode,
    patience=10,
    restore_best_weights=True,
)

callbacks = [reduce_lr_cb, checkpoint_cb, early_stopping_cb]

### Define Optimizer

In [None]:
# SGD optimizer using the learning rate read from the config (LEARNING_RATE).
optimizer=tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE)

## Define ResNet50 Model Regressor

In [None]:
from src.models.resnet50 import resnet50_regressor
# Reset Keras' global state before building a new model in this session.
tf.keras.backend.clear_session()
# Build the project's ResNet50 regressor for IMG_SIZE x IMG_SIZE inputs with 3 channels.
model = resnet50_regressor(input_shape=(IMG_SIZE,IMG_SIZE,3), num_classes=NUM_CLASSES)

# Uncomment to print the layer-by-layer architecture.
# model.summary()

## Model Building and Compilation

In [None]:
# Compile with the optimizer defined earlier, a Huber loss, and the METRICS list.
model.compile(
    optimizer=optimizer,
    loss= tf.keras.losses.Huber(),
    metrics=METRICS)  # METRICS holds the IoU, MSE and MAE entries

## Train and Validate the model

In [None]:
# Number of epochs passed to model.fit.
# NOTE(review): this hard-coded value is used instead of NUM_EPOCHS, which is
# read from cfg.TRAIN.NUM_EPOCHS -- confirm which one is intended.
EPOCHS = 50

In [None]:
import mlflow

# Track the run in MLflow; autolog records parameters, per-epoch metrics and
# the trained model (dataset logging is disabled).
mlflow.set_experiment("/brain-tumor-resnet50_regressor")
mlflow.tensorflow.autolog(log_models=True, log_datasets=False)

# train_ds and valid_ds are already batched tf.data pipelines, so `batch_size`
# must not be passed to fit(). `callbacks` is already a list and is passed
# as-is rather than wrapped in another list.
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=valid_ds,
    callbacks=callbacks,
)

## Training Visualization

In [None]:
import matplotlib.pyplot as plt

def visualize_training_results(history):
    """Plot training/validation loss and accuracy curves.

    Args:
        history: Either a dictionary of per-epoch series, or an object
            exposing such a dictionary as ``.history`` (for example a Keras
            ``History`` object). The keys read are ``'loss'``, ``'val_loss'``,
            ``'accuracy'`` and ``'val_accuracy'``; a pair is plotted only when
            both of its series are present and non-empty.

    Returns:
        None. When neither pair is available nothing is drawn and a short
        message is printed instead of showing an empty figure.
    """
    # Accept both a plain dict and a History-like object.
    records = history if isinstance(history, dict) else history.history

    loss = records.get('loss')
    val_loss = records.get('val_loss')
    accuracy = records.get('accuracy')
    val_accuracy = records.get('val_accuracy')

    has_loss = bool(loss and val_loss)
    has_accuracy = bool(accuracy and val_accuracy)

    if not (has_loss or has_accuracy):
        # Nothing to draw: avoid creating and showing a blank figure.
        print("No loss or accuracy series found in history; nothing to plot.")
        return

    plt.figure(figsize=(12, 5))

    if has_loss:
        epochs = range(1, len(loss) + 1)

        # Plot training & validation loss values
        plt.subplot(1, 2, 1)
        plt.plot(epochs, loss, 'r', label='Training loss')
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
        plt.title('Training and validation loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

    if has_accuracy:
        # Derive the x-axis from the accuracy series itself so this branch
        # also works when no loss series was recorded.
        epochs = range(1, len(accuracy) + 1)
        if has_loss:
            plt.subplot(1, 2, 2)

        # Plot training & validation accuracy values
        plt.plot(epochs, accuracy, 'r', label='Training accuracy')
        plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
        plt.title('Training and validation accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()

    plt.tight_layout()  # prevents overlapping titles/labels
    plt.show()


In [None]:
# List the metric names recorded during training.
history.history.keys()

In [None]:
# Plot the recorded curves; the plain history dict is passed here.
visualize_training_results(history.history)

## Model Evaluation

### Testing Datasets setup

In [None]:

# Parse the annotation CSV that ships with the test split.
prepare_test_dataset = AnnotationProcessor(
    annotation_file=str(TEST_DIR / '_annotations.csv'),
)
# Class name -> index in CLASS_NAME.
_class_map = {name: idx for idx, name in enumerate(CLASS_NAME)}
test_image_paths, test_class_ids, test_bboxes = prepare_test_dataset.process_annotations(
    image_dir=TEST_DIR,
    class_id_map=_class_map,
)

# Cell output: the three returned lists should have matching lengths.
len(test_image_paths), len(test_class_ids), len(test_bboxes)

In [None]:
# Test pipeline: no augmentation or shuffling, just preprocess, batch and
# prefetch. Despite its name, test_dl holds the tf.data.Dataset returned by
# load_val_dataset().
test_dl = DataLoader(test_image_paths, test_class_ids, test_bboxes).load_val_dataset()
test_ds = (
    Preprocessor(test_dl)
    .preprocess()
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Evaluate on the test pipeline and collect the loss and metrics as a dict.
# NOTE(review): steps=1 limits this evaluation to a single batch, and the
# printed label says "accuracy" although `results` holds the compiled loss
# and metrics -- confirm both are intended.
results = model.evaluate(test_ds, return_dict=True, steps=1)
print("Testing accuracy: ", results)

In [None]:
# Display the evaluation dict as the cell output.
results

In [None]:
from sklearn.metrics import classification_report
import numpy as np

# Ground truth: the class-id lists returned by the annotation processor.
y_true = test_class_ids
# Raw model outputs over the whole test pipeline.
# NOTE(review): the model is built by resnet50_regressor and compiled with a
# Huber loss -- confirm its outputs can be read as per-class scores before
# comparing them with class ids.
y_pred = model.predict(test_ds)



In [None]:
# Binarize the predictions: 1 where the output exceeds 0.5, otherwise 0.
# This overwrites the raw outputs held in y_pred.
y_pred = (y_pred>0.5).astype(int)
y_pred

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer
# Fit the binarizer on the ground-truth label lists and encode them as an
# indicator matrix.
mlb = MultiLabelBinarizer()
y_true_bin = mlb.fit_transform(y_true)
# NOTE(review): y_pred is already a 0/1 array after thresholding, and
# transform() interprets the values in each row as label ids -- confirm this
# encoding is what is intended.
y_pred_bin = mlb.transform(y_pred) #use transform, not fit_transform
y_true_bin,y_pred_bin

In [None]:
# Per-class precision / recall / F1 computed on the binarized labels.
print(classification_report(y_true_bin, y_pred_bin, labels=[0,1,2], target_names=CLASS_NAME))

In [None]:
from sklearn import metrics

# Draw one ROC curve per class, save the figure, and log the figure and the
# full-test-set metrics to MLflow.
auc_roc_values = []
fig, axs = plt.subplots(1)

# Chance-level diagonal, drawn once rather than on every iteration.
axs.plot([0, 1], [0, 1], 'k--')

# Iterate over the classes (columns of the indicator matrices), not over the
# test samples, and index the binarized ground truth: `y_true` is a plain
# Python list and cannot be sliced with [:, i].
# NOTE(review): y_score uses the thresholded predictions because the raw
# scores in y_pred were overwritten earlier; raw scores would give a more
# informative curve.
for i, class_name in enumerate(CLASS_NAME):
    try:
        roc_score_per_label = metrics.roc_auc_score(y_true=y_true_bin[:, i], y_score=y_pred_bin[:, i])
        fpr, tpr, _ = metrics.roc_curve(y_true=y_true_bin[:, i], y_score=y_pred_bin[:, i])
    except (ValueError, IndexError):
        # ValueError: only one class is present in this column.
        # IndexError: the class has no column in the indicator matrices.
        print(
            f"Error in generating ROC curve for {class_name}. "
            f"Dataset lacks enough examples."
        )
        continue

    auc_roc_values.append(roc_score_per_label)
    axs.plot(fpr, tpr,
             label=f'{class_name} - AUC = {round(roc_score_per_label, 3)}')

axs.set_xlabel('False Positive Rate')
axs.set_ylabel('True Positive Rate')
if auc_roc_values:
    # Only add a legend when at least one labelled curve was drawn.
    axs.legend(loc='lower right')

# NOTE(review): the config key is spelled OUPUT_DIR -- confirm it matches the
# key defined in the Hydra config.
fig.savefig(f"{cfg.OUTPUTS.OUPUT_DIR}/ROC-Curve.png")
mlflow.log_figure(fig, 'ROC-Curve.png')

# Evaluate on the whole test pipeline and log the resulting metrics.
results = model.evaluate(test_ds, verbose=0,return_dict=True)
mlflow.log_metrics(results)