<a href="https://colab.research.google.com/github/tylaar1/PICAR-autopilot/blob/main/GRADCAM_MobNetV3_DUAL_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
import matplotlib.pyplot as plt

In [None]:
# Show pandas DataFrames in full: lift the column-width, row-count and
# column-count display limits.

for _display_option in ('display.max_colwidth', 'display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the active data paths in the cells
# below read from /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the training-label CSV. Exactly one of the assignments below
# should be active, depending on the machine the notebook runs on.
labels_file_path = '/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_norm.csv' # tylers file path
#labels_file_path = '/home/apyba3/KAGGLEDATAmachine-learning-in-science-ii-2025/training_norm.csv' # ben hpc file path (mlis2 cluster)
#labels_file_path = '/home/ppytr13/machine-learning-in-science-ii-2025/training_norm.csv' # ben hpc file path (mlis2 cluster)
# Use image_id as the index so the labels can be joined to the image paths by id.
labels_df = pd.read_csv(labels_file_path, index_col='image_id')

In [None]:
# Folder holding the training images. Exactly one of the assignments below
# should be active, depending on the machine the notebook runs on.
#image_folder_path = '/home/apyba3/KAGGLEDATAmachine-learning-in-science-ii-2025/training_data/training_data' # bens hpc file path
image_folder_path = '/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data' # tylers file path
#image_folder_path = '/home/ppytr13/machine-learning-in-science-ii-2025/training_data/training_data' # bens hpc file path
# Collect the full path of every file in the folder with an image extension.
image_file_paths = [
    os.path.join(image_folder_path, f)
    for f in os.listdir(image_folder_path)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
]

# The file stem is parsed as an integer, so a non-numeric file name raises ValueError here.
image_file_paths.sort(key=lambda x: int(os.path.splitext(os.path.basename(x))[0])) # sorts the files in the right order (1.png, 2.png, 3.png, ...)

# One-column frame of paths, indexed by the integer parsed from each file name.
imagefilepaths_df = pd.DataFrame(
    image_file_paths,
    columns=['image_file_paths'],
    index=[int(os.path.splitext(os.path.basename(path))[0]) for path in image_file_paths]
)

# Name the index image_id so it matches the index name of labels_df.
imagefilepaths_df.index.name = 'image_id'

In [None]:
# Inner join on image_id: keeps only the ids present in both the label table
# and the image-path table.
merged_df = pd.merge(labels_df, imagefilepaths_df, on='image_id', how='inner')
merged_df['speed'] = merged_df['speed'].round(6) # to get rid of floating point errors

In [None]:
# Drop every row whose (rounded) speed equals 1.428571.
# NOTE(review): presumably an erroneous/outlier label value - confirm against the dataset description.
cleaned_df = merged_df[merged_df['speed'] != 1.428571]
# Display the rows whose image_id label lies between 3882 and 3886.
cleaned_df.loc[3882:3886]

Unnamed: 0_level_0,angle,speed,image_file_paths
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [None]:
def process_image(image_path, resized_shape=(224, 224)):
    """Read an image file and return it resized and scaled by 1/255.

    Args:
        image_path: Path of the image file, handed to ``tf.io.read_file``.
        resized_shape: Target size passed to ``tf.image.resize``.

    Returns:
        The decoded 3-channel image after resizing, divided by 255.0.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, resized_shape)
    # Bring pixel values from the 0-255 range down to 0-1.
    return resized / 255.0

# Creating the dataset: one element per row of cleaned_df, as (path, angle, speed).
dataset = tf.data.Dataset.from_tensor_slices(
    (cleaned_df["image_file_paths"], cleaned_df["angle"], cleaned_df["speed"])
)  # Convert pandas DataFrame to a tf.data.Dataset

# Apply the map function to process images and format labels.
# The dictionary keys must match the names of the model's output layers.
# NOTE(review): 'angle' is routed to the "classification" head and 'speed' to the
# "regression" head - confirm this mapping is the intended one.
dataset = dataset.map(
    lambda image_path, angle, speed: (
        process_image(image_path),  # Process the image
        {"classification": angle, "regression": speed}  # Format labels as a dictionary
    ),
    num_parallel_calls=tf.data.AUTOTUNE  # element order is still deterministic by default
)

dataset = dataset.cache()
# Shuffle ONCE with a fixed order. By default shuffle() reshuffles every time the
# dataset is iterated, so a later take()/skip() split would put different samples
# in "train" and "validation" on every pass and leak validation data into training.
dataset = dataset.shuffle(
    len(cleaned_df),
    seed=42,
    reshuffle_each_iteration=False
)
dataset = dataset.batch(32)
dataset = dataset.prefetch(tf.data.AUTOTUNE)

Let's check that what we have done so far works.

In [None]:
# 80-20 split, counted in dataset elements (i.e. batches once `dataset` is batched)

dataset_size = dataset.cardinality().numpy()
train_size = int(dataset_size * 0.8)

# The first train_size elements go to training, everything after them to validation.
validation_dataset = dataset.skip(train_size)
train_dataset = dataset.take(train_size)

augmenting so the images can be uploaded to vscode via cuda

In [None]:
def augment_image(image, label):
  """Apply random colour jitter to an image (or batch of images).

  The previous version passed one constant seed to the ``stateless_random_*``
  ops, which makes them return the same "random" adjustment on every call.
  The stateful ``tf.image.random_*`` ops draw a new adjustment each time the
  function is executed inside ``Dataset.map``.

  Args:
    image: Float image tensor; the result is clipped to [0, 1], the range
      produced by ``process_image``.
    label: Passed through unchanged.

  Returns:
    Tuple ``(augmented_image, label)``.
  """
  image = tf.image.random_brightness(image, 0.2)
  image = tf.image.random_contrast(image, 0.8, 1.2)
  image = tf.image.random_hue(image, 0.2)
  image = tf.image.random_saturation(image, 0.8, 1.2)
  # Brightness/contrast shifts can push values outside the normalised range.
  image = tf.clip_by_value(image, 0.0, 1.0)
  return image, label

# Run every training element through augment_image and append the augmented
# copies after the originals, doubling the length of the training set.
augmented_dataset = train_dataset.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.concatenate(augmented_dataset)
# NOTE(review): the dataset was batched before the split, so this shuffles whole
# batches and the buffer (len(cleaned_df)) exceeds the number of batches - confirm intended.
train_dataset = train_dataset.shuffle(buffer_size=len(cleaned_df))

In [None]:
dropoutrate = 0.2
num_classes = 1 # single sigmoid unit: the classification head outputs one probability
input_shape = (224,224,3)

# include_preprocessing=False: MobileNetV3's built-in preprocessing layer expects
# raw pixels in [0, 255], but the input pipeline already scales images to [0, 1].
# The explicit Rescaling layer below maps [0, 1] to the [-1, 1] range instead.
mbnet = tf.keras.applications.MobileNetV3Large(
    input_shape=input_shape,
    include_top=False,
    weights='imagenet',
    minimalistic=False,
    include_preprocessing=False
)

# Freeze the backbone BEFORE compiling: `trainable` is meant to be fixed at
# compile time, so freezing afterwards is not reliably honoured.
mbnet.trainable = False

input_layer = tf.keras.Input(shape=input_shape)

x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(input_layer)  # [0, 1] -> [-1, 1]

# training=False keeps the backbone's batch-norm layers in inference mode,
# including if the backbone is unfrozen later for fine-tuning.
x = mbnet(x, training=False)


x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(dropoutrate)(x)


# Shared fully-connected trunk feeding both heads
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(dropoutrate)(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dropout(dropoutrate)(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
x = tf.keras.layers.Dropout(dropoutrate)(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)

#split outputs to predict speed and angle
classification_output = tf.keras.layers.Dense(num_classes, activation='sigmoid', name="classification")(x)
regression_output = tf.keras.layers.Dense(1, activation='linear', name="regression")(x)

#combine both outputs
model = tf.keras.Model(inputs=input_layer, outputs=[classification_output, regression_output])

# Loss/metric keys refer to the output layer names defined above.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss={'classification': 'binary_crossentropy', 'regression': 'mse'},
              metrics={'classification': 'accuracy', 'regression': 'mse'})

# A functional model is already built, so no model.build() call is needed.
model.summary()

In [None]:
# Smoke test: one epoch over a single element of each dataset.
# `batch_size` is not passed because the tf.data datasets are already batched;
# Keras requires it to be left unset for dataset inputs.
history = model.fit(train_dataset.take(1),
                    epochs=1,
                    validation_data=validation_dataset.take(1))

In [None]:
# Persist the model's weights to disk.
# NOTE(review): the active path is under /home/ppytr13 while the data paths enabled
# earlier point at Colab's /content/drive - confirm this directory exists where the notebook runs.
#model.save_weights('/home/apyba3/car_frozen_regression.weights.h5')
model.save_weights('/home/ppytr13/car_frozen_regression.weights.h5')

In [None]:
# NOTE(review): `model` is referenced again by the TFLite conversion cell further
# down - confirm that clearing the Keras session at this point is intended.
tf.keras.backend.clear_session() #Clear keras session

### 3e) save predictions to a file inside kaggle (to then later send to the supercomputer cloud)

In [None]:
# Write the predictions table to CSV.
# NOTE(review): predictions_df is not defined in any visible cell; unless it is
# created elsewhere this raises NameError on a fresh kernel - confirm.
#predictions_df.to_csv('/home/apyba3/mbnetv3_angleregression_predictions.csv')
predictions_df.to_csv('/home/ppytr13/mbnetv3_dual_predictions.csv')

## instead - convert to tf lite (chatgpt code - not tested yet)

In [None]:
# Define the converter from the in-memory Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Enable optimizations for smaller size and faster inference
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Declare float16 as a supported type; combined with Optimize.DEFAULT this
# requests float16 quantisation (it does not relate to input shapes)
converter.target_spec.supported_types = [tf.float16]  # Optional: FP16 for faster inference

# Convert the model
tflite_model = converter.convert()

# Save the TFLite model to disk (relative path: written to the current working directory)
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

# Guided GradCAM
Below are the basic steps I followed to set up Guided Grad-CAM:


1.   Extract gradients: first compute the gradients of the predicted output w.r.t. the final convolutional layer.
2.   Compute the importance weights: average the gradients spatially to obtain the neuron importance.
3.   Generate heatmap: compute a weighted sum of the feature maps and apply ReLU.
4.   Overlay on image: blend the heatmap with the original image for visualisation.







In [None]:
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model

# basic function to compute Grad-CAM
def compute_gradcam(model, img_array, layer_name):
    """Compute a Grad-CAM heatmap for the highest-scoring output of ``model``.

    Args:
        model: Keras model. ``model.get_layer(layer_name)`` must return the
            layer whose feature maps are explained. If the model has several
            outputs (list/tuple), the first one is explained.
        img_array: Batched image input; only the first sample is explained.
            Axes 1 and 2 are used as the height and width of the heatmap.
        layer_name: Name of the layer to take feature maps from.

    Returns:
        2-D numpy array resized to the input height/width, scaled so its
        maximum is 1 (all zeros if no location has a positive contribution).

    Raises:
        ValueError: If the selected output does not depend on the layer.
    """
    grad_model = Model(inputs=model.input, outputs=[model.get_layer(layer_name).output, model.output])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        # Multi-output models return a list; explain the first head.
        if isinstance(predictions, (list, tuple)):
            predictions = predictions[0]
        class_idx = tf.argmax(predictions[0])
        loss = predictions[:, class_idx]
    grads = tape.gradient(loss, conv_outputs)
    if grads is None:
        raise ValueError(f"Output is not connected to layer '{layer_name}'; cannot compute gradients.")
    # Channel importance = gradient averaged over batch and spatial axes.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    heatmap = tf.reduce_mean(tf.multiply(pooled_grads, conv_outputs), axis=-1)
    heatmap = tf.nn.relu(heatmap)
    heatmap = heatmap[0].numpy()
    # cv2.resize takes (width, height)
    heatmap = cv2.resize(heatmap, (img_array.shape[2], img_array.shape[1]))
    heatmap = np.maximum(heatmap, 0)
    peak = np.max(heatmap)
    # Avoid 0/0 -> NaN when the map is entirely zero.
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap

# this is the function for Guided Backpropagation
def guided_backprop(model, img_array):
    """Return the positive part of the input gradient for the top-scoring output.

    Only the gradient at the input is rectified; gradients through the model's
    internal activations are not modified, so this is a positive-gradient
    saliency map approximating guided backpropagation.

    Args:
        model: Callable Keras model. If it returns several outputs
            (list/tuple), the first one is explained.
        img_array: Batched image input (tensor or array); it is converted to
            a float32 tensor so the gradient tape can watch it.

    Returns:
        Numpy array with the rectified gradient for the first sample.
    """
    img_tensor = tf.convert_to_tensor(img_array, dtype=tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(img_tensor)
        preds = model(img_tensor)
        # Multi-output models return a list; explain the first head.
        if isinstance(preds, (list, tuple)):
            preds = preds[0]
        class_idx = tf.argmax(preds[0])
        loss = preds[:, class_idx]
    grads = tape.gradient(loss, img_tensor)
    # Keep only positive gradients
    guided_grads = tf.cast(grads > 0, "float32") * grads
    return guided_grads[0].numpy()

# function to overlay heatmap on image
def overlay_heatmap(img, heatmap, alpha=0.5):
    """Blend a JET-coloured heatmap onto an image.

    Args:
        img: Image with values in 0-255, in RGB channel order (the caller in
            this notebook converts the loaded image to RGB before calling).
        heatmap: 2-D map with values in [0, 1], same height/width as ``img``.
        alpha: Weight applied to the coloured heatmap before adding the image.

    Returns:
        uint8 RGB image; sums above 255 saturate instead of wrapping around.
    """
    # NaNs (e.g. from a 0/0 normalisation) and out-of-range values would give
    # undefined results when cast to uint8.
    safe_heatmap = np.clip(np.nan_to_num(heatmap), 0.0, 1.0)
    colored = cv2.applyColorMap(np.uint8(255 * safe_heatmap), cv2.COLORMAP_JET)
    # applyColorMap returns BGR; convert so the colours match the RGB image.
    colored = cv2.cvtColor(colored, cv2.COLOR_BGR2RGB)
    superimposed_img = colored.astype(np.float32) * alpha + np.asarray(img, dtype=np.float32)
    return np.uint8(np.clip(superimposed_img, 0, 255))

# load a test image, run Grad-CAM and guided backprop on it, and plot the results
def apply_guided_gradcam(model, img_path, layer_name):
    """Plot an image next to its Grad-CAM overlay and guided-backprop map.

    Args:
        model: Keras model to explain.
        img_path: Path of the image file to load.
        layer_name: Name of the layer used for Grad-CAM.

    Raises:
        FileNotFoundError: If the image cannot be read from ``img_path``.
    """
    img = cv2.imread(img_path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img_array = np.expand_dims(img / 255.0, axis=0)

    # compute Grad-CAM
    heatmap = compute_gradcam(model, img_array, layer_name)

    # compute Guided Backpropagation
    guided_grads = guided_backprop(model, tf.convert_to_tensor(img_array, dtype=tf.float32))

    # overlay heatmap
    heatmap_img = overlay_heatmap(img, heatmap)

    # Raw gradients are tiny, and imshow clips float RGB data to [0, 1], so
    # rescale them to the full [0, 1] range to make the panel visible.
    grad_min = guided_grads.min()
    grad_span = guided_grads.max() - grad_min
    if grad_span > 0:
        guided_display = (guided_grads - grad_min) / grad_span
    else:
        guided_display = np.zeros_like(guided_grads)

    # the results
    panels = [
        (img, "Original Image"),
        (heatmap_img, "Grad-CAM Heatmap"),
        (guided_display, "Guided Backprop"),
    ]
    fig, axes = plt.subplots(1, 3, figsize=(10, 5))
    for ax, (panel, title) in zip(axes, panels):
        ax.imshow(panel)
        ax.set_title(title)
        ax.axis("off")

    plt.show()

# This is just an example usage; it needs to be connected to the HPC to run properly.
# NOTE(review): this rebinds `model`, replacing the dual-output model built earlier
# with a stock ImageNet classifier - confirm that is intended.
model = tf.keras.applications.MobileNetV3Large(weights="imagenet", include_top=True)



# Testing on MobNet model and our images

In [None]:
import torch
from torchvision import models

# Load an ImageNet-pretrained MobileNetV3-Large (our own trained weights are not used here).
# `pretrained=True` is deprecated in torchvision; the explicit weights enum below
# selects the same checkpoint (mobilenet_v3_large-8738ca79.pth).
model = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1)
# Inference mode: fixes batch-norm statistics and disables dropout.
model.eval()


Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth
100%|██████████| 21.1M/21.1M [00:00<00:00, 155MB/s]


MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bi

In [None]:
from torchvision import transforms
from PIL import Image

# preprocessing transformation: resize to 224x224, convert to a tensor, then
# normalise each channel with the ImageNet mean and standard deviation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization for ImageNet
])


In [None]:
import torch.nn.functional as F

def guided_gradcam_function(model, input_tensor):
    """Compute a Grad-CAM map for the top-scoring class of the first sample.

    Fixes over the previous version: the activations are captured in a local
    dict instead of being written onto a module attribute (the old hooks stored
    them on the hooked layer, so the later reads from ``model`` failed); the
    hook is always removed; the channel weights keep their (N, C, 1, 1) shape
    so they broadcast over the channel axis; an all-zero map no longer
    produces NaN; and the map is actually resized to the input size.

    Args:
        model: ``torch.nn.Module`` whose ``model.features[-1]`` is the layer
            to explain and whose output has shape (N, num_classes).
        input_tensor: Input batch of shape (N, C, H, W); only sample 0 is
            explained. The caller's tensor is not modified.

    Returns:
        numpy array of shape (H, W) with values in [0, 1].
    """
    target_layer = model.features[-1]  # Last convolutional block
    captured = {}

    def forward_hook(module, inputs, output):
        # Keep the tensor attached to the graph so it can be differentiated.
        captured["activations"] = output

    handle = target_layer.register_forward_hook(forward_hook)
    try:
        # enable_grad + requires_grad make this work under torch.no_grad()
        # and with frozen model parameters.
        with torch.enable_grad():
            model_input = input_tensor.detach().requires_grad_(True)
            output = model(model_input)
            predicted_class = int(output[0].argmax())
            score = output[0, predicted_class]
            activations = captured["activations"]
            # Gradient of the class score w.r.t. the captured feature maps;
            # this does not accumulate into the parameters' .grad fields.
            gradients = torch.autograd.grad(score, activations)[0]
    finally:
        # Never leave hooks behind: they would pile up across calls.
        handle.remove()

    activations = activations.detach()

    # Global average pooling of the gradients -> per-channel weights (N, C, 1, 1)
    weights = F.adaptive_avg_pool2d(gradients, (1, 1))

    # Grad-CAM: weighted sum of activations over the channel axis, then ReLU
    grad_cam = torch.sum(weights * activations, dim=1, keepdim=True)
    grad_cam = F.relu(grad_cam)

    # Resize to input image size
    grad_cam = F.interpolate(
        grad_cam, size=tuple(input_tensor.shape[-2:]), mode="bilinear", align_corners=False
    )
    grad_cam = grad_cam[0, 0]

    # Normalize to 0-1, guarding against an all-constant map
    grad_cam = grad_cam - grad_cam.min()
    peak = grad_cam.max()
    if peak > 0:
        grad_cam = grad_cam / peak

    return grad_cam.cpu().numpy()
