<a href="https://colab.research.google.com/github/tylaar1/PICAR-autopilot/blob/main/MobNetV3_DUAL_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SWITCH TO **`T4 GPU`** OR THE **`HPC`**

# Imports

In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
import matplotlib.pyplot as plt

In [2]:
# Show DataFrames in full: no truncation of cell text, rows, or columns.

pd.options.display.max_colwidth = None
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [3]:
# Mount Google Drive when running on Colab. The `google.colab` package only
# exists on Colab, so on the HPC (or any other machine) skip the mount instead
# of failing the whole notebook with an ImportError.
try:
    from google.colab import drive
except ImportError:
    drive = None
else:
    drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 1) DATA PRE-PROCESSING

a) Load in labels + image file paths

b) combine them into one dataframe

c) EDA - spotted and removed erroneous label (speed = 1.42...)

- `cleaned_df` is the cleaned df with a) b) c) completed

d) convert images to numerical RGB feature maps - ML algorithms only understand numerical data

e) Splitting data into training and validation sets

f) data augmentation applied to training set

### 1a) load in labels + image file paths

In [4]:
# Location of the labels CSV. Keep exactly one assignment active for the machine in use.
labels_file_path = '/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_norm.csv' # tylers file path
#labels_file_path = '/home/apyba3/KAGGLEDATAmachine-learning-in-science-ii-2025/training_norm.csv' # ben hpc file path (mlis2 cluster)
#labels_file_path = '/home/ppytr13/machine-learning-in-science-ii-2025/training_norm.csv' # ben hpc file path (mlis2 cluster)
# Read the labels with the image_id column as the row index, so they can be joined to the image paths by id.
labels_df = pd.read_csv(labels_file_path, index_col='image_id')

In [5]:
# Folder holding the training images. Keep exactly one assignment active.
#image_folder_path = '/home/apyba3/KAGGLEDATAmachine-learning-in-science-ii-2025/training_data/training_data' # bens hpc file path
image_folder_path = '/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data' # tylers file path
#image_folder_path = '/home/ppytr13/machine-learning-in-science-ii-2025/training_data/training_data' # bens hpc file path

# Collect every image file in the folder and order it by the integer in its
# file name (1.png, 2.png, 3.png, ...) rather than lexicographically.
image_file_paths = sorted(
    (
        os.path.join(image_folder_path, file_name)
        for file_name in os.listdir(image_folder_path)
        if file_name.lower().endswith(('.png', '.jpg', '.jpeg'))
    ),
    key=lambda path: int(os.path.splitext(os.path.basename(path))[0]),
)

# One row per image, indexed by the numeric id parsed from the file name.
imagefilepaths_df = pd.DataFrame(
    {'image_file_paths': image_file_paths},
    index=pd.Index(
        [int(os.path.splitext(os.path.basename(path))[0]) for path in image_file_paths],
        name='image_id',
    ),
)

Checking labels dataframe

In [6]:
# Preview the first rows of the labels table (indexed by image_id).
labels_df.head()

Unnamed: 0_level_0,angle,speed
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.4375,0.0
2,0.8125,1.0
3,0.4375,1.0
4,0.625,1.0
5,0.5,0.0


Checking image file paths dataframe - as you can see the file paths are ordered correctly (1.png, 2.png, 3.png, ...)

In [7]:
# Preview the first rows of the image-path table to confirm the numeric ordering.
imagefilepaths_df.head()

Unnamed: 0_level_0,image_file_paths
image_id,Unnamed: 1_level_1
1,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/1.png
2,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/2.png
3,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3.png
4,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/4.png
5,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/5.png


### 1b) Combine labels and image file paths into one dataframe

In [6]:
# Inner-join labels and file paths on image_id, so only ids present in both
# tables survive, then round speed to 6 d.p. to get rid of floating point errors.
merged_df = (
    pd.merge(labels_df, imagefilepaths_df, on='image_id', how='inner')
    .assign(speed=lambda frame: frame['speed'].round(6))
)

In [9]:
# Preview the merged table: angle, speed and file path per image_id.
merged_df.head()

Unnamed: 0_level_0,angle,speed,image_file_paths
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.4375,0.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/1.png
2,0.8125,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/2.png
3,0.4375,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3.png
4,0.625,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/4.png
5,0.5,0.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/5.png


In [10]:
# Inspect image_ids 3139-3143 (label-based, inclusive slice) to check that
# labels and file paths still line up around an id that has no row.
merged_df.loc[3139:3143]

Unnamed: 0_level_0,angle,speed,image_file_paths
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3139,0.75,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3139.png
3140,0.875,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3140.png
3142,0.625,0.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3142.png
3143,0.625,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3143.png


The above cell shows that:

 1) the image files and labels match (see image_id and the number at the end of the file path)

 2) the missing rows in labels_df (image_id: 3141, 3999, 4895, 8285, 10171) have been taken care of

### 1c) EDA

In [11]:
# Count how many rows carry each distinct angle label.
merged_df.value_counts('angle')

Unnamed: 0_level_0,count
angle,Unnamed: 1_level_1
0.75,2123
0.5,2046
0.6875,2007
0.625,1963
0.5625,1609
0.4375,1467
0.8125,1147
0.375,428
0.875,301
0.3125,213


Note: the dataset is imbalanced - some angle values occur far more often than others.

identifying the row with the erroneous speed value

In [12]:
# Select the row(s) whose speed equals 1.428571. The exact comparison relies on
# speed having been rounded to 6 decimal places when merged_df was built.
merged_df[merged_df['speed'] == 1.428571]

Unnamed: 0_level_0,angle,speed,image_file_paths
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3884,0.4375,1.428571,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3884.png


we want to remove this row

In [7]:
# Keep every row except the one carrying the erroneous speed label 1.428571.
cleaned_df = merged_df.loc[merged_df['speed'].ne(1.428571)]
# Show the neighbouring ids to confirm the erroneous row is gone.
cleaned_df.loc[3882:3886]

Unnamed: 0_level_0,angle,speed,image_file_paths
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3882,0.5625,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3882.png
3883,0.375,0.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3883.png
3885,0.0,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3885.png
3886,0.75,1.0,/content/drive/MyDrive/machine-learning-in-science-ii-2025/training_data/training_data/3886.png


### 1d) convert images to numerical RGB feature maps

In [8]:
def process_image(image_path, resized_shape=(224, 224)):
    """Read one image file and return it as a resized, scaled RGB tensor.

    Args:
        image_path: Path (string tensor) of the image file to read.
        resized_shape: Target (height, width) passed to `tf.image.resize`.

    Returns:
        The decoded 3-channel image, resized to `resized_shape` and divided
        by 255 so pixel values lie in [0, 1].
    """
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, resized_shape)
    return resized / 255.0

# Build a tf.data pipeline of (image, labels) pairs from the cleaned dataframe.
dataset = tf.data.Dataset.from_tensor_slices(
    (cleaned_df["image_file_paths"], cleaned_df["angle"], cleaned_df["speed"])
)

# Load each image and package the labels as a dict keyed by model output name.
# NOTE(review): angle is routed to the output named "classification" and speed
# to the output named "regression"; the prediction cells follow the same
# convention. Confirm this naming is intended.
dataset = dataset.map(
    lambda image_path, angle, speed: (
        process_image(image_path),
        {"classification": angle, "regression": speed},
    ),
    num_parallel_calls=tf.data.AUTOTUNE,
)

dataset = dataset.cache()
# Shuffle once and keep that order for every iteration. With the default
# reshuffle_each_iteration=True, the later take()/skip() split would select
# different elements on each epoch, leaking validation images into training.
dataset = dataset.shuffle(len(cleaned_df), reshuffle_each_iteration=False)
dataset = dataset.batch(32)
dataset = dataset.prefetch(tf.data.AUTOTUNE)

Let's check and see if what we have done works.

### 1e) Splitting data into training and validation sets (test set is already provided in kaggle data)

In [9]:
# 80-20 split. `dataset` is already batched, so these sizes count batches, not images.
# NOTE(review): take()/skip() only yield disjoint splits if the upstream
# shuffle produces the same order on every iteration - verify.

dataset_size = dataset.cardinality().numpy()
train_size = int(dataset_size * 0.8)

# The first `train_size` batches are for training; the remainder is for validation.
train_dataset, validation_dataset = dataset.take(train_size), dataset.skip(train_size)

In [16]:
# Report the split sizes. These are counts of batches, because the cardinality
# was taken from the already batched dataset.
print(f"Train size: {train_size}, validation size: {dataset_size - train_size}")

Train size: 344, validation size: 87


In [17]:
# Display the dataset's element spec (image tensor plus the label dict) as a sanity check.
validation_dataset

<_SkipDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), {'classification': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'regression': TensorSpec(shape=(None,), dtype=tf.float64, name=None)})>

### 1f) Data augmentation applied to training set

Flipping or rotating the image will render the angle labels incorrect so none of that was applied to the images for this regression task

- Random Brightness Adjustment
- Random Contrast Adjustment
- Random Hue Adjustment
- Random Saturation Adjustment


In [10]:
def augment_image(image, label):
  """Apply random colour jitter to an image (or batch of images).

  Brightness, contrast, hue and saturation are perturbed; the label is passed
  through untouched because none of these transforms change the geometry.

  Args:
    image: Float image tensor whose last dimension holds the 3 RGB channels.
    label: Label structure paired with `image`; returned unchanged.

  Returns:
    Tuple `(augmented_image, label)`.
  """
  # Stateless random ops return the same result for the same seed, so a
  # constant seed would apply one fixed shift to every batch. Draw a fresh
  # seed pair per op on every call so the jitter actually varies.
  seeds = tf.random.uniform([4, 2], maxval=tf.int32.max, dtype=tf.int32)
  image = tf.image.stateless_random_brightness(image, 0.2, seeds[0])
  image = tf.image.stateless_random_contrast(image, 0.8, 1.2, seeds[1])
  image = tf.image.stateless_random_hue(image, 0.2, seeds[2])
  image = tf.image.stateless_random_saturation(image, 0.8, 1.2, seeds[3])
  return image, label

# Rebuild the un-augmented training split from `dataset` so this cell is
# idempotent: deriving it from `train_dataset` itself would re-augment and
# re-concatenate already augmented data whenever the cell is run again.
base_train_dataset = dataset.take(train_size)
augmented_dataset = base_train_dataset.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE)
# Training data = original batches + colour-jittered copies, shuffled at batch level.
train_dataset = base_train_dataset.concatenate(augmented_dataset)
train_dataset = train_dataset.shuffle(buffer_size=len(cleaned_df))

# 2) Model Building - MobileNetV3Large Transfer Learning

a) Set up model architecture

b) define training step

c) training the model on the training set

d) fine-tuning

### 2a) Set up model architecture

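- MobileNetV3Large (ImageNet weights, no top) as the feature-extracting backbone
- global average pooling layer
- dropout layer
- stack of dense ReLU layers (256, 128, 64, 32) with dropout in between
- two output heads: a dense layer with sigmoid activation (`classification`) and a dense layer with linear activation (`regression`)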

In [10]:
dropoutrate = 0.2
num_classes = 1 # we're only predicting the prob of the positive class with a sigmoid
input_shape = (224,224,3)

# ImageNet-pretrained MobileNetV3Large without its classification top.
mbnet = tf.keras.applications.MobileNetV3Large(
    input_shape=input_shape,
    include_top=False,
    weights='imagenet',
    minimalistic=False
)

# Freeze the backbone BEFORE compile(): Keras documents that a change to
# `trainable` made after compile() needs a re-compile to be taken into account.
mbnet.trainable = False

input_layer = tf.keras.Input(shape=input_shape)

# process_image() scales pixels to [0, 1], but the Keras MobileNetV3 models
# include their own preprocessing layer by default and expect pixel values in
# [0, 255]. Undo the scaling here so the pretrained features see valid input.
x = tf.keras.layers.Rescaling(255.0)(input_layer)
x = mbnet(x)

x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(dropoutrate)(x)

# Shared fully connected trunk: 256 -> 128 -> 64 (dropout after each) -> 32.
for units in (256, 128, 64):
    x = tf.keras.layers.Dense(units, activation='relu')(x)
    x = tf.keras.layers.Dropout(dropoutrate)(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)

# Two heads on the shared trunk. Their names must match the keys of the label
# dict produced by the dataset ("classification" / "regression").
classification_output = tf.keras.layers.Dense(num_classes, activation='sigmoid', name="classification")(x)
regression_output = tf.keras.layers.Dense(1, activation='linear', name="regression")(x)

#combine both outputs
model = tf.keras.Model(inputs=input_layer, outputs=[classification_output, regression_output])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss={'classification': 'binary_crossentropy', 'regression': 'mse'},
              metrics={'classification': 'accuracy', 'regression': 'mse'})

# A functional model is built on construction, so no explicit build() call is
# needed; the summary reflects the frozen backbone.
model.summary()

### 2c) Training the model on the training set

In [None]:
# Train the new head while the MobileNetV3 backbone is frozen.
# `batch_size` is not passed: the datasets are already batched, and Keras
# documents that it must not be specified for tf.data inputs.
# NOTE(review): `.take(1)` with `epochs=1` fits and validates on a single
# batch - this looks like a smoke-test setting; confirm before a real run.
history = model.fit(train_dataset.take(1),
                    epochs=1,
                    validation_data=validation_dataset.take(1))

In [None]:
# Persist the weights of the frozen-backbone model. Keep exactly one path active;
# the fine-tuning section must load from the same location.
#model.save_weights('/home/apyba3/car_frozen_regression.weights.h5')
model.save_weights('/home/ppytr13/car_frozen_regression.weights.h5')

In [None]:
# Reset Keras' global state before the model is rebuilt for fine-tuning.
tf.keras.backend.clear_session() #Clear keras session

### 2d) fine-tuning

rebuild model after clearing keras session

In [None]:
dropoutrate = 0.2
num_classes = 1 # we're only predicting the prob of the positive class with a sigmoid
input_shape = (224,224,3)

# ImageNet-pretrained MobileNetV3Large without its classification top.
mbnet = tf.keras.applications.MobileNetV3Large(
    input_shape=input_shape,
    include_top=False,
    weights='imagenet',
    minimalistic=False
)

# Unfreeze the backbone for fine-tuning. Set this BEFORE compile(): Keras
# documents that a change to `trainable` made after compile() needs a
# re-compile to be taken into account.
mbnet.trainable = True

input_layer = tf.keras.Input(shape=input_shape)

# process_image() scales pixels to [0, 1], but the Keras MobileNetV3 models
# include their own preprocessing layer by default and expect pixel values in
# [0, 255]. Undo the scaling here so the pretrained features see valid input.
x = tf.keras.layers.Rescaling(255.0)(input_layer)
x = mbnet(x)

x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(dropoutrate)(x)

# Shared fully connected trunk: 256 -> 128 -> 64 (dropout after each) -> 32.
for units in (256, 128, 64):
    x = tf.keras.layers.Dense(units, activation='relu')(x)
    x = tf.keras.layers.Dropout(dropoutrate)(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)

# Two heads on the shared trunk. Their names must match the keys of the label
# dict produced by the dataset ("classification" / "regression").
classification_output = tf.keras.layers.Dense(num_classes, activation='sigmoid', name="classification")(x)
regression_output = tf.keras.layers.Dense(1, activation='linear', name="regression")(x)

#combine both outputs
model = tf.keras.Model(inputs=input_layer, outputs=[classification_output, regression_output])

# Lower learning rate (1e-4) than the frozen phase, since the whole network
# is now being updated.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss={'classification': 'binary_crossentropy', 'regression': 'mse'},
              metrics={'classification': 'accuracy', 'regression': 'mse'})

# A functional model is built on construction, so no explicit build() call is
# needed; the summary reflects the trainable backbone.
model.summary()

In [None]:
# Load the frozen-phase weights from the path the save cell earlier in this
# notebook actually writes to (/home/ppytr13, not /home/apyba3).
model.load_weights('/home/ppytr13/car_frozen_regression.weights.h5')

Set up fine-tuning training

In [None]:
# Fine-tune the rebuilt model on the full training split for 50 epochs.
# `batch_size` is not passed: the datasets are already batched, and Keras
# documents that it must not be specified for tf.data inputs.
history = model.fit(train_dataset,
                    epochs=50,
                    validation_data=validation_dataset)

In [None]:
# Persist the weights after fine-tuning. Keep exactly one path active.
#model.save_weights('/home/apyba3/car_unfrozen_regression.weights.h5')
model.save_weights('/home/ppytr13/car_unfrozen_regression.weights.h5')

# 3) Test-Set Predictions

a) load in test data

b) convert test images to numerical RGB feature maps

c) generate predictions on the test set

d) correctly format the predictions into a pandas dataframe

e) save predictions to a file inside the hpc (to then later send from hpc to my laptop)

### 3a) load in test data

In [None]:
# Folder holding the test images. Keep exactly one assignment active.
# Note: this rebinds `image_folder_path`, `image_file_paths` and
# `imagefilepaths_df`, which previously referred to the training data.
#image_folder_path = '/home/apyba3/KAGGLEDATAmachine-learning-in-science-ii-2025/test_data/test_data'
image_folder_path = '/content/drive/MyDrive/machine-learning-in-science-ii-2025/test_data/test_data'

# Collect every image file and order it by the integer in its file name
# (1.png, 2.png, 3.png, ...) rather than lexicographically.
image_file_paths = sorted(
    (
        os.path.join(image_folder_path, file_name)
        for file_name in os.listdir(image_folder_path)
        if file_name.lower().endswith(('.png', '.jpg', '.jpeg'))
    ),
    key=lambda path: int(os.path.splitext(os.path.basename(path))[0]),
)

# One row per test image, indexed by the numeric id parsed from the file name.
imagefilepaths_df = pd.DataFrame(
    {'image_file_paths': image_file_paths},
    index=pd.Index(
        [int(os.path.splitext(os.path.basename(path))[0]) for path in image_file_paths],
        name='image_id',
    ),
)
imagefilepaths_df.head()

### 3b) convert test images to numerical RGB feature maps

In [None]:
def process_image_no_label(image_path, resized_shape=(224, 224)):
    """Preprocess one test image exactly as the training images are preprocessed.

    Delegates to `process_image` so the training and test pipelines cannot
    drift apart (same decode, resize and [0, 1] scaling).

    Args:
        image_path: Path (string tensor) of the image file to read.
        resized_shape: Target (height, width) for the resize.

    Returns:
        The resized RGB image tensor with pixel values scaled to [0, 1].
    """
    return process_image(image_path, resized_shape)

# Turn the test file paths into batches of 32 preprocessed images (no labels),
# kept in the same order as the rows of imagefilepaths_df.
test_dataset = (
    tf.data.Dataset.from_tensor_slices(imagefilepaths_df["image_file_paths"])
    .map(process_image_no_label, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

### 3c) generate predictions on test set

In [None]:
# Predict on the whole test set. `.take(1)` would only cover the first batch
# of 32 images, leaving the saved predictions file incomplete.
predictions = model.predict(test_dataset)

### 3d) correctly format the predictions into a pandas dataframe

In [None]:
# The model returns one array per output head, in the order the heads were
# passed to tf.keras.Model. Place them side by side as two columns.
predictions_array = np.hstack((predictions[0], predictions[1]))
predictions_df = pd.DataFrame(data=predictions_array, columns=['angle', 'speed'])

In [None]:
# Preview the raw (un-thresholded) predictions.
predictions_df.head()


In [None]:
# Binarise the predicted speed at 0.5 (values <= 0.5 become 0).
# Assign through the 'speed' column only: indexing the whole frame with a
# boolean row mask (`df[mask] = 1`) overwrites every column of the matching
# rows, which replaced the angle predictions with 0/1 as well.
predictions_df['speed'] = (predictions_df['speed'] > 0.5).astype(int)
predictions_df.head()

In [1]:
# Distribution of the predicted angle values. Requires predictions_df from the
# cells above, so it fails with NameError on a fresh kernel.
predictions_df['angle'].value_counts()

NameError: name 'predictions_df' is not defined

### 3e) save predictions to a file inside the hpc (to then later send from hpc to my laptop)

In [None]:
# Write the predictions to CSV. Keep exactly one path active.
# NOTE(review): predictions_df is built from a bare array, so it has the default
# 0-based row index and that is what gets written as the first CSV column -
# confirm whether image_id is required there instead.
#predictions_df.to_csv('/home/apyba3/mbnetv3_angleregression_predictions.csv')
predictions_df.to_csv('/home/ppytr13/mbnetv3_dual_predictions.csv')

## instead - convert to tf lite (chatgpt code - not tested yet)

In [None]:
# Build a TFLite converter directly from the in-memory Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Turn on the default optimisations and allow float16 as a target type.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

# Run the conversion; the result is the serialized model as bytes.
tflite_model = converter.convert()

# Write the serialized model to 'model.tflite' in the working directory.
with open('model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)