# Artificial Neural Networks and Deep Learning

---

## Homework 2: Minimal Working Example

To make your first submission, follow these steps:
1. Create a folder named `[2024-2025] AN2DL/Homework 2` in your Google Drive.
2. Upload the `mars_for_students.npz` file to this folder.
3. Upload the Jupyter notebook `Homework 2 - Minimal Working Example.ipynb`.
4. Load and process the data.
5. Implement and train your model.
6. Submit the generated `.csv` file to Kaggle.


### Clear GPU

In [None]:
# Drop whatever state the Keras backend still holds from a previous run of
# this notebook before anything new is built.
from keras import backend as K

K.clear_session()

In [None]:
import tensorflow as tf
from numba import cuda
import gc

def clear_memory() -> None:
    """Release GPU and host memory left over from a previous run.

    Clears the Keras backend session, selects CUDA device 0 through numba and
    closes it, then triggers a Python garbage collection.

    NOTE(review): ``cuda.select_device(0)`` presumably needs a visible CUDA
    device, and closing the device through numba may leave TensorFlow unable
    to use the GPU again in the same process -- confirm both on the target
    machine.
    """
    # Clear VRAM
    tf.keras.backend.clear_session()
    cuda.select_device(0)
    cuda.close()
    
    # Clear RAM
    gc.collect()

# Run the cleanup once when the cell executes; this should clear the VRAM and RAM
clear_memory()

## 🌐 Connect Colab to Google Drive

In [None]:
#from google.colab import drive

#drive.mount("/gdrive")
#%cd /gdrive/My Drive/

## ⚙️ Import Libraries

In [None]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import keras_cv

import tensorflow as tf
import keras as tfk
from keras import layers as tfkl

import matplotlib.pyplot as plt

import numpy as np
from sklearn.model_selection import train_test_split


%matplotlib inline

# Seed NumPy and TensorFlow with the same fixed value so random draws are
# repeatable between runs.
np.random.seed(42)
tf.random.set_seed(42)

# Report the library versions in use and how many GPUs TensorFlow can see.
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tfk.__version__}")
print(f"GPU devices: {len(tf.config.list_physical_devices('GPU'))}")

## ⏳ Load the Data

In [None]:
# Load the training archive; the path is relative to the current working directory.
# NOTE(review): the setup steps at the top mention `mars_for_students.npz`;
# `mars_filtered.npz` is presumably a cleaned copy produced elsewhere -- confirm.
data = np.load("Datasets/mars_filtered.npz")
# Read the image and label arrays from the archive by key
X_train = data["X_train"]
y_train = data["y_train"]

# Print shapes to confirm
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

## 🛠️ Train and Save the Model

In [None]:
# Split the dataset into training and validation sets: 20% of the samples are
# held out, and the fixed random_state makes the split repeatable.
# X_train / y_train are rebound to the training portion only.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
# X_train and X_val are the image arrays produced by the train/validation split.
# Add a trailing channel dimension and divide by 255
# (assumes 8-bit pixel values, giving a [0, 1] range -- TODO confirm).
X_train = X_train[..., np.newaxis] / 255.0
X_val = X_val[..., np.newaxis] / 255.0

# Calculate input shape (everything after the sample axis) and the number of
# unique classes in the training labels
input_shape = X_train.shape[1:]
num_classes = len(np.unique(y_train))

# Print the results
print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")
print("X_train shape:", X_train.shape)  # Ends with the single channel axis added above
print("X_val shape:", X_val.shape)      # Ends with the single channel axis added above

In [None]:
# Convert grayscale to RGB by repeating the last (channel) axis three times,
# so a single-channel image becomes a three-channel one with identical planes.
X_train_rgb = np.repeat(X_train, 3, axis=-1)
X_val_rgb = np.repeat(X_val, 3, axis=-1)
print("X_train_rgb shape:", X_train_rgb.shape)
# The label names the array that is actually printed (the validation split).
print("X_val_rgb shape:", X_val_rgb.shape)

In [None]:
from tensorflow.keras.layers import (
    Input,
    Conv2D,
    UpSampling2D,
    MaxPooling2D,
    Concatenate,
    BatchNormalization,
    Activation,
    Add
)
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import GlorotUniform

def conv_block(x, filters, initializer, name):
    """Apply two consecutive Conv2D -> BatchNormalization -> ReLU stages to ``x``.

    Args:
        x: Input tensor.
        filters: Number of filters used by both 3x3 convolutions.
        initializer: Kernel initializer passed to both convolutions.
        name: Prefix for the layer names; the stages are suffixed
            ``_conv1/_bn1/_act1`` and ``_conv2/_bn2/_act2``.

    Returns:
        The output tensor of the second activation.
    """
    for stage in ("1", "2"):
        x = Conv2D(
            filters,
            (3, 3),
            padding="same",
            kernel_initializer=initializer,
            name=f"{name}_conv{stage}",
        )(x)
        x = BatchNormalization(name=f"{name}_bn{stage}")(x)
        x = Activation("relu", name=f"{name}_act{stage}")(x)
    return x

def unet(input_shape, num_classes, name):
    """Build a four-level U-Net with a per-pixel softmax output.

    The encoder runs ``conv_block`` with 64, 128, 256 and 512 filters, each
    followed by 2x2 max pooling. A 1024-filter bottleneck follows. The decoder
    mirrors the encoder: 2x upsampling, concatenation with the matching
    encoder output, then a ``conv_block``.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of channels of the final 1x1 softmax convolution.
        name: Model name, also used as the prefix of every layer name.

    Returns:
        A Keras ``Model`` mapping the input to the softmax output.
    """
    level_filters = (64, 128, 256, 512)
    initializer = GlorotUniform()  # Xavier initialization
    inputs = Input(shape=input_shape, name=f"{name}_input")

    # Encoder: keep each level's pre-pooling features for the skip connections.
    skips = []
    x = inputs
    for level, filters in enumerate(level_filters, start=1):
        x = conv_block(x, filters, initializer, name=f"{name}_encoder{level}")
        skips.append(x)
        x = MaxPooling2D((2, 2), name=f"{name}_pool{level}")(x)

    # Bottleneck
    x = conv_block(x, 1024, initializer, name=f"{name}_bottleneck")

    # Decoder: walk the levels from the deepest back to the first.
    for level in range(len(level_filters), 0, -1):
        x = UpSampling2D((2, 2), name=f"{name}_up{level}")(x)
        x = Concatenate(name=f"{name}_concat{level}")([x, skips[level - 1]])
        x = conv_block(x, level_filters[level - 1], initializer, name=f"{name}_decoder{level}")

    # Output layer: one softmax distribution over classes per pixel.
    outputs = Conv2D(
        num_classes,
        (1, 1),
        activation="softmax",
        kernel_initializer=initializer,
        name=f"{name}_output",
    )(x)
    return Model(inputs, outputs, name=name)

def hierarchical_unet(input_shape, num_classes):
    """Chain two U-Nets: a coarse one, and a fine one that refines its output.

    The fine U-Net receives the coarse softmax output concatenated with the
    original input along the channel axis, so its input has
    ``input_shape[2] + num_classes`` channels.

    Args:
        input_shape: ``(height, width, channels)`` of one input sample.
        num_classes: Number of segmentation classes produced by both U-Nets.

    Returns:
        A Keras ``Model`` named ``"hierarchical_unet"`` whose only input is
        the coarse U-Net's input and whose outputs are
        ``[coarse_output, fine_output]``.
    """
    # First stage: coarse segmentation straight from the image.
    coarse_model = unet(input_shape, num_classes, name="coarse_unet")
    coarse_probs = coarse_model.output

    # Second stage input: coarse class probabilities stacked on the image.
    stacked = Concatenate(name="fine_input_concat")([coarse_probs, coarse_model.input])

    # The fine U-Net is a separate model sized for the widened channel axis.
    widened_shape = (input_shape[0], input_shape[1], input_shape[2] + num_classes)
    fine_model = unet(widened_shape, num_classes, name="fine_unet")
    fine_probs = fine_model(stacked)

    return Model(
        inputs=coarse_model.input,
        outputs=[coarse_probs, fine_probs],
        name="hierarchical_unet",
    )


# Define the input shape and number of classes.
# These assignments overwrite the `input_shape` and `num_classes` values that
# were derived from the data in an earlier cell.
input_shape = (64, 128, 3)  # Height, width, channels of the 3-channel images
num_classes = 5  # Number of segmentation classes

# Build the hierarchical UNet
model = hierarchical_unet(input_shape, num_classes)

# Summary of the model
model.summary()


In [None]:
class MeanIntersectionOverUnion(tf.keras.metrics.MeanIoU):
    """Mean IoU computed on argmax predictions, skipping selected true labels.

    Pixels whose ground-truth label is in ``labels_to_exclude`` are removed
    from both ``y_true`` and ``y_pred`` before the parent metric is updated.

    Args:
        num_classes: Number of classes, forwarded to ``MeanIoU``.
        labels_to_exclude: Ground-truth labels to drop; defaults to ``[0]``.
        name: Metric name.
        dtype: Metric dtype, forwarded to ``MeanIoU``.
    """

    def __init__(self, num_classes, labels_to_exclude=None, name="mean_iou", dtype=None):
        super().__init__(num_classes=num_classes, name=name, dtype=dtype)
        # Fall back to excluding label 0 when the caller gives nothing.
        self.labels_to_exclude = [0] if labels_to_exclude is None else labels_to_exclude

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch; ``y_pred`` holds per-class scores on its last axis."""
        # Class scores -> class indices, then flatten both tensors to 1-D.
        predicted = tf.reshape(tf.math.argmax(y_pred, axis=-1), [-1])
        labels = tf.reshape(y_true, [-1])

        # Drop, one excluded label at a time, every pixel carrying that label.
        for excluded in self.labels_to_exclude:
            keep = tf.not_equal(labels, excluded)
            labels = tf.boolean_mask(labels, keep)
            predicted = tf.boolean_mask(predicted, keep)

        return super().update_state(labels, predicted, sample_weight)

    def get_config(self):
        """Return the parent config extended with ``labels_to_exclude``."""
        config = super().get_config()
        config["labels_to_exclude"] = self.labels_to_exclude
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the metric by passing every config entry to the constructor.

        NOTE(review): this assumes the parent's config only contains keys that
        ``__init__`` accepts -- confirm for the installed Keras version.
        """
        return cls(**config)


In [None]:
from tensorflow.keras.metrics import MeanIoU

# Define Mean IoU for evaluation
def mean_iou_metric(num_classes):
    """Return a ``mean_iou(y_true, y_pred)`` function bound to ``num_classes``.

    The returned function takes the argmax of ``y_pred`` over its last axis,
    casts ``y_true`` to int32, and evaluates a freshly created ``MeanIoU``
    on that pair, so nothing is accumulated between calls.
    """
    def mean_iou(y_true, y_pred):
        labels = tf.cast(y_true, tf.int32)
        predicted = tf.argmax(y_pred, axis=-1)
        # A new metric object is built on every call.
        return MeanIoU(num_classes=num_classes)(labels, predicted)

    return mean_iou

# Compile with one sparse categorical cross-entropy loss per output. The keys
# are the output names: the coarse head is the layer "coarse_unet_output" and
# the fine head is the nested model "fine_unet".
# The total loss weights the fine head (0.6) above the coarse head (0.4).
# The metrics list has no per-output keys; presumably Keras applies it to each
# output, giving names such as "fine_unet_mean_iou" -- confirm against the
# training logs.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss={
        "coarse_unet_output": "sparse_categorical_crossentropy",
        "fine_unet": "sparse_categorical_crossentropy",
    },
    loss_weights={"coarse_unet_output": 0.4, "fine_unet": 0.6},
    metrics=["accuracy", MeanIntersectionOverUnion(num_classes=5, labels_to_exclude=[0])],
)


In [None]:
class VisualizationCallback(tf.keras.callbacks.Callback):
    """Plot input, true mask and predicted mask for a few validation samples.

    Every ``save_interval`` epochs the callback runs the model on the stored
    samples and shows a three-panel figure per sample. The predicted mask is
    taken from the model's second output (the fine U-Net).

    Args:
        val_dataset: Batched dataset yielding ``(inputs, labels)`` where
            ``inputs`` has the keys ``"coarse_unet_input"`` and
            ``"fine_unet_input"`` and ``labels`` has the key ``"fine_unet"``.
        save_interval: Plot every this many epochs.
        num_samples: Number of individual samples taken from ``val_dataset``.
    """

    def __init__(self, val_dataset, save_interval=5, num_samples=1):
        super().__init__()
        # Unbatch so that `take` selects single samples rather than batches.
        self.val_dataset = val_dataset.unbatch().take(num_samples)
        self.save_interval = save_interval
        self.num_samples = num_samples

    @staticmethod
    def _to_displayable(image):
        """Return ``image`` in a form ``plt.imshow`` renders correctly.

        Float images whose maximum is at most 1 are shown as floats, clipped
        to [0, 1]. Casting them to uint8 would truncate every value to 0 and
        draw a black panel. Anything else is cast to uint8 as before.
        """
        image = np.asarray(image)
        is_unit_float = (
            np.issubdtype(image.dtype, np.floating)
            and image.size > 0
            and image.max() <= 1.0
        )
        if is_unit_float:
            return np.clip(image, 0.0, 1.0)
        return np.uint8(image)

    def on_epoch_end(self, epoch, logs=None):
        """Show the plots when ``epoch + 1`` is a multiple of ``save_interval``."""
        if (epoch + 1) % self.save_interval != 0:
            return

        print(f"\nEpoch {epoch + 1}: Visualizing predictions...\n")
        for idx, (inputs, labels) in enumerate(self.val_dataset):
            # Extract inputs and labels
            coarse_input = inputs["coarse_unet_input"]
            fine_input = inputs["fine_unet_input"]
            true_mask = labels["fine_unet"]

            # Generate predictions; `[None]` adds the batch axis back.
            pred_mask = self.model.predict(
                {"coarse_unet_input": coarse_input[None], "fine_unet_input": fine_input[None]}
            )
            pred_mask = pred_mask[1]  # Extract the second output (fine_unet)

            # Convert per-class probabilities of the single sample to class indices
            pred_mask = np.argmax(pred_mask[0], axis=-1)

            # Plot the results
            plt.figure(figsize=(12, 6))
            plt.subplot(1, 3, 1)
            plt.imshow(self._to_displayable(fine_input), interpolation="nearest")
            plt.title("Input Image")
            plt.axis("off")

            plt.subplot(1, 3, 2)
            plt.imshow(true_mask.numpy(), cmap="viridis", interpolation="nearest")
            plt.title("True Mask")
            plt.axis("off")

            plt.subplot(1, 3, 3)
            plt.imshow(pred_mask, cmap="viridis", interpolation="nearest")
            plt.title("Predicted Mask")
            plt.axis("off")

            plt.suptitle(f"Sample {idx + 1}")
            plt.show()


In [None]:
# Constants
BATCH_SIZE = 32
AUTO = tf.data.AUTOTUNE  # Let tf.data choose parallelism / prefetch depth

# Spatial augmentations: intended to be applied to image and mask together
# (see apply_combined_transform, which feeds them a concatenated tensor).
spatial_augmentations = keras_cv.layers.Augmenter(
    [
        keras_cv.layers.RandomFlip(mode="horizontal_and_vertical"),
        keras_cv.layers.RandomRotation(factor=0.1),  # NOTE(review): factor is presumably a fraction of a full turn (0.1 ~ +/-36 deg), not +/-10 deg -- confirm in the KerasCV docs
        keras_cv.layers.RandomZoom(height_factor=(-0.2, 0.2), width_factor=(-0.2, 0.2)),
        keras_cv.layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
        keras_cv.layers.RandomShear(x_factor=0.1, y_factor=0.1),
    ]
)

# Colour augmentations: intended for the image only.
# NOTE(review): value_range=(0, 255) is declared here, while the images are
# divided by 255 in an earlier cell -- confirm which range these layers see.
color_augmentations = keras_cv.layers.Augmenter(
    [

        keras_cv.layers.AutoContrast(value_range=(0, 255)),   # Adjust contrast
        keras_cv.layers.RandomSaturation(factor=0.2),  # Adjust saturation
        keras_cv.layers.RandomHue(factor=0.2, value_range=(0, 255)),         # Adjust hue
    ]
)

# Additional augmentation pipeline (cutout, shear, translation, zoom).
# It is not referenced by any other code visible in this notebook.
data_augmentation = keras_cv.layers.Augmenter(
    [
        keras_cv.layers.RandomCutout(height_factor=0.2, width_factor=0.2),
        keras_cv.layers.RandomShear(x_factor=0.2, y_factor=0.2),
        keras_cv.layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
        keras_cv.layers.RandomZoom(height_factor=0.2, width_factor=0.2),
    ]
)

def apply_combined_transform(x, y):
    """Augment an image/mask pair with shared geometry and image-only colour changes.

    The mask gets a trailing channel axis and is concatenated to the image.
    ``spatial_augmentations`` then transforms both at once, so they stay
    aligned. Afterwards the first three channels are taken as the image and
    passed through ``color_augmentations``; the remaining channel is returned
    as the mask with its channel axis removed.

    Assumes ``x`` has exactly three channels on its last axis and ``y`` has no
    channel axis -- TODO confirm against the caller.
    NOTE(review): the mask is cast to float32 and transformed like an image;
    confirm the augmentations keep its values on integer class labels.

    Args:
        x: Image tensor.
        y: Mask tensor.

    Returns:
        ``(x_augmented, y_augmented)``.
    """
    # Bring both tensors to float32; the mask gains a channel axis first.
    image = tf.cast(x, tf.float32)
    mask = tf.cast(tf.expand_dims(y, axis=-1), tf.float32)

    # One tensor -> one random geometric transform shared by image and mask.
    stacked = tf.concat([image, mask], axis=-1)
    warped = spatial_augmentations(stacked)

    # Channels 0-2 are the image, everything after is the mask.
    y_augmented = tf.squeeze(warped[..., 3:], axis=-1)
    x_augmented = color_augmentations(warped[..., :3])

    return x_augmented, y_augmented

# Training pipeline: shuffle, batch, then wrap every batch into the
# (inputs dict, labels dict) structure; the same mask is the target of both heads.
# No augmentation function is mapped onto this pipeline.
# NOTE(review): the hierarchical model is built with a single input (the coarse
# U-Net's input) -- confirm how Keras treats the extra "fine_unet_input" key.
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train_rgb, y_train))
    .shuffle(BATCH_SIZE * 100)
    .batch(BATCH_SIZE)
    .map(
        lambda x, y: (
            {"coarse_unet_input": x, "fine_unet_input": x},  # Input keys matching model inputs
            {"coarse_unet_output": y, "fine_unet": y},  # Output keys matching model outputs
        ),
        num_parallel_calls=AUTO,
    )
    .prefetch(AUTO)
)

# Validation pipeline: same structure as the training one, without shuffling.
val_ds = (
    tf.data.Dataset.from_tensor_slices((X_val_rgb, y_val))
    .batch(BATCH_SIZE)
    .map(
        lambda x, y: (
            {"coarse_unet_input": x, "fine_unet_input": x},  # Input keys
            {"coarse_unet_output": y, "fine_unet": y},  # Output keys
        ),
        num_parallel_calls=AUTO,
    )
    .prefetch(AUTO)
)

# Plot predictions for three validation samples every five epochs.
visualization_callback = VisualizationCallback(val_ds, save_interval=5, num_samples=3)

# Both scheduling callbacks watch the fine head's validation mean IoU, which
# should increase, so both use mode="max". With mode="min" the LR scheduler
# would treat every improvement as a plateau and keep halving the learning rate.
# NOTE(review): train_ds is not repeated, so steps_per_epoch=64 needs at least
# 64 training batches per epoch -- confirm against the dataset size.
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    steps_per_epoch=64,
    callbacks=[
        tfk.callbacks.EarlyStopping(
            monitor="val_fine_unet_mean_iou",
            mode="max",
            patience=20,
            restore_best_weights=True,
        ),
        tfk.callbacks.ReduceLROnPlateau(
            monitor="val_fine_unet_mean_iou",
            mode="max",
            factor=0.5,       # Gradual reduction: halve the learning rate
            patience=5,       # Epochs without improvement before reducing
            min_lr=1e-7,      # Lower bound for the learning rate
            verbose=1,
            min_delta=1e-4,   # Smallest change that counts as an improvement
            cooldown=1,       # Short stabilisation period after a reduction
        ),
        visualization_callback,
    ],
)


In [None]:
# Name the saved model after the current local time (yymmdd_HHMMSS) so that
# successive runs do not overwrite each other.
timestep_str = datetime.now().strftime("%y%m%d_%H%M%S")
model_filename = f"model_{timestep_str}.keras"
model.save(model_filename)


print(f"Model saved to {model_filename}")

In [None]:
# Drop the training/validation arrays and the loaded archive; none of them is
# used by the remaining cells.
del X_train, X_val, y_train, y_val, X_train_rgb, X_val_rgb, data

In [None]:
# Load the test images and preprocess them like the training images:
# trailing channel axis, division by 255, replication to three channels.
data = np.load("Datasets/mars_for_students.npz")
X_test = data["test_set"]
X_test = X_test[..., np.newaxis] / 255.0
X_test_rgb = np.repeat(X_test, 3, axis=-1)
# Only the three-channel copy is needed from here on.
del data, X_test
batch_size = 32  # Try a smaller value if necessary

In [None]:
# The hierarchical model has two outputs, [coarse, fine], so predict() returns
# one array per output. Keep only the last one (the fine U-Net) before taking
# the argmax; otherwise both heads are stacked and `preds` gains a leading
# axis of size 2 instead of one entry per test image.
preds = model.predict(X_test_rgb, batch_size=batch_size)
if isinstance(preds, (list, tuple)):
    preds = preds[-1]
# Per-pixel class index = position of the largest class probability.
preds = np.argmax(preds, axis=-1)
print(f"Predictions shape: {preds.shape}")

In [None]:
def y_to_df(y) -> pd.DataFrame:
    """Flatten per-sample predictions into one row each, led by an ``id`` column.

    Args:
        y: Array whose first axis indexes samples; all remaining axes are
            flattened into the columns ``0 .. k-1``.

    Returns:
        A DataFrame with the columns ``["id", 0, 1, ..., k-1]``, where ``id``
        runs from 0 to ``len(y) - 1``.
    """
    sample_count = len(y)
    ids = pd.DataFrame({"id": np.arange(sample_count)})
    pixels = pd.DataFrame(y.reshape(sample_count, -1))
    # Column-wise concatenation puts "id" first, followed by the pixel columns.
    return pd.concat([ids, pixels], axis=1)

In [None]:
# Create the csv submission file.
# Reuse the timestamp embedded in the saved model's filename so the submission
# can be matched to the model that produced it.
timestep_str = model_filename.replace("model_", "").replace(".keras", "")
submission_filename = f"submission_{timestep_str}.csv"
submission_df = y_to_df(preds)
submission_df.to_csv(submission_filename, index=False)

In [None]:
#from google.colab import files
#files.download(submission_filename)

#!cp submission_filename /content/drive/MyDrive/submissions