# Artificial Neural Networks and Deep Learning

---

## Homework 2: Minimal Working Example

To make your first submission, follow these steps:
1. Create a folder named `[2024-2025] AN2DL/Homework 2` in your Google Drive.
2. Upload the `mars_for_students.npz` file to this folder.
3. Upload the Jupyter notebook `Homework 2 - Minimal Working Example.ipynb`.
4. Load and process the data.
5. Implement and train your model.
6. Submit the generated `.csv` file to Kaggle.


## ⚙️ Import Libraries

In [1]:
import os
from datetime import datetime

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from sklearn.model_selection import train_test_split


import matplotlib.pyplot as plt
%matplotlib inline

np.random.seed(42)
tf.random.set_seed(42)

print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tfk.__version__}")
#print(f"GPU devices: {len(tf.config.list_physical_devices('GPU'))}")

TensorFlow version: 2.16.1
Keras version: 3.3.3


## ⏳ Load the Data

In [2]:
data = np.load("/kaggle/input/mars_for_students.npz")

# Estrazione dei set di dati
training_set = data["training_set"]
X_t = training_set[:, 0]  # Immagini di training
y_t = training_set[:, 1]  # Maschere di training

# Identificazione della maschera da escludere
outlier_mask = y_t[62]

# Funzione per filtrare il training set
def remove_outliers(X, y, outlier_mask):
    mask_indices = [i for i in range(len(y)) if np.array_equal(y[i], outlier_mask)]
    X_train = np.delete(X, mask_indices, axis=0)
    y_train = np.delete(y, mask_indices, axis=0)
    return X_train, y_train, mask_indices

# Rimozione degli outliers
X_clean, y_clean, removed_indices = remove_outliers(X_t, y_t, outlier_mask)

# Stampa degli indici rimossi
print(f"Indici rimossi: {removed_indices}")
print(f"Numero di immagini originali: {len(X_t)}")
print(f"Numero di immagini dopo la rimozione: {len(X_clean)}")

X_train, X_val, y_train, y_val = train_test_split(X_clean, y_clean, test_size=0.2, random_state=42)

X_train = X_train[..., np.newaxis]
X_val = X_val[..., np.newaxis]

print(f"X_train shape: {X_train.shape}")  # Dovrebbe essere (20040, 64, 128, 1)
print(f"X_val shape: {X_val.shape}")

Indici rimossi: [62, 79, 125, 139, 142, 147, 152, 156, 170, 210, 217, 266, 289, 299, 313, 339, 348, 365, 412, 417, 426, 450, 461, 536, 552, 669, 675, 741, 744, 747, 799, 802, 808, 820, 821, 849, 863, 890, 909, 942, 971, 1005, 1057, 1079, 1082, 1092, 1095, 1106, 1119, 1125, 1177, 1194, 1224, 1247, 1248, 1258, 1261, 1262, 1306, 1324, 1365, 1370, 1443, 1449, 1508, 1509, 1519, 1551, 1584, 1588, 1628, 1637, 1693, 1736, 1767, 1768, 1782, 1813, 1816, 1834, 1889, 1925, 1942, 1975, 1979, 2000, 2002, 2086, 2096, 2110, 2111, 2151, 2161, 2222, 2235, 2239, 2242, 2301, 2307, 2350, 2361, 2365, 2372, 2414, 2453, 2522, 2535, 2561, 2609, 2614]
Numero di immagini originali: 2615
Numero di immagini dopo la rimozione: 2505
X_train shape: (2004, 64, 128, 1)
X_val shape: (501, 64, 128, 1)


In [3]:
# Aggiunta del canale
#X_train = np.expand_dims(X_train, axis=-1)
#X_val = np.expand_dims(X_val, axis=-1)

# Dopo l'aggiunta del canale
print("\nDopo l'aggiunta del canale:")
print(f"X_train shape: {X_train.shape}")  # Dovrebbe essere (20040, 64, 128, 1)
print(f"X_val shape: {X_val.shape}")      # Dovrebbe essere (7515, 64, 128, 1)

# Verifica anche il tipo di dati e il range dei valori
print("\nInformazioni aggiuntive:")
print(f"X_train dtype: {X_train.dtype}")
print(f"X_train min value: {X_train.min()}")
print(f"X_train max value: {X_train.max()}")


Dopo l'aggiunta del canale:
X_train shape: (2004, 64, 128, 1)
X_val shape: (501, 64, 128, 1)

Informazioni aggiuntive:
X_train dtype: float64
X_train min value: 3.0
X_train max value: 254.0


## 🛠️ Train and Save the Model

In [4]:
# Assuming X_train and X_test are your image datasets
# Add a channel dimension and normalize pixel values to [0, 1]
#X_train = X_train[..., np.newaxis] / 255.0
#X_val = X_val[..., np.newaxis] / 255.0

# Calculate input shape and the number of unique classes in the labels
input_shape = X_train.shape[1:]
num_classes = 5

# Print the results
print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")
print("X_train shape:", X_train.shape)  # Should be (batch_size, 64, 128, 3)
print("X_val shape:", X_val.shape)      # Should be (batch_size, 64, 128, 3)

Input shape: (64, 128, 1)
Number of classes: 5
X_train shape: (2004, 64, 128, 1)
X_val shape: (501, 64, 128, 1)


In [None]:
# Convert grayscale to RGB correctly
#X_train_rgb = np.repeat(X_train, 3, axis=-1)  # Ensure shape (batch_size, 64, 128, 3)
#X_val_rgb = np.repeat(X_val, 3, axis=-1)
#print("X_train_rgb shape:", X_train_rgb.shape)  # Should be (batch_size, 64, 128, 3)
#print("X_test_rgb shape:", X_val_rgb.shape)    # Should be (batch_size, 64, 128, 3)

In [5]:
from tensorflow.keras.layers import (
    Input, 
    Conv2D,
    MaxPooling2D,
    UpSampling2D,
    Concatenate,
    BatchNormalization,
    Activation,
    Dropout,
    Multiply
)
from tensorflow.keras.models import Model

def conv_block(x, filters, name, kernel_size=(3, 3)):
    x = Conv2D(filters, kernel_size, padding='same', kernel_initializer='he_normal', name=f'{name}_conv1')(x)
    x = BatchNormalization(name=f'{name}_bn1')(x)
    x = Activation('relu', name=f'{name}_act1')(x)
    x = Dropout(0.1)(x)  # Aggiunto dropout moderato
    
    x = Conv2D(filters, kernel_size, padding='same', kernel_initializer='he_normal', name=f'{name}_conv2')(x)
    x = BatchNormalization(name=f'{name}_bn2')(x)
    x = Activation('relu', name=f'{name}_act2')(x)
    return x

def attention_block(x, filters):
    # Spatial attention
    attention = Conv2D(filters, (1, 1), activation='sigmoid')(x)
    return Multiply()([x, attention])

def nested_unet(input_shape, num_classes):
    inputs = Input(input_shape)
    
    # Encoder Path con attention
    x00 = conv_block(inputs, 64, 'x00')
    x00 = attention_block(x00, 64)
    p0 = MaxPooling2D((2, 2), name='p0')(x00)
    
    x10 = conv_block(p0, 128, 'x10')
    x10 = attention_block(x10, 128)
    p1 = MaxPooling2D((2, 2), name='p1')(x10)
    
    x20 = conv_block(p1, 256, 'x20')
    x20 = attention_block(x20, 256)
    p2 = MaxPooling2D((2, 2), name='p2')(x20)
    
    x30 = conv_block(p2, 512, 'x30')
    x30 = attention_block(x30, 512)
    p3 = MaxPooling2D((2, 2), name='p3')(x30)
    
    # Bridge
    x40 = conv_block(p3, 1024, 'x40')
    x40 = attention_block(x40, 1024)
    
    # Decoder Path con skip connections migliorate
    u03 = UpSampling2D((2, 2), name='u03')(x40)
    x31 = Concatenate(name='cat31')([u03, x30])
    x31 = conv_block(x31, 512, 'x31')
    x31 = attention_block(x31, 512)
    
    u02 = UpSampling2D((2, 2), name='u02')(x31)
    x21 = Concatenate(name='cat21')([u02, x20])
    x21 = conv_block(x21, 256, 'x21')
    x21 = attention_block(x21, 256)
    
    u01 = UpSampling2D((2, 2), name='u01')(x21)
    x11 = Concatenate(name='cat11')([u01, x10])
    x11 = conv_block(x11, 128, 'x11')
    x11 = attention_block(x11, 128)
    
    u00 = UpSampling2D((2, 2), name='u00')(x11)
    x01 = Concatenate(name='cat01')([u00, x00])
    x01 = conv_block(x01, 64, 'x01')
    x01 = attention_block(x01, 64)
    
    # Output con doppia supervisione
    coarse_output = Conv2D(num_classes, (1, 1), activation='softmax', name='coarse_output')(x01)
    
    fine_pre = Conv2D(num_classes, (1, 1), activation='softmax', name='fine_pre')(x01)
    attention_map = Conv2D(1, (1, 1), activation='sigmoid', name='attention_map')(x01)
    fine_output = Multiply(name='fine_output')([fine_pre, attention_map])
    
    model = Model(inputs=[inputs], outputs=[coarse_output, fine_output])
    return model

# Recreate the model
input_shape = (64, 128, 1)
num_classes = 5
model = nested_unet(input_shape, num_classes)

# Ricrea il modello con i nuovi nomi
model = nested_unet(input_shape, num_classes)

# Stampa i nomi degli output per verifica
print("New model output names:", model.output_names)



New model output names: ListWrapper(['coarse_output', 'fine_output'])


In [6]:
# Define custom Mean Intersection Over Union metric
class MeanIntersectionOverUnion(tf.keras.metrics.MeanIoU):
    def __init__(self, num_classes, labels_to_exclude=None, name="mean_iou", dtype=None):
        super(MeanIntersectionOverUnion, self).__init__(num_classes=num_classes, name=name, dtype=dtype)
        if labels_to_exclude is None:
            labels_to_exclude = [0]  # Default to excluding label 0
        self.labels_to_exclude = labels_to_exclude

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Convert predictions to class labels
        y_pred = tf.math.argmax(y_pred, axis=-1)

        # Flatten the tensors
        y_true = tf.reshape(y_true, [-1])
        y_pred = tf.reshape(y_pred, [-1])

        # Apply mask to exclude specified labels
        for label in self.labels_to_exclude:
            mask = tf.not_equal(y_true, label)
            y_true = tf.boolean_mask(y_true, mask)
            y_pred = tf.boolean_mask(y_pred, mask)

        # Update the state
        return super().update_state(y_true, y_pred, sample_weight)




In [7]:
import tensorflow.keras.backend as K

def focal_loss_with_label_smoothing(alpha=0.25, gamma=2.0, smoothing=0.1):
    def loss(y_true, y_pred):
        # Convertiamo y_true in one-hot encoding
        y_true = tf.cast(y_true, tf.int32)
        y_true_one_hot = tf.one_hot(tf.squeeze(y_true), depth=5)
        
        # Clip predictions per evitare log(0)
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        
        # Label smoothing
        y_true_smooth = y_true_one_hot * (1.0 - smoothing) + smoothing / 5.0
        
        # Focal loss
        ce = -y_true_smooth * K.log(y_pred)
        weight = alpha * K.pow(1.0 - y_pred, gamma)
        focal = weight * ce
        
        # Media su tutti i pixel
        return K.mean(K.sum(focal, axis=-1))
    
    return loss

def weighted_focal_loss(alpha=0.25, gamma=2.0):
    def loss(y_true, y_pred):
        # Converti y_true in one-hot
        y_true = tf.cast(y_true, tf.int32)
        y_true_one_hot = tf.one_hot(tf.squeeze(y_true), depth=5)
        
        # Pesi per le classi
        class_weights = tf.constant([0.1, 1.0, 2.0, 1.5, 2.0])
        
        # Focal loss con pesi per classe
        epsilon = 1e-7
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        
        ce = -y_true_one_hot * tf.math.log(y_pred)
        weight = alpha * tf.pow(1. - y_pred, gamma)
        fl = weight * ce * class_weights
        
        return tf.reduce_mean(tf.reduce_sum(fl, axis=-1))
    return loss

In [8]:
# Constants
BATCH_SIZE = 32
AUTO = tf.data.AUTOTUNE
    
def apply_combined_transform(x, y):
    """Funzione per applicare trasformazioni o preprocessamenti ai dataset."""
    y = tf.expand_dims(y, axis=-1)  # Aggiungi dimensione per il canale
    
    return (
        tf.cast(x, tf.float32) / 255.0,  # Normalizza input
        {
            'coarse_output': y,
            'fine_output': y
        }
    )

# Ricrea i dataset
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(BATCH_SIZE * 100, seed=42)
    .batch(BATCH_SIZE)
    .map(apply_combined_transform, num_parallel_calls=AUTO)
    .prefetch(AUTO)
)

val_ds = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .batch(BATCH_SIZE)
    .map(apply_combined_transform, num_parallel_calls=AUTO)
    .prefetch(AUTO)
) 




In [9]:
# Compilazione del modello
model.compile(
    optimizer=tf.keras.optimizers.AdamW(
        learning_rate=1e-3,
        weight_decay=1e-4,
        beta_1=0.9,
        beta_2=0.999
    ),
    loss={
        'coarse_output': weighted_focal_loss(alpha=0.25, gamma=2.0),
        'fine_output': weighted_focal_loss(alpha=0.25, gamma=2.0)
    },
    loss_weights={
        'coarse_output': 0.4,
        'fine_output': 1.0
    },
    metrics=['accuracy', MeanIntersectionOverUnion(num_classes=5)]
)

# Callbacks migliorati
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_fine_output_mean_iou',
        mode='max',
        patience=15,
        restore_best_weights=True,
        verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_fine_output_mean_iou',
        mode='max',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1
    )
]

# Training
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/100


I0000 00:00:1733422978.318132     244 service.cc:145] XLA service 0x7b96080042d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1733422978.318193     244 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m 1/63[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m52:27[0m 51s/step - coarse_output_accuracy: 0.1514 - fine_output_mean_iou: 0.0672 - loss: 0.6711

I0000 00:00:1733423015.079446     244 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 661ms/step - coarse_output_accuracy: 0.2911 - fine_output_mean_iou: 0.1576 - loss: 0.4007




[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 857ms/step - coarse_output_accuracy: 0.2917 - fine_output_mean_iou: 0.1578 - loss: 0.3991 - val_coarse_output_accuracy: 0.1979 - val_fine_output_mean_iou: 0.0656 - val_loss: 1.3223 - learning_rate: 0.0010
Epoch 2/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 151ms/step - coarse_output_accuracy: 0.3786 - fine_output_mean_iou: 0.2022 - loss: 0.1826 - val_coarse_output_accuracy: 0.1986 - val_fine_output_mean_iou: 0.0656 - val_loss: 0.7290 - learning_rate: 0.0010
Epoch 3/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 149ms/step - coarse_output_accuracy: 0.3952 - fine_output_mean_iou: 0.2278 - loss: 0.1597 - val_coarse_output_accuracy: 0.1979 - val_fine_output_mean_iou: 0.0656 - val_loss: 1.2320 - learning_rate: 0.0010
Epoch 4/100
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 151ms/step - coarse_output_accuracy: 0.3902 - fine_output_mean_iou: 0.2247 - loss: 0.1503

In [10]:
timestep_str = datetime.now().strftime("%y%m%d_%H%M%S")
model_filename = f"model_{timestep_str}.keras"
model.save(model_filename)

print(f"Model saved to {model_filename}")

Model saved to model_241205_183518.keras


In [12]:
# Load test data
data = np.load("/kaggle/input/mars_for_students.npz")
test_set = data["test_set"]
X_test = np.expand_dims(test_set, axis=-1)

# Normalize test data
X_test = X_test.astype('float32') / 255.0

# Get predictions
predictions = model.predict(X_test)
# Use the fine output predictions (index 1 for fine_output)
fine_predictions = predictions[1]  # Use index 1 instead of 'fine_output'
# Convert to class indices
final_predictions = np.argmax(fine_predictions, axis=-1)

[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 40ms/step


In [13]:
# Create submission DataFrame
def y_to_df(y) -> pd.DataFrame:
    """Converts segmentation predictions into a DataFrame format for Kaggle."""
    n_samples = len(y)
    y_flat = y.reshape(n_samples, -1)
    df = pd.DataFrame(y_flat)
    df["id"] = np.arange(n_samples)
    cols = ["id"] + [col for col in df.columns if col != "id"]
    return df[cols]



In [14]:
# Create and save the submission file
timestep_str = datetime.now().strftime("%y%m%d_%H%M%S")
submission_filename = f"submission_{timestep_str}.csv"
submission_df = y_to_df(final_predictions)
submission_df.to_csv(submission_filename, index=False)

print(f"Submission saved to {submission_filename}")

Submission saved to submission_241205_183711.csv


#  
<img src="https://airlab.deib.polimi.it/wp-content/uploads/2019/07/airlab-logo-new_cropped.png" width="350">

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/95/Instagram_logo_2022.svg/800px-Instagram_logo_2022.svg.png" width="15"> **Instagram:** https://www.instagram.com/airlab_polimi/

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/81/LinkedIn_icon.svg/2048px-LinkedIn_icon.svg.png" width="15"> **LinkedIn:** https://www.linkedin.com/company/airlab-polimi/
___
Credits: Alberto Archetti 📧 alberto.archetti@polito.it





```
   Copyright 2024 Alberto Archetti

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
```