### Code for training a Monocular Depth Estimation Model

In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Tue Jan  7 02:28:20 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0              49W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # Add this line

# Now import TensorFlow and other libraries
import tensorflow as tf
import pandas as pd
import cv2
import numpy as np
import gc
import matplotlib.pyplot as plt
from tensorflow.keras.layers import (
    Input, Conv2D, DepthwiseConv2D, ReLU,
    MaxPooling2D, Concatenate, Layer, Add
)
from tensorflow.keras.models import Model

In [3]:
# Simple GPU check
print(f"GPU Available: {tf.test.is_built_with_cuda()}")
print(f"GPU Devices: {tf.config.list_physical_devices('GPU')}")

# Enable mixed precision
tf.keras.mixed_precision.set_global_policy('mixed_float16')

GPU Available: True
GPU Devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
# Paths and Configuration
TRAIN_CSV = "datasets/nyu_data/data/nyu2_train.csv"
TEST_CSV = "datasets/nyu_data/data/nyu2_test.csv"
BASE_PATH = "datasets/nyu_data/"
TARGET_SIZE = (320, 320)
BATCH_SIZE = 32

In [5]:
# Learning rate schedule configuration
initial_learning_rate = 0.001
maximal_learning_rate = 0.006
step_size = 2000
cycle_steps = 8000

def cyclic_learning_rate(step):
    """Cyclic learning rate calculation"""
    cycle = np.floor(1 + step / (2 * step_size))
    x = np.abs(step / step_size - 2 * cycle + 1)
    lr = initial_learning_rate + (maximal_learning_rate - initial_learning_rate) * max(0., 1 - x)
    return lr

In [6]:
def cleanup():
    """Aggressive cleanup of memory"""
    gc.collect()
    tf.keras.backend.clear_session()

In [7]:
class MemoryEfficientDataset:
    def __init__(self, csv_path, batch_size=32):
        self.data = pd.read_csv(csv_path, header=None, names=["rgb_path", "depth_path"])
        self.batch_size = batch_size
        self.current_index = 0
        self.steps = 0

    def __len__(self):
        return len(self.data) // self.batch_size

    def get_batch(self):
        if self.current_index >= len(self.data):
            self.current_index = 0
            return None

        batch_data = self.data.iloc[self.current_index:self.current_index + self.batch_size]
        self.current_index += self.batch_size

        rgb_batch = []
        depth_batch = []

        for _, row in batch_data.iterrows():
            rgb_path = BASE_PATH + row["rgb_path"]
            depth_path = BASE_PATH + row["depth_path"]

            rgb_img = cv2.imread(rgb_path)
            rgb_img = cv2.resize(rgb_img, TARGET_SIZE)
            rgb_img = rgb_img.astype(np.float32) / 255.0

            depth_map = cv2.imread(depth_path, cv2.IMREAD_GRAYSCALE)
            depth_map = cv2.resize(depth_map, TARGET_SIZE)
            depth_map = depth_map.astype(np.float32) / 255.0

            rgb_batch.append(rgb_img)
            depth_batch.append(depth_map)

        self.steps += 1
        return np.array(rgb_batch), np.array(depth_batch)

In [8]:
class UpsampleBlock(Layer):
    """Edge TPU compatible upsampling"""
    def __init__(self, filters, **kwargs):
        super(UpsampleBlock, self).__init__(**kwargs)
        self.conv = Conv2D(
            filters * 4,
            1,
            padding='same',
            kernel_regularizer=tf.keras.regularizers.l2(0.01)
        )
        self.relu = ReLU()

    def call(self, inputs):
        x = self.conv(inputs)
        x = tf.nn.depth_to_space(x, 2)
        x = self.relu(x)
        return x

def conv_block(x, filters, kernel_size=3, strides=1):
    """Convolutional block with ReLU activation and L2 regularization"""
    x = Conv2D(
        filters,
        kernel_size,
        strides=strides,
        padding='same',
        kernel_regularizer=tf.keras.regularizers.l2(0.01)
    )(x)
    x = ReLU()(x)
    return x

def residual_block(x, filters):
    """Edge TPU compatible residual block with L2 regularization"""
    skip = x

    if skip.shape[-1] != filters:
        skip = Conv2D(
            filters,
            1,
            padding='same',
            kernel_regularizer=tf.keras.regularizers.l2(0.01)
        )(skip)

    x = conv_block(x, filters)
    x = Conv2D(
        filters,
        3,
        padding='same',
        kernel_regularizer=tf.keras.regularizers.l2(0.01)
    )(x)
    x = Add()([x, skip])
    x = ReLU()(x)
    return x

def create_depth_model(input_shape=(320, 320, 3)):
    """Create Edge TPU compatible depth estimation model with increased capacity"""
    inputs = Input(shape=input_shape)

    # Initial Feature Extraction with more filters
    x = conv_block(inputs, 48, strides=2)  # 160x160
    x = conv_block(x, 48)
    block1 = residual_block(x, 48)  # Save 160x160

    # Encoder Stage 1
    x = conv_block(block1, 96, strides=2)  # 80x80
    x = residual_block(x, 96)
    block2 = residual_block(x, 96)  # Save 80x80

    # Encoder Stage 2 with additional residual blocks
    x = conv_block(block2, 128, strides=2)  # 40x40
    x = residual_block(x, 128)
    x = residual_block(x, 128)
    block3 = residual_block(x, 128)  # Save 40x40

    # Bridge with increased capacity
    x = conv_block(block3, 256)  # 40x40
    x = residual_block(x, 256)
    x = residual_block(x, 256)

    # Decoder Stage 1 (40x40 -> 80x80)
    x = UpsampleBlock(128)(x)  # Now 80x80
    x = Concatenate()([x, block2])  # block2 is also 80x80
    x = residual_block(x, 128)
    x = residual_block(x, 128)

    # Decoder Stage 2 (80x80 -> 160x160)
    x = UpsampleBlock(96)(x)  # Now 160x160
    x = Concatenate()([x, block1])  # block1 is also 160x160
    x = residual_block(x, 96)
    x = residual_block(x, 96)

    # Final upsampling (160x160 -> 320x320)
    x = UpsampleBlock(48)(x)  # Now 320x320
    x = residual_block(x, 48)

    # Final layers
    x = Conv2D(32, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)
    outputs = Conv2D(1, 3, padding='same', activation='relu6', kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)

    return Model(inputs=inputs, outputs=outputs)

In [9]:
@tf.function(reduce_retracing=True)
def train_step(model, optimizer, x, y, step):
    """Single training step with memory optimization and gradient clipping"""
    step = tf.cast(step, tf.float32)

    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        loss = tf.reduce_mean(tf.square(y - predictions[:,:,:,0]))
        reg_loss = tf.reduce_sum(model.losses)
        total_loss = loss + reg_loss

    gradients = tape.gradient(total_loss, model.trainable_variables)
    gradients = [tf.clip_by_norm(g, 1.0) if g is not None else g for g in gradients]

    # Calculate learning rate
    cycle = tf.floor(1 + step / (2 * step_size))
    x = tf.abs(step / step_size - 2 * cycle + 1)
    lr = initial_learning_rate + (maximal_learning_rate - initial_learning_rate) * tf.maximum(0., 1 - x)

    optimizer.learning_rate.assign(lr)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss, lr

@tf.function(reduce_retracing=True)
def val_step(model, x, y):
    """Calculate validation loss"""
    predictions = model(x, training=False)
    loss = tf.reduce_mean(tf.square(y - predictions[:,:,:,0]))
    reg_loss = tf.reduce_sum(model.losses)
    return loss + reg_loss

In [10]:
cleanup()

In [11]:
# Create datasets
print("\nCreating datasets...")
train_data = MemoryEfficientDataset(TRAIN_CSV, BATCH_SIZE)
val_data = MemoryEfficientDataset(TEST_CSV, BATCH_SIZE)


Creating datasets...


In [12]:
# Create and compile model
print("\nCreating model...")
model = create_depth_model()
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate)


Creating model...


In [13]:
# Section 1: Checkpoint Setup
try:
    print("\nSetting up checkpoints...")
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
    checkpoint_dir = './training_checkpoints'
    os.makedirs(checkpoint_dir, exist_ok=True)
    manager = tf.train.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=3
    )
    print("Checkpoint setup complete!")
except Exception as e:
    print(f"Error in checkpoint setup: {str(e)}")
    raise


Setting up checkpoints...
Checkpoint setup complete!


In [14]:
# Training section
try:
    print("\nStarting training...")
    epochs = 50
    training_history = []
    best_val_loss = float('inf')
    patience = 10
    patience_counter = 0
    global_step = 0

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        # Training
        train_losses = []
        learning_rates = []
        steps = len(train_data)

        for step in range(steps):
            batch = train_data.get_batch()
            if batch is None:
                continue

            x_batch, y_batch = batch
            if x_batch.size == 0 or y_batch.size == 0:
                continue

            # Convert step to tensor
            step_tensor = tf.constant(global_step, dtype=tf.float32)
            loss, current_lr = train_step(model, optimizer, x_batch, y_batch, step_tensor)

            if not tf.math.is_nan(loss) and not tf.math.is_inf(loss):
                train_losses.append(float(loss))
                learning_rates.append(float(current_lr))

            if step % 10 == 0 and train_losses:
                current_mean = np.mean(train_losses[-10:])
                if not np.isnan(current_mean):
                    print(f"Step {step}/{steps}, Loss: {current_mean:.4f}, LR: {current_lr:.6f}")

            if step % 50 == 0:
                cleanup()

            global_step += 1

        # Validation
        val_losses = []
        val_steps = len(val_data)
        for step in range(val_steps):
            batch = val_data.get_batch()
            if batch is None:
                continue

            x_val, y_val = batch
            if x_val.size == 0 or y_val.size == 0:
                continue

            val_loss = val_step(model, x_val, y_val)
            if not tf.math.is_nan(val_loss) and not tf.math.is_inf(val_loss):
                val_losses.append(float(val_loss))

        # Calculate epoch metrics
        epoch_train_loss = np.mean(train_losses) if train_losses else float('inf')
        epoch_val_loss = np.mean(val_losses) if val_losses else float('inf')
        epoch_lr = np.mean(learning_rates) if learning_rates else 0.0

        # Early stopping check
        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            patience_counter = 0
            # Save best model
            model.save('best_model.h5')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"\nEarly stopping triggered after {epoch+1} epochs")
                break

        if not np.isnan(epoch_train_loss) and not np.isnan(epoch_val_loss):
            training_history.append({
                'epoch': epoch + 1,
                'train_loss': epoch_train_loss,
                'val_loss': epoch_val_loss,
                'learning_rate': epoch_lr
            })

            print(f"\nEpoch {epoch+1} Results:")
            print(f"Training Loss: {epoch_train_loss:.4f}")
            print(f"Validation Loss: {epoch_val_loss:.4f}")
            print(f"Learning Rate: {epoch_lr:.6f}")
        else:
            print(f"\nEpoch {epoch+1} produced invalid losses, skipping...")

            if epoch > 0:
                print("Training unstable, stopping...")
                break

        if (epoch + 1) % 5 == 0 and not np.isnan(epoch_train_loss):
            save_path = manager.save()
            print(f"\nSaved checkpoint for epoch {epoch+1}: {save_path}")
            cleanup()

    print("\nTraining completed!")
except Exception as e:
    print(f"Error during training: {str(e)}")
    raise


Starting training...

Epoch 1/50
Step 0/1584, Loss: 0.1621, LR: 0.001000
Step 10/1584, Loss: 0.0688, LR: 0.001025
Step 20/1584, Loss: 0.0105, LR: 0.001050
Step 30/1584, Loss: 0.0117, LR: 0.001075
Step 40/1584, Loss: 0.0160, LR: 0.001100
Step 50/1584, Loss: 0.0167, LR: 0.001125
Step 60/1584, Loss: 0.0294, LR: 0.001150
Step 70/1584, Loss: 0.0130, LR: 0.001175
Step 80/1584, Loss: 0.0178, LR: 0.001200
Step 90/1584, Loss: 0.0071, LR: 0.001225
Step 100/1584, Loss: 0.0059, LR: 0.001250
Step 110/1584, Loss: 0.0260, LR: 0.001275
Step 120/1584, Loss: 0.0188, LR: 0.001300
Step 130/1584, Loss: 0.0084, LR: 0.001325
Step 140/1584, Loss: 0.0168, LR: 0.001350
Step 150/1584, Loss: 0.0168, LR: 0.001375
Step 160/1584, Loss: 0.0107, LR: 0.001400
Step 170/1584, Loss: 0.0189, LR: 0.001425
Step 180/1584, Loss: 0.0175, LR: 0.001450
Step 190/1584, Loss: 0.0183, LR: 0.001475
Step 200/1584, Loss: 0.0162, LR: 0.001500
Step 210/1584, Loss: 0.0203, LR: 0.001525
Step 220/1584, Loss: 0.0114, LR: 0.001550
Step 230/15




Epoch 1 Results:
Training Loss: 0.0253
Validation Loss: 0.0840
Learning Rate: 0.002979

Epoch 2/50
Step 10/1584, Loss: 0.0212, LR: 0.004983
Step 20/1584, Loss: 0.0173, LR: 0.005008
Step 30/1584, Loss: 0.0108, LR: 0.005033
Step 40/1584, Loss: 0.0128, LR: 0.005058
Step 50/1584, Loss: 0.0151, LR: 0.005083
Step 60/1584, Loss: 0.0195, LR: 0.005108
Step 70/1584, Loss: 0.0139, LR: 0.005133
Step 80/1584, Loss: 0.0169, LR: 0.005158
Step 90/1584, Loss: 0.0083, LR: 0.005183
Step 100/1584, Loss: 0.0072, LR: 0.005208
Step 110/1584, Loss: 0.0213, LR: 0.005233
Step 120/1584, Loss: 0.0185, LR: 0.005258
Step 130/1584, Loss: 0.0087, LR: 0.005283
Step 140/1584, Loss: 0.0168, LR: 0.005308
Step 150/1584, Loss: 0.0223, LR: 0.005333
Step 160/1584, Loss: 0.0128, LR: 0.005358
Step 170/1584, Loss: 0.0139, LR: 0.005383
Step 180/1584, Loss: 0.0163, LR: 0.005408
Step 190/1584, Loss: 0.0191, LR: 0.005433
Step 200/1584, Loss: 0.0144, LR: 0.005458
Step 210/1584, Loss: 0.0177, LR: 0.005483
Step 220/1584, Loss: 0.0101




Epoch 2 Results:
Training Loss: 0.0240
Validation Loss: 0.0802
Learning Rate: 0.004789

Epoch 3/50
Step 0/1584, Loss: 0.0286, LR: 0.003082
Step 10/1584, Loss: 0.0232, LR: 0.003060
Step 20/1584, Loss: 0.0172, LR: 0.003035
Step 30/1584, Loss: 0.0108, LR: 0.003010
Step 40/1584, Loss: 0.0108, LR: 0.002985
Step 50/1584, Loss: 0.0175, LR: 0.002960
Step 60/1584, Loss: 0.0160, LR: 0.002935
Step 70/1584, Loss: 0.0168, LR: 0.002910
Step 80/1584, Loss: 0.0165, LR: 0.002885
Step 90/1584, Loss: 0.0096, LR: 0.002860
Step 100/1584, Loss: 0.0064, LR: 0.002835
Step 110/1584, Loss: 0.0195, LR: 0.002810
Step 120/1584, Loss: 0.0196, LR: 0.002785
Step 130/1584, Loss: 0.0103, LR: 0.002760
Step 140/1584, Loss: 0.0122, LR: 0.002735
Step 150/1584, Loss: 0.0256, LR: 0.002710
Step 160/1584, Loss: 0.0102, LR: 0.002685
Step 170/1584, Loss: 0.0134, LR: 0.002660
Step 180/1584, Loss: 0.0162, LR: 0.002635
Step 190/1584, Loss: 0.0191, LR: 0.002610
Step 200/1584, Loss: 0.0144, LR: 0.002585
Step 210/1584, Loss: 0.0183, 




Epoch 3 Results:
Training Loss: 0.0235
Validation Loss: 0.0793
Learning Rate: 0.001992

Epoch 4/50
Step 0/1584, Loss: 0.0186, LR: 0.002875
Step 10/1584, Loss: 0.0243, LR: 0.002898
Step 20/1584, Loss: 0.0169, LR: 0.002923
Step 30/1584, Loss: 0.0103, LR: 0.002948
Step 40/1584, Loss: 0.0085, LR: 0.002972
Step 50/1584, Loss: 0.0200, LR: 0.002998
Step 60/1584, Loss: 0.0138, LR: 0.003022
Step 70/1584, Loss: 0.0190, LR: 0.003048
Step 80/1584, Loss: 0.0159, LR: 0.003072
Step 90/1584, Loss: 0.0111, LR: 0.003098
Step 100/1584, Loss: 0.0061, LR: 0.003123
Step 110/1584, Loss: 0.0185, LR: 0.003148
Step 120/1584, Loss: 0.0196, LR: 0.003173
Step 130/1584, Loss: 0.0111, LR: 0.003198
Step 140/1584, Loss: 0.0117, LR: 0.003223
Step 150/1584, Loss: 0.0267, LR: 0.003248
Step 160/1584, Loss: 0.0099, LR: 0.003273
Step 170/1584, Loss: 0.0136, LR: 0.003298
Step 180/1584, Loss: 0.0153, LR: 0.003323
Step 190/1584, Loss: 0.0199, LR: 0.003348
Step 200/1584, Loss: 0.0149, LR: 0.003373
Step 210/1584, Loss: 0.0179, 




Epoch 5 Results:
Training Loss: 0.0238
Validation Loss: 0.0648
Learning Rate: 0.003190

Saved checkpoint for epoch 5: ./training_checkpoints/ckpt-1

Epoch 6/50
Step 0/1584, Loss: 0.0227, LR: 0.001210
Step 10/1584, Loss: 0.0275, LR: 0.001187
Step 20/1584, Loss: 0.0120, LR: 0.001162
Step 30/1584, Loss: 0.0105, LR: 0.001137
Step 40/1584, Loss: 0.0100, LR: 0.001112
Step 50/1584, Loss: 0.0181, LR: 0.001087
Step 60/1584, Loss: 0.0159, LR: 0.001062
Step 70/1584, Loss: 0.0207, LR: 0.001037
Step 80/1584, Loss: 0.0137, LR: 0.001012
Step 90/1584, Loss: 0.0129, LR: 0.001013
Step 100/1584, Loss: 0.0060, LR: 0.001038
Step 110/1584, Loss: 0.0156, LR: 0.001063
Step 120/1584, Loss: 0.0225, LR: 0.001088
Step 130/1584, Loss: 0.0145, LR: 0.001113
Step 140/1584, Loss: 0.0099, LR: 0.001138
Step 150/1584, Loss: 0.0235, LR: 0.001163
Step 160/1584, Loss: 0.0095, LR: 0.001188
Step 170/1584, Loss: 0.0135, LR: 0.001213
Step 180/1584, Loss: 0.0133, LR: 0.001238
Step 190/1584, Loss: 0.0221, LR: 0.001263
Step 200/1

In [16]:
# Define evaluation code that handles custom layers and provides comprehensive metrics
def evaluate_model(model, test_dataset, num_samples=5):
    """
    Comprehensive evaluation of the depth estimation model.
    Calculates both numerical metrics and generates visualizations to assess performance.

    Args:
        model: The trained depth estimation model
        test_dataset: Dataset object containing test data
        num_samples: Number of example predictions to visualize

    Returns:
        float: Average MSE loss across the test dataset
    """
    print("\nStarting model evaluation...")

    # Initialize tracking metrics
    all_losses = []          # Track MSE losses
    all_abs_rel = []         # Track relative errors
    all_rmse = []           # Track root mean square errors
    # Standard thresholds used in depth estimation literature (within 25%, 56.25%, and 95.0625% of ground truth)
    threshold_accuracies = {1.25: 0, 1.25**2: 0, 1.25**3: 0}
    total_samples = 0

    # Store example predictions for visualization
    example_data = []

    # Process all batches in test dataset
    for step in range(len(test_dataset)):
        batch = test_dataset.get_batch()
        if batch is None:
            continue

        images, true_depths = batch
        predictions = model(images, training=False)
        predictions = predictions[:,:,:,0]  # Remove single-channel dimension

        # Store first few examples for visualization
        if step < num_samples:
            example_data.append((images[:1], true_depths[:1], predictions[:1]))

        # Calculate metrics for each image in batch
        for i in range(len(images)):
            true_depth = true_depths[i]
            pred_depth = predictions[i]

            # Calculate mean squared error
            mse = tf.reduce_mean(tf.square(true_depth - pred_depth))
            all_losses.append(float(mse))

            # Calculate relative error (important for depth estimation)
            abs_rel = tf.reduce_mean(tf.abs(true_depth - pred_depth) / true_depth)
            all_abs_rel.append(float(abs_rel))

            # Calculate RMSE (in same units as depth)
            rmse = tf.sqrt(mse)
            all_rmse.append(float(rmse))

            # Calculate threshold accuracies (percentage of pixels within error bounds)
            ratios = tf.maximum(true_depth / pred_depth, pred_depth / true_depth)
            for threshold in threshold_accuracies.keys():
                threshold_accuracies[threshold] += tf.reduce_mean(
                    tf.cast(ratios < threshold, tf.float32)
                )

            total_samples += 1

    # Print comprehensive metrics summary
    print("\nOverall Model Performance:")
    print(f"Average MSE Loss: {np.mean(all_losses):.4f}")
    print(f"Average Absolute Relative Error: {np.mean(all_abs_rel):.4f}")
    print(f"Root Mean Square Error: {np.mean(all_rmse):.4f}")

    # Calculate and print threshold accuracies
    for threshold, total in threshold_accuracies.items():
        accuracy = total / total_samples
        print(f"δ < {threshold:.2f}: {accuracy:.4f}")

    # Generate visualization of example predictions
    plt.figure(figsize=(15, 3*num_samples))
    for idx, (image, true_depth, pred_depth) in enumerate(example_data):
        # Show input image
        plt.subplot(num_samples, 3, idx*3 + 1)
        plt.imshow(image[0])
        plt.title('Input Image')
        plt.axis('off')

        # Show ground truth depth
        plt.subplot(num_samples, 3, idx*3 + 2)
        plt.imshow(true_depth[0], cmap='magma')
        plt.title('Ground Truth Depth')
        plt.colorbar()
        plt.axis('off')

        # Show predicted depth
        plt.subplot(num_samples, 3, idx*3 + 3)
        plt.imshow(pred_depth[0], cmap='magma')
        plt.title('Predicted Depth')
        plt.colorbar()
        plt.axis('off')

    plt.tight_layout()
    plt.savefig('evaluation_results.png', dpi=300, bbox_inches='tight')
    plt.close()

    # Generate error distribution visualizations
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.hist(all_losses, bins=50, alpha=0.75)
    plt.title('Distribution of MSE Losses')
    plt.xlabel('MSE Loss')
    plt.ylabel('Count')

    plt.subplot(1, 2, 2)
    plt.hist(all_abs_rel, bins=50, alpha=0.75)
    plt.title('Distribution of Absolute Relative Errors')
    plt.xlabel('Absolute Relative Error')
    plt.ylabel('Count')

    plt.tight_layout()
    plt.savefig('error_distributions.png', dpi=300, bbox_inches='tight')
    plt.close()

    # Generate detailed error analysis for a sample prediction
    sample_image, sample_true, sample_pred = example_data[0]
    error_map = tf.abs(sample_true[0] - sample_pred[0])

    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.imshow(sample_image[0])
    plt.title('Sample Image')
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.imshow(sample_pred[0], cmap='magma')
    plt.title('Predicted Depth')
    plt.colorbar()
    plt.axis('off')

    plt.subplot(1, 3, 3)
    plt.imshow(error_map, cmap='hot')
    plt.title('Depth Error Map')
    plt.colorbar()
    plt.axis('off')

    plt.tight_layout()
    plt.savefig('depth_error_analysis.png', dpi=300, bbox_inches='tight')
    plt.close()

    return np.mean(all_losses)

# Run evaluation after training
print("\nEvaluating model performance...")
try:
    # Register custom layer for model loading
    custom_objects = {
        'UpsampleBlock': UpsampleBlock
    }

    # Load the best model if available
    if os.path.exists('best_model.h5'):
        with tf.keras.utils.custom_object_scope(custom_objects):
            model = tf.keras.models.load_model('best_model.h5')
            print("Loaded best model for evaluation")

    # Create fresh test dataset
    test_dataset = MemoryEfficientDataset(TEST_CSV, BATCH_SIZE)

    # Run comprehensive evaluation
    final_loss = evaluate_model(model, test_dataset)
    print(f"\nEvaluation complete! Final average loss: {final_loss:.4f}")

except Exception as e:
    print(f"Error during evaluation: {str(e)}")
    raise
finally:
    # Clean up any remaining memory
    cleanup()


Evaluating model performance...




Loaded best model for evaluation

Starting model evaluation...

Overall Model Performance:
Average MSE Loss: 0.0649
Average Absolute Relative Error: 8.4485
Root Mean Square Error: 0.2545
δ < 1.25: 0.0000
δ < 1.56: 0.0000
δ < 1.95: 0.0000

Evaluation complete! Final average loss: 0.0649
