### Code for training a Monocular Depth Estimation Model

In [1]:
import tensorflow as tf
import pandas as pd
import cv2
import numpy as np
from tensorflow.keras.layers import Input, Conv2D, DepthwiseConv2D, ReLU, MaxPooling2D, Concatenate, Layer, Add
from tensorflow.keras.models import Model
import os

In [2]:
# Dataset locations (relative to the notebook's working directory)
BASE_PATH = "datasets/nyu_data/"  # Prefix prepended to the image/depth paths listed in the CSVs
TRAIN_CSV = BASE_PATH + "data/nyu2_train.csv"
TEST_CSV = BASE_PATH + "data/nyu2_test.csv"

# Input geometry and batching
TARGET_SIZE = (300, 300)  # Handed to cv2.resize, which reads it as (width, height)
BATCH_SIZE = 16

In [3]:
# Dataset Loading and Preprocessing
def load_csv(csv_path):
    """Read a header-less two-column CSV pairing each RGB image path with its depth map path.

    Returns a DataFrame whose columns are named "rgb_path" and "depth_path".
    """
    column_names = ["rgb_path", "depth_path"]
    pairs = pd.read_csv(csv_path, names=column_names, header=None)
    return pairs

In [4]:
def preprocess_image_and_depth(rgb_path, depth_path):
    """Load one RGB image / depth map pair and return both as normalized float32 arrays.

    Args:
        rgb_path: Path of the color image, relative to BASE_PATH.
        depth_path: Path of the depth map, relative to BASE_PATH.

    Returns:
        A tuple (rgb_image, depth_map):
            rgb_image: float32 array of shape (TARGET_SIZE[1], TARGET_SIZE[0], 3),
                channels in RGB order, values scaled to [0, 1].
            depth_map: float32 array of shape (TARGET_SIZE[1], TARGET_SIZE[0]),
                values scaled to [0, 1].

    Raises:
        FileNotFoundError: If OpenCV cannot read either file.
    """
    # os.path.join tolerates BASE_PATH with or without a trailing separator
    rgb_image_path = os.path.join(BASE_PATH, rgb_path)
    depth_map_path = os.path.join(BASE_PATH, depth_path)

    # Load the color image; cv2.imread signals failure by returning None
    rgb_image = cv2.imread(rgb_image_path)
    if rgb_image is None:
        raise FileNotFoundError(f"RGB image not found at {rgb_image_path}")

    # OpenCV decodes color images as BGR; reorder so the result really is RGB
    rgb_image = cv2.cvtColor(rgb_image, cv2.COLOR_BGR2RGB)
    rgb_image = cv2.resize(rgb_image, TARGET_SIZE)
    # Cast before dividing so the result is float32 (like the depth map), not float64
    rgb_image = rgb_image.astype(np.float32) / 255.0  # Normalize to [0, 1]

    # Load the depth map as a single-channel 8-bit image
    depth_map = cv2.imread(depth_map_path, cv2.IMREAD_GRAYSCALE)
    if depth_map is None:
        raise FileNotFoundError(f"Depth map not found at {depth_map_path}")

    depth_map = cv2.resize(depth_map, TARGET_SIZE)
    depth_map = depth_map.astype(np.float32) / 255.0  # Normalize to [0, 1]

    return rgb_image, depth_map


In [5]:
def create_dataset(csv_path, batch_size=BATCH_SIZE, shuffle_buffer=100):
    """Create a batched tf.data pipeline of (rgb_image, depth_map) pairs from a CSV mapping.

    Args:
        csv_path: CSV file listing one RGB path and one depth path per row.
        batch_size: Number of samples per batch.
        shuffle_buffer: Size of the shuffle buffer. Pass 0 or None to keep the
            CSV row order (e.g. for evaluation). Defaults to 100.

    Returns:
        A tf.data.Dataset yielding float32 batches: images of shape
        (batch, TARGET_SIZE[1], TARGET_SIZE[0], 3) and depth maps of shape
        (batch, TARGET_SIZE[1], TARGET_SIZE[0]).
    """
    data = load_csv(csv_path)
    # Plain Python lists avoid building a pandas Series per row, as iterrows() does
    rgb_paths = data["rgb_path"].tolist()
    depth_paths = data["depth_path"].tolist()

    def generator():
        # Images are decoded lazily, one pair at a time, as the pipeline is consumed
        for rgb_path, depth_path in zip(rgb_paths, depth_paths):
            yield preprocess_image_and_depth(rgb_path, depth_path)

    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            # cv2.resize takes (width, height) but returns arrays shaped (height, width, ...)
            tf.TensorSpec(shape=(TARGET_SIZE[1], TARGET_SIZE[0], 3), dtype=tf.float32),
            tf.TensorSpec(shape=(TARGET_SIZE[1], TARGET_SIZE[0]), dtype=tf.float32),
        )
    )
    if shuffle_buffer:
        dataset = dataset.shuffle(shuffle_buffer)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


In [6]:
# Build the batched input pipelines for the training split, then the test split
train_dataset, test_dataset = (
    create_dataset(split_csv, batch_size=BATCH_SIZE)
    for split_csv in (TRAIN_CSV, TEST_CSV)
)

In [7]:
class ResizeLayer(Layer):
    """Keras layer that bilinearly resizes its input to a fixed spatial size.

    Args:
        target_shape: Two-element (height, width) size passed to tf.image.resize.
        **kwargs: Standard Keras Layer keyword arguments (e.g. name).
    """

    def __init__(self, target_shape, **kwargs):
        super().__init__(**kwargs)
        # Stored as a plain tuple so it round-trips cleanly through get_config
        self.target_shape = tuple(target_shape)

    def call(self, inputs):
        """Return `inputs` resized to `target_shape` with bilinear interpolation."""
        return tf.image.resize(inputs, self.target_shape, method='bilinear')

    def get_config(self):
        """Include `target_shape` so the layer can be re-created from a saved model config."""
        config = super().get_config()
        config.update({"target_shape": list(self.target_shape)})
        return config

def conv_block(x, filters, kernel_size=3, strides=1):
    """Apply a 'same'-padded Conv2D followed by a ReLU and return the activated tensor."""
    convolution = Conv2D(
        filters,
        kernel_size,
        strides=strides,
        padding='same',
    )
    activation = ReLU()
    return activation(convolution(x))

def residual_block(x, filters):
    """Two 3x3 convolutions plus an additive shortcut, followed by a ReLU.

    When the incoming channel count differs from `filters`, the shortcut goes
    through a 1x1 convolution so that both branches can be added.
    """
    # Identity shortcut when the channels already match, 1x1 projection otherwise
    channels_match = x.shape[-1] == filters
    shortcut = x if channels_match else Conv2D(filters, 1, padding='same')(x)

    # Main branch: conv + ReLU, then a second conv left un-activated until after the add
    main_branch = conv_block(x, filters)
    main_branch = Conv2D(filters, 3, padding='same')(main_branch)

    merged = Add()([main_branch, shortcut])
    return ReLU()(merged)

def _static_spatial_size(tensor):
    """Return the static (height, width) of a 4-D feature map as a tuple of ints.

    Assumes the channels-last layout (batch, height, width, channels).
    """
    height, width = tensor.shape[1], tensor.shape[2]
    if height is None or width is None:
        raise ValueError(
            "create_depth_model needs a fully defined input height and width, "
            f"got a feature map of shape {tuple(tensor.shape)}"
        )
    return int(height), int(width)


def create_depth_model(input_shape=(300, 300, 3)):
    """Create the encoder-decoder monocular depth estimation model.

    The encoder halves the resolution four times (stride-2 convolutions); the
    decoder resizes back up and concatenates the matching encoder feature map
    at each scale. Resize targets are read from the skip tensors themselves,
    so the model builds for any statically known input size.

    Args:
        input_shape: (height, width, channels) of the input image.

    Returns:
        A Keras Model mapping an image batch to a single-channel map with the
        input's height and width; the last layer uses a sigmoid activation.

    Raises:
        ValueError: If the height or width in `input_shape` is None.
    """
    inputs = Input(shape=input_shape)

    # Initial feature extraction at 1/2 resolution
    x = conv_block(inputs, 32, strides=2)
    x = conv_block(x, 32)
    x = residual_block(x, 32)
    skips = [x]  # skip connections, finest resolution first

    # Encoder stages: each halves the resolution, then applies three residual blocks
    for filters in (64, 128, 256):
        x = conv_block(x, filters, strides=2)
        for _ in range(3):
            x = residual_block(x, filters)
        skips.append(x)

    # Bridge at the coarsest resolution
    x = conv_block(x, 512)
    x = residual_block(x, 512)
    x = residual_block(x, 512)

    # Decoder stages: the coarsest encoder output (skips[3]) already fed the bridge,
    # so only the three finer feature maps are used as skip connections.
    for filters, skip in zip((256, 128, 64), (skips[2], skips[1], skips[0])):
        x = Conv2D(filters, 1, padding='same')(x)
        x = ResizeLayer(_static_spatial_size(skip))(x)  # match the skip's size
        x = Concatenate()([x, skip])
        x = residual_block(x, filters)
        x = residual_block(x, filters)

    # Final upsampling to the input resolution and refinement
    x = Conv2D(32, 1, padding='same')(x)
    x = ResizeLayer(_static_spatial_size(inputs))(x)
    x = residual_block(x, 32)
    x = conv_block(x, 32)

    # Depth prediction head (single scale)
    x = Conv2D(16, 3, padding='same')(x)
    x = ReLU()(x)
    outputs = Conv2D(1, 3, padding='same', activation='sigmoid')(x)

    return Model(inputs=inputs, outputs=outputs)

In [8]:
# Build the network with its default input shape, then configure training:
# Adam optimizer, mean-squared-error loss, mean absolute error as a metric.
model = create_depth_model()
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()

In [9]:
# Create representative dataset for quantization
def representative_dataset():
    """Yield calibration inputs for post-training quantization, one image per step.

    Draws the first 100 batches of `train_dataset` and re-splits them into
    single-image batches, so every calibration sample has a batch dimension
    of 1 instead of the training batch size.

    Yields:
        A one-element list holding a float32 tensor of shape (1, 300, 300, 3).
    """
    calibration_images = train_dataset.take(100).unbatch().batch(1)
    for image, _ in calibration_images:
        # Guard: force the 300x300 spatial size of the model's default input
        yield [tf.image.resize(image, (300, 300))]

# Post-training integer quantization of the Keras model to TFLite
# NOTE(review): no model.fit(...) or weight loading is visible between compile
# and this conversion -- confirm the weights are trained before exporting.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Calibration samples plus the default optimization set drive the quantization
converter.representative_dataset = representative_dataset
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Restrict to int8 builtin ops and use int8 tensors at the model boundary
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

# Run the conversion
tflite_model = converter.convert()

# Write the converted model into the current working directory
with open('depth_model_edge_tpu.tflite', 'wb') as f:
    f.write(tflite_model)

INFO:tensorflow:Assets written to: /var/folders/5c/vwh99hn14231yhb5yks0nssc0000gn/T/tmptyh6x4jt/assets


INFO:tensorflow:Assets written to: /var/folders/5c/vwh99hn14231yhb5yks0nssc0000gn/T/tmptyh6x4jt/assets


Saved artifact at '/var/folders/5c/vwh99hn14231yhb5yks0nssc0000gn/T/tmptyh6x4jt'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 300, 300, 3), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 300, 300, 1), dtype=tf.float32, name=None)
Captures:
  5977721312: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977796672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977799664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977799488: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977827280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977827104: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977854896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977854720: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977883744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977883568: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5977886560

W0000 00:00:1736147658.008631 28877953 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1736147658.008645 28877953 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-01-06 02:14:18.008990: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/5c/vwh99hn14231yhb5yks0nssc0000gn/T/tmptyh6x4jt
2025-01-06 02:14:18.011563: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-01-06 02:14:18.011570: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/5c/vwh99hn14231yhb5yks0nssc0000gn/T/tmptyh6x4jt
I0000 00:00:1736147658.039875 28877953 mlir_graph_optimization_pass.cc:401] MLIR V1 optimization pass is not enabled
2025-01-06 02:14:18.045143: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-01-06 02:14:18.277351: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/fol