### Strategy for optimizing real-time depth estimation and battery usage

#### 1. Use a Lightweight Depth Model
*   Uses MiDaS (DPT_Hybrid) for depth estimation (reasonably accurate and lightweight)

#### 2. Skip Frames
*   Estimate depth every N frames and use optical flow to propagate it to the frames in between. Tune FRAME_SKIP dynamically: run inference less often when the robot is idle or the background is static.
*   Cuts AI inference time by ~80% (inference on one frame in five). Consider running AI only every 10 frames if the flow is stable.

#### 3. Optical Flow
*   Maintains temporal coherence.

#### 4. EMA filtering
*   Applies temporal smoothing. Even with good models, monocular depth is noisy. This stabilizes jitter and produces consistent output with less frequent AI inference.

#### 5. Low res inference
*   Fast, barely affects visual quality

These techniques keep the system efficient for real-time use with just a webcam.

In [1]:
import cv2
import torch
import numpy as np

##### Configurations

In [2]:
# The depth network runs only on frames whose index is a multiple of FRAME_SKIP;
# the frames in between reuse a warped copy of the previous depth map.
FRAME_SKIP = 5

# Exponential-moving-average weight applied to the previous filtered depth map;
# the newest depth map receives (1 - TEMPORAL_ALPHA).
TEMPORAL_ALPHA = 0.9

# Compute device: CUDA when PyTorch reports it as available, otherwise the CPU.
USE_GPU = torch.cuda.is_available()
DEVICE = torch.device("cuda") if USE_GPU else torch.device("cpu")

# Name of the MiDaS torch.hub entry point to load.
DEPTH_MODEL_TYPE = "DPT_Hybrid"

##### Load MiDaS depth estimation model

In [None]:
# Fetch the configured MiDaS network through torch.hub, place it on the
# selected device, and switch it to evaluation mode for inference.
midas = torch.hub.load("intel-isl/MiDaS", DEPTH_MODEL_TYPE)
midas = midas.to(DEVICE)
midas.eval()

##### Load the corresponding transforms

In [4]:
# Load the preprocessing pipelines exposed by the MiDaS hub repo and pick the
# one that matches the configured model family.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
if DEPTH_MODEL_TYPE.startswith("DPT"):
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


##### Depth Estimation module

In [5]:
def estimate_depth_midas(frame):
    """Run the MiDaS network on one frame and return a per-pixel depth map.

    Args:
        frame: BGR image (as produced by ``cv2.VideoCapture.read``).

    Returns:
        2-D ``numpy`` array with the same height and width as ``frame``
        containing the network's raw, un-normalised prediction, resized to
        the frame with bicubic interpolation.
    """
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    input_tensor = transform(img_rgb).to(DEVICE)

    # The transforms shipped with the MiDaS hub repo already return a batched
    # (1, C, H, W) tensor. Add the batch axis only when the transform did not,
    # otherwise the model would be handed a 5-D input.
    if input_tensor.dim() == 3:
        input_tensor = input_tensor.unsqueeze(0)

    with torch.no_grad():
        prediction = midas(input_tensor)
        # interpolate() needs an explicit channel axis: (N, H, W) -> (N, 1, H, W).
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img_rgb.shape[:2],
            mode="bicubic",
            align_corners=False,
        )

    # Index out the batch and channel axes explicitly instead of a blanket
    # squeeze(), which would also collapse a height or width of 1.
    return prediction[0, 0].cpu().numpy()

##### Propagate depth with optical flow

In [6]:
def propagate_depth_optical_flow(prev_gray, curr_gray, prev_depth):
    """Warp the previous depth map onto the current frame using dense optical flow.

    Args:
        prev_gray: Grayscale image of the previous frame.
        curr_gray: Grayscale image of the current frame (same size).
        prev_depth: Depth map aligned with ``prev_gray``.

    Returns:
        ``prev_depth`` resampled so that it is aligned with ``curr_gray``.
    """
    # cv2.remap is a *pull* operation: dst(x, y) = src(map_x(x, y), map_y(x, y)).
    # For every pixel of the current frame we therefore need its position in
    # the previous frame, i.e. the flow from current -> previous. Farneback's
    # output satisfies first(p) ~ second(p + flow(p)), so the current frame is
    # passed as the first image. (Using the prev -> curr flow here would shift
    # the depth map in the direction opposite to the actual motion.)
    backward_flow = cv2.calcOpticalFlowFarneback(
        curr_gray, prev_gray, None,
        pyr_scale=0.5, levels=3, winsize=15,
        iterations=3, poly_n=5, poly_sigma=1.2, flags=0,
    )

    height, width = backward_flow.shape[:2]
    # Pixel-coordinate grid of the current frame; float32 because remap
    # expects single-precision maps.
    grid_x, grid_y = np.meshgrid(
        np.arange(width, dtype=np.float32),
        np.arange(height, dtype=np.float32),
        indexing='xy',
    )
    sample_x = grid_x + backward_flow[..., 0]
    sample_y = grid_y + backward_flow[..., 1]

    # Sample the previous depth at the looked-up positions; positions that fall
    # outside the image are filled by reflecting at the border.
    return cv2.remap(prev_depth, sample_x, sample_y,
                     interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)

##### Temporal Filtering

In [7]:
def apply_temporal_filter(new_depth, prev_filtered, alpha=None):
    """Blend a new depth map into the running exponential moving average.

    Args:
        new_depth: Most recent depth map (array or scalar).
        prev_filtered: Previous output of this filter, broadcast-compatible
            with ``new_depth``.
        alpha: Weight given to ``prev_filtered``; ``new_depth`` receives
            ``1 - alpha``. When omitted, the module-level ``TEMPORAL_ALPHA``
            is used, which keeps existing two-argument calls unchanged.

    Returns:
        ``alpha * prev_filtered + (1 - alpha) * new_depth``.
    """
    if alpha is None:
        alpha = TEMPORAL_ALPHA
    return alpha * prev_filtered + (1 - alpha) * new_depth

##### Main method

In [9]:
# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

frame_count = 0
prev_gray = None        # grayscale image of the previous frame (optical-flow input)
prev_depth = None       # depth map aligned with prev_gray
filtered_depth = None   # running exponential moving average of the depth maps

# try/finally guarantees that the camera handle and the preview window are
# released on every exit path: 'q' pressed, end of stream, Ctrl-C, or an error.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unavailable or stream ended).
            break

        # Convert to grayscale for optical flow
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Run the network on every FRAME_SKIP-th frame (and whenever there is
        # no depth map yet); otherwise warp the previous depth map.
        if frame_count % FRAME_SKIP == 0 or prev_depth is None:
            depth_map = estimate_depth_midas(frame)
        else:
            depth_map = propagate_depth_optical_flow(prev_gray, gray, prev_depth)

        # Apply temporal smoothing; the first depth map seeds the filter.
        if filtered_depth is None:
            filtered_depth = depth_map
        else:
            filtered_depth = apply_temporal_filter(depth_map, filtered_depth)

        # Normalize to 0..255 and colour-map for display
        depth_vis = cv2.normalize(filtered_depth, None, 0, 255, cv2.NORM_MINMAX)
        depth_vis = np.uint8(depth_vis)
        depth_color = cv2.applyColorMap(depth_vis, cv2.COLORMAP_INFERNO)

        # Display
        cv2.imshow("Real-Time Depth Map", depth_color)

        # Update state for the next iteration; this is the single place where
        # prev_depth is refreshed, for both the inference and the warp branch.
        prev_gray = gray
        prev_depth = depth_map
        frame_count += 1

        # Break on key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # clean up
    cap.release()
    cv2.destroyAllWindows()