In [1]:
import cv2
import threading
import time
import torch
import numpy as np
from IPython.display import display, clear_output
import ipywidgets as widgets
from PIL import Image
from io import BytesIO
from depth_anything_v2.dpt import DepthAnythingV2
import gdown
# Shared state between the button callback and the streaming worker thread.
# display_lock is held while the two image widgets are refreshed for a frame.
display_lock = threading.Lock()
# Handle of the most recently started worker thread; None until the first start.
stream_thread = None
# Polled by the worker's loop; clearing it asks the worker to finish.
is_streaming = False

# Set up the depth estimation model
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Constructor keyword arguments for each supported encoder size.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}
encoder = 'vits'  # You can choose between 'vits', 'vitb', 'vitl', and 'vitg'
model = DepthAnythingV2(**model_configs[encoder])
# Derive the checkpoint name from the encoder so the weights always match the
# architecture built above (for 'vits' this is the same path as before).
checkpoint_path = f'/home/jovyan/depth_anything_v2_{encoder}.pth'
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all load_state_dict needs, and avoids executing arbitrary pickled code.
model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE, weights_only=True))
# Inference only: move to the target device and switch to eval mode.
model = model.to(DEVICE).eval()

# Click handler for the camera button: flips the stream on or off
def toggle_webcam(button):
    """Start the stream when idle; otherwise stop it and blank both views.

    Args:
        button: The button widget that was clicked. Its icon and background
            colour are re-applied on every click; the icon's colour itself is
            driven by the CSS held in the global ``style`` widget.
    """
    global is_streaming, stream_thread

    if not is_streaming:
        # Idle -> streaming: raise the flag before the worker starts polling it
        is_streaming = True
        button.icon = 'video-camera'
        button.style.button_color = "#FFFFFF"
        # Red icon signals that the stream is live
        style.value = """
        <style>
            .fa-video-camera {
                font-size: 30px !important;
                color: red !important;
            }
        </style>
        """
        # Capture and depth estimation share one worker thread
        stream_thread = threading.Thread(target=start_webcam_and_depth)
        stream_thread.start()
    else:
        # Streaming -> idle: clearing the flag ends the worker's loop
        is_streaming = False
        button.icon = 'video-camera'  # Same camera glyph in both states
        button.style.button_color = "#FFFFFF"
        # Black icon signals that nothing is being streamed
        style.value = """
        <style>
            .fa-video-camera {
                font-size: 30px !important;
                color: black !important;
            }
        </style>
        """
        worker = stream_thread
        if worker is not None:
            worker.join()  # Wait until the worker has fully finished
        gray_out_camera_view()
        gray_out_depth_view()

# Function to capture and display the webcam feed and depth map
def start_webcam_and_depth():
    """Stream webcam frames and their depth maps into the image widgets.

    Meant to run on the worker thread created by ``toggle_webcam``. While the
    global ``is_streaming`` flag is true it reads a frame from camera 0,
    downsizes it to 320x240, runs ``model.infer_image`` on it and publishes
    JPEG-encoded camera and colour-mapped depth images to ``cam_output`` and
    ``depth_output``.

    If the camera cannot be opened or a read fails, the loop ends on its own.
    In that case the streaming flag and the icon colour are reset here, so the
    UI does not keep advertising a stream that is no longer running and the
    next click starts a fresh attempt.
    """
    global is_streaming

    frame_budget = 1 / 30  # Seconds per frame for a 30 FPS ceiling
    cap = cv2.VideoCapture(0)  # Use default camera

    try:
        if not cap.isOpened():
            print("Error: Could not open video stream.")
            return

        cap.set(cv2.CAP_PROP_FPS, 30)  # Attempt to set 30 FPS

        while is_streaming:
            frame_started = time.monotonic()

            ret, frame = cap.read()
            if not ret:
                print("Failed to capture image")
                break

            # Resize the frame to a smaller resolution to increase FPS
            frame_resized = cv2.resize(frame, (320, 240))

            # Convert the frame from BGR (OpenCV format) to RGB for display
            rgb_frame = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)

            # Estimate depth using the model. The BGR frame is passed as-is;
            # presumably infer_image does its own colour conversion — TODO confirm.
            with torch.no_grad():
                depth_map = model.infer_image(frame_resized)

            # Scale the depth map to 0-255 and render it with the JET colour map
            depth_scaled = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
            depth_colored = cv2.applyColorMap(depth_scaled, cv2.COLORMAP_JET)

            # Synchronize the display updates
            with display_lock:
                # Convert the frame to a PIL image and display it in the camera view widget
                pil_img = Image.fromarray(rgb_frame)
                with BytesIO() as f:
                    pil_img.save(f, 'jpeg')
                    cam_output.value = f.getvalue()

                # Convert the depth map to a PIL image and display it in the depth view widget
                depth_pil_img = Image.fromarray(cv2.cvtColor(depth_colored, cv2.COLOR_BGR2RGB))
                with BytesIO() as f:
                    depth_pil_img.save(f, 'jpeg')
                    depth_output.value = f.getvalue()

            # Limit the FPS to 30: sleep only for what is left of the frame
            # budget, so capture/inference time is not added on top of it.
            remaining = frame_budget - (time.monotonic() - frame_started)
            if remaining > 0:
                time.sleep(remaining)

    finally:
        cap.release()  # Ensure the camera is released, including on a failed open
        if is_streaming:
            # The stream ended without a stop click (open/read failure):
            # bring the flag and the icon colour back to the idle state.
            is_streaming = False
            style.value = (
                "<style>.fa-video-camera { font-size: 30px !important; "
                "color: black !important; }</style>"
            )
        gray_out_camera_view()
        gray_out_depth_view()
        # Kept last so that a failure here cannot skip the UI reset above.
        cv2.destroyAllWindows()

# Placeholder shown in the camera pane while no stream is running
def gray_out_camera_view():
    """Replace the camera view with a solid mid-gray 320x240 JPEG."""
    with BytesIO() as jpeg_buffer:
        Image.new("RGB", (320, 240), color=(128, 128, 128)).save(jpeg_buffer, 'jpeg')
        encoded = jpeg_buffer.getvalue()
    cam_output.value = encoded

# Placeholder shown in the depth pane while no depth map is available
def gray_out_depth_view():
    """Replace the depth view with a solid mid-gray 320x240 JPEG."""
    placeholder = Image.new("RGB", (320, 240), color=(128, 128, 128))
    with BytesIO() as jpeg_buffer:
        placeholder.save(jpeg_buffer, 'jpeg')
        jpeg_bytes = jpeg_buffer.getvalue()
    depth_output.value = jpeg_bytes

# Static headings for the page and for each of the two image panes
title = widgets.HTML(
    value="<h1 style='text-align:center; margin-bottom: 10px;'>📸 Live Cam Depth Anything V2</h1>"
)
camera_label = widgets.HTML(
    value="<h3 style='text-align:left; margin-top: 0;'>Camera View:</h3>"
)
depth_label = widgets.HTML(
    value="<h3 style='text-align:left;'>Depth Prediction:</h3>"
)

# The two 320x240 image panes that the streaming thread writes JPEG bytes into
cam_output = widgets.Image(
    layout={'height': '240px', 'width': '320px'},
)
depth_output = widgets.Image(
    layout={'height': '240px', 'width': '320px'},
)

# Both panes show the gray placeholder until a stream is started
gray_out_camera_view()
gray_out_depth_view()

# Icon-only 60x60 toggle button with a white background
toggle_button = widgets.Button(
    icon='video-camera',  # Font Awesome camera icon
    description='',  # No text
    layout=widgets.Layout(
        width='60px',
        height='60px',
        margin='10px 0 0 0',
    ),
)
toggle_button.style.button_color = '#FFFFFF'

# CSS carrier widget: enlarges the icon; toggle_webcam rewrites its value to
# change the icon colour
style = widgets.HTML(value="""
    <style>
        .fa-video-camera {
            font-size: 30px !important;
        }
    </style>
""")

# Each click starts or stops the stream
toggle_button.on_click(toggle_webcam)

# Stack everything in one centred column; the CSS widget comes first so its
# rules are part of the rendered output
ui = widgets.VBox(
    children=(
        style,
        title,
        camera_label,
        cam_output,
        depth_label,
        depth_output,
        toggle_button,
    ),
    layout=widgets.Layout(align_items='center'),
)

# Render the assembled interface in the notebook output
display(ui)

xFormers not available
xFormers not available
Downloading...
From (original): https://drive.google.com/uc?id=1WA67pX-IhSxpj9yXQbeHOVjYfaVynEmF
From (redirected): https://drive.google.com/uc?id=1WA67pX-IhSxpj9yXQbeHOVjYfaVynEmF&confirm=t&uuid=d4a8f453-59ed-40c2-9541-33b5a54d489a
To: /Users/zai28/Desktop/live-depth/checkpoints/depth_anything_v2_vits.pth
100%|██████████| 99.2M/99.2M [00:03<00:00, 32.5MB/s]
  model.load_state_dict(torch.load('depth_anything_v2_vits.pth', map_location=DEVICE))


VBox(children=(HTML(value='\n    <style>\n        .fa-video-camera {\n            font-size: 30px !important;\…