In [None]:
下面是一份融合了双边滤波和卡尔曼滤波的最终代码。它能有效解决您提出的两个问题，显著提升深度跟踪的平滑度和鲁棒性。主要改动如下：

引入 filterpy 库来实现卡尔曼滤波。您需要先在环境中安装它：%pip install filterpy

在视频处理函数中，为每个track_id创建一个独立的卡尔曼滤波器实例。

在计算每个目标的深度后，使用卡尔曼滤波器进行预测和更新，得到平滑后的深度值。

保留了“百分位截断均值”作为对单帧观测值的预处理，以提供更稳定的输入给卡尔曼滤波器。

加入了清理逻辑：当某个目标不再出现时，删除其对应的卡尔曼滤波器，避免内存无限增长。

In [1]:
# ==============================================================================
# 0. Key dependency check (for debugging)
# ==============================================================================
# Imports the optional/heavy dependencies first so that a missing package is
# reported with an installation hint before any other work starts.
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    # --- Added: filterpy provides the KalmanFilter class used for smoothing ---
    from filterpy.kalman import KalmanFilter
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(">>> [INFO] filterpy 库已成功导入。")
except ImportError as e:
    # Print a hint, then re-raise so the cell stops here.
    print(f"!!! [ERROR] 缺少核心库: {e}")
    print("!!! [HINT] 请确保已安装 filterpy (pip install filterpy)。")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")


# ==============================================================================
# 1. Import the required libraries
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    print(">>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    print("!!! [HINT] 请确保您已经按照教程正确安装了所有依赖。")
    raise

# --- Import the Metric3D modules ---
# The Metric3D checkout is put at the front of sys.path before the import
# below, so `mono` is looked up there first.
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)
    print(f">>> [DEBUG] 已将 '{METRIC3D_PATH}' 添加到系统路径。")

try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    print(f"!!! [HINT] 请确认 Metric3D 的代码库是否存在于 '{METRIC3D_PATH}' 路径下。")
    raise

print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. Configuration and path validation
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")

# Locations of the model weights, configs, and videos used by this cell.
YOLO_MODEL_PATH = '/root/autodl-tmp/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/0000.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_video_kalman_filtered.mp4'  # output name for this variant
TRACKER_CONFIG_PATH = '/root/autodl-tmp/bytetrack.yaml'


# Display label -> path for every input file that must already exist.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
    "跟踪器配置": TRACKER_CONFIG_PATH,
}

# Report every missing file (not just the first) before aborting.
missing_entries = [
    (label, location)
    for label, location in paths_to_check.items()
    if not os.path.exists(location)
]
for label, location in missing_entries:
    print(f"!!! [ERROR] 路径检查失败: {label} 文件未找到于 '{location}'")

all_paths_ok = not missing_entries
if all_paths_ok:
    print(">>> [DEBUG] 所有文件路径检查通过。")
else:
    raise FileNotFoundError("一个或多个关键文件路径无效。请确保已创建 bytetrack.yaml 文件。")

# Use CUDA when available, otherwise fall back to the CPU with a warning.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")
if DEVICE.type == 'cpu':
    print("!!! [WARNING] 未检测到 CUDA 设备，将使用 CPU 运行。速度会很慢！")

print(">>> [DEBUG] 步骤 2: 配置完成。\n" + "="*60 + "\n")

# ==============================================================================
# 3. Model loading
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
# --- Load the YOLOv8 detector (tracking is configured later via the tracker yaml) ---
try:
    print(">>> [DEBUG] 正在加载 YOLOv8 模型...")
    yolo_model = YOLO(YOLO_MODEL_PATH)
    print(">>> [DEBUG] YOLOv8 模型加载成功！")

    TARGET_CLASS_NAME = 'Car'
    # Look the class id up by name; fail with an explicit message (instead of
    # a bare IndexError) when the checkpoint has no such class.
    TARGET_CLASS_ID = next(
        (class_id for class_id, class_name in yolo_model.names.items()
         if class_name == TARGET_CLASS_NAME),
        None,
    )
    if TARGET_CLASS_ID is None:
        raise ValueError(
            f"类别 '{TARGET_CLASS_NAME}' 不在模型类别列表中: {list(yolo_model.names.values())}"
        )
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")

except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型或查找类别ID时失败: {e}")
    raise

# --- Load the Metric3Dv2 depth model ---
try:
    print(">>> [DEBUG] 正在加载 Metric3Dv2 模型...")

    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # Accept checkpoints that wrap the weights under 'model_state_dict' or
    # 'model', as well as a bare state dict.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them instead of
    # silently running with partially initialised weights.
    load_result = metric3d_model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"!!! [WARNING] 权重与模型不完全匹配: 缺失 {len(load_result.missing_keys)} 个键, "
            f"多余 {len(load_result.unexpected_keys)} 个键。"
        )

    metric3d_model.eval()
    # Report the device actually used rather than always claiming "GPU".
    print(f">>> [SUCCESS] Metric3Dv2 模型加载并移动到 {DEVICE} 成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    import traceback
    traceback.print_exc()
    raise

print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. 视频处理主函数
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")
def _central_depth_roi(depth_map, box, shrink=0.5):
    """Crop the centred sub-window of a bounding box from a depth map.

    Args:
        depth_map: 2-D array indexed as [row, column].
        box: (x1, y1, x2, y2) integer pixel corners of the bounding box.
        shrink: Fraction of the box width and height kept around its centre.

    Returns:
        A (possibly empty) view into ``depth_map``.
    """
    frame_h, frame_w = depth_map.shape[:2]
    x1, y1, x2, y2 = box
    box_w, box_h = x2 - x1, y2 - y1
    roi_w, roi_h = int(box_w * shrink), int(box_h * shrink)
    roi_x1 = max(x1 + (box_w - roi_w) // 2, 0)
    roi_y1 = max(y1 + (box_h - roi_h) // 2, 0)
    roi_x2 = min(roi_x1 + roi_w, frame_w)
    roi_y2 = min(roi_y1 + roi_h, frame_h)
    return depth_map[roi_y1:roi_y2, roi_x1:roi_x2]


def _trimmed_mean_depth(depth_roi, trim_ratio=0.10):
    """Return the mean of the ROI after dropping the lowest/highest values.

    Non-finite values are ignored. Returns 0.0 when nothing usable remains.
    """
    ordered = np.sort(depth_roi, axis=None)
    ordered = ordered[np.isfinite(ordered)]
    if ordered.size == 0:
        return 0.0
    cut = int(ordered.size * trim_ratio)
    if ordered.size > 2 * cut:
        # Use an explicit end index: ordered[cut:-cut] would be EMPTY when
        # cut == 0 (fewer than 1/trim_ratio samples).
        ordered = ordered[cut:ordered.size - cut]
    return float(np.mean(ordered))


def _new_depth_kalman_filter(initial_depth):
    """Build a constant-velocity Kalman filter over [depth, depth rate]."""
    kf = KalmanFilter(dim_x=2, dim_z=1)
    kf.x = np.array([initial_depth, 0.])   # initial state [depth, velocity]
    kf.F = np.array([[1., 1.], [0., 1.]])  # constant-velocity transition, dt = 1 frame
    kf.H = np.array([[1., 0.]])            # only the depth is observed
    kf.P *= 100.                           # large initial uncertainty
    kf.R = 10                              # measurement noise (key tunable)
    # Process noise (key tunable). Must be a matrix: a scalar would be
    # broadcast onto every entry of P, including the off-diagonal terms.
    kf.Q = np.eye(2) * 0.1
    return kf


def _draw_depth_label(image, text, x1, y1):
    """Draw ``text`` on a filled banner above (x1, y1), kept inside the frame."""
    (text_w, _), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    anchor_y = max(y1, 25)  # boxes touching the top edge keep a visible label
    cv2.rectangle(image, (x1, anchor_y - 25), (x1 + text_w + 5, anchor_y - 5), (0, 100, 0), -1)
    cv2.putText(image, text, (x1 + 2, anchor_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


def process_video_debug(input_path, output_path):
    """Track target objects in a video and overlay Kalman-smoothed depth.

    For every frame: run the YOLO tracker, predict a depth map with the
    Metric3D model, bilateral-filter it, take a trimmed mean of the depth in
    the central half of each tracked box, smooth that value with a per-track
    Kalman filter, and write the annotated frame to ``output_path``.

    Uses the module-level ``yolo_model``, ``metric3d_model``, ``cfg``,
    ``DEVICE``, ``TRACKER_CONFIG_PATH`` and ``TARGET_CLASS_ID``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated video to write (mp4v codec).

    Returns:
        None. If the input cannot be opened or the output cannot be created,
        an error is printed and the function returns early.
    """
    print("\n--- 开始视频处理 ---")
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"!!! [ERROR] 无法打开视频文件: {input_path}")
        cap.release()
        return

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize expects (width, height); vit_size is indexed [0], [1]
        # in the opposite order here.
        vit_size = cfg.data_basic['vit_size']
        metric3d_input_size = (vit_size[1], vit_size[0])

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            print(f"!!! [ERROR] 无法创建输出视频文件: {output_path}")
            return

        print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")
        print(f">>> [INFO] 处理后的视频将保存至: {output_path}")

        # One Kalman filter per track id.
        kalman_filters = {}

        # Some containers report no frame count; let tqdm run without a total.
        with tqdm(total=total_frames if total_frames > 0 else None, desc="视频处理进度") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                track_results = yolo_model.track(
                    frame,
                    persist=True,
                    verbose=False,
                    tracker=TRACKER_CONFIG_PATH,
                    classes=[TARGET_CLASS_ID],
                )

                annotated_frame = frame.copy()

                with torch.no_grad():
                    # NOTE(review): the frame is only resized and scaled to
                    # [0, 1]. The upstream Metric3D demo appears to also apply
                    # mean/std normalisation and a focal-length based rescale
                    # of the output — confirm the values here are metric.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                    rgb_torch = torch.from_numpy(rgb_frame_resized).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
                    pred_output = metric3d_model(data={'input': rgb_torch})
                    pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                    pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)

                    pred_depth_filtered = cv2.bilateralFilter(pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15)

                detections = track_results[0].boxes
                boxes = detections.xyxy.cpu().numpy()
                track_ids = detections.id.int().cpu().tolist() if detections.id is not None else []

                # Track ids seen in this frame; all other filters are dropped below.
                active_track_ids = set()

                for box, track_id in zip(boxes, track_ids):
                    active_track_ids.add(track_id)
                    x1, y1, x2, y2 = map(int, box)

                    cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                    depth_roi = _central_depth_roi(pred_depth_filtered, (x1, y1, x2, y2))
                    if depth_roi.size == 0:
                        continue

                    # Step 1: robust single-frame observation (trimmed mean).
                    observed_depth = _trimmed_mean_depth(depth_roi)

                    # Skip unusable observations so NaN/inf/non-positive
                    # values never reach the filter state.
                    if not np.isfinite(observed_depth) or observed_depth <= 0:
                        continue

                    # Step 2: temporal smoothing with the track's Kalman filter.
                    kf = kalman_filters.get(track_id)
                    if kf is None:
                        kf = kalman_filters[track_id] = _new_depth_kalman_filter(observed_depth)

                    kf.predict()
                    kf.update(observed_depth)
                    smoothed_depth = float(kf.x[0])

                    depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                    _draw_depth_label(annotated_frame, depth_text, x1, y1)

                # Drop filters of tracks absent from this frame to bound memory.
                for stale_id in set(kalman_filters) - active_track_ids:
                    del kalman_filters[stale_id]

                out.write(annotated_frame)
                pbar.update(1)
    finally:
        # Always release the capture and writer, even if a frame fails.
        cap.release()
        if out is not None:
            out.release()

    print(f"\n--- 视频处理完成！ ---")
    print(f">>> [SUCCESS] 输出视频已成功保存到: {output_path}")

print(">>> [DEBUG] 步骤 4: 视频处理函数定义完成。\n" + "="*60 + "\n")

# ==============================================================================
# 5. Run the pipeline
# ==============================================================================
# Top-level boundary: any failure is reported with a full traceback and a
# hint, and the cell still finishes with the closing banner.
print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_debug(INPUT_VIDEO_PATH, OUTPUT_VIDEO_PATH)
except Exception as run_error:
    import traceback
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {run_error}")
    traceback.print_exc()
    print("!!! [HINT] 请检查上面的错误信息。可能的原因包括：CUDA内存不足、模型与输入数据维度不匹配等。")

print(">>> [DEBUG] 步骤 5: 主程序执行完毕。\n" + "="*60)

>>> [DEBUG] 步骤 0: 检查关键库版本...


  from pkg_resources import packaging  # type: ignore[attr-defined]
  from .autonotebook import tqdm as notebook_tqdm


>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] filterpy 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。
>>> [DEBUG] 已将 '/root/autodl-tmp/Metric3D' 添加到系统路径。
>>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [DEBUG] 正在加载 YOLOv8 模型...
>>> [DEBUG] YOLOv8 模型加载成功！
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [DEBUG] 正在加载 Metric3Dv2 模型...
>>> [SUCCESS] Metric3Dv2 模型加载并移动到 GPU 成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 4: 视频处理函数定义完成。

>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 154 帧。
>>> [INFO] 处理后的视频将保存至: /root/autodl-tmp/output_video_kalman_filtered.mp4


视频处理进度: 100%|██████████| 154/154 [01:07<00:00,  2.29it/s]


--- 视频处理完成！ ---
>>> [SUCCESS] 输出视频已成功保存到: /root/autodl-tmp/output_video_kalman_filtered.mp4
>>> [DEBUG] 步骤 5: 主程序执行完毕。





In [None]:
结合K-Means聚类的完整代码
下面的代码在上一版（双边滤波+卡尔曼滤波）的基础上，用K-Means聚类替换了原来的“百分位截断均值”方法。

关键改动:

引入sklearn.cluster.KMeans。您需要在环境中安装scikit-learn：%pip install scikit-learn。

在深度计算部分，我们使用K-Means将ROI中的深度值聚为两类。

我们选取两个聚类中心中深度值较小（即较近）的那个作为当前帧的“观测深度”。

为了代码的健壮性：如果ROI中的像素点太少（不超过2个，不足以稳定地聚成两类），则不运行K-Means，直接使用均值；如果K-Means运行时出错，同样回退到使用均值的方法。

In [1]:
# ==============================================================================
# 0. Key dependency check (for debugging)
# ==============================================================================
# Imports the optional/heavy dependencies first so that a missing package is
# reported with an installation hint before any other work starts.
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    from filterpy.kalman import KalmanFilter
    # --- Added: KMeans from scikit-learn ---
    from sklearn.cluster import KMeans
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(">>> [INFO] filterpy 和 scikit-learn 库已成功导入。")
except ImportError as e:
    # Print a hint, then re-raise so the cell stops here.
    print(f"!!! [ERROR] 缺少核心库: {e}")
    print("!!! [HINT] 请确保已安装 filterpy (pip install filterpy) 和 scikit-learn (pip install scikit-learn)。")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")


# ==============================================================================
# 1. Import the required libraries
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    print(">>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    print("!!! [HINT] 请确保您已经按照教程正确安装了所有依赖。")
    raise

# --- Import the Metric3D modules ---
# The Metric3D checkout is put at the front of sys.path before the import
# below, so `mono` is looked up there first.
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)
    print(f">>> [DEBUG] 已将 '{METRIC3D_PATH}' 添加到系统路径。")

try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    print(f"!!! [HINT] 请确认 Metric3D 的代码库是否存在于 '{METRIC3D_PATH}' 路径下。")
    raise

print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. Configuration and path validation
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")

# Locations of the model weights, configs, and videos used by this cell.
YOLO_MODEL_PATH = '/root/autodl-tmp/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/0000.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_video_kmeans_filtered.mp4'  # output name for this variant
TRACKER_CONFIG_PATH = '/root/autodl-tmp/bytetrack.yaml'


# Display label -> path for every input file that must already exist.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
    "跟踪器配置": TRACKER_CONFIG_PATH,
}

# Report every missing file (not just the first) before aborting.
missing_entries = [
    (label, location)
    for label, location in paths_to_check.items()
    if not os.path.exists(location)
]
for label, location in missing_entries:
    print(f"!!! [ERROR] 路径检查失败: {label} 文件未找到于 '{location}'")

all_paths_ok = not missing_entries
if all_paths_ok:
    print(">>> [DEBUG] 所有文件路径检查通过。")
else:
    raise FileNotFoundError("一个或多个关键文件路径无效。请确保已创建 bytetrack.yaml 文件。")

# Use CUDA when available, otherwise fall back to the CPU with a warning.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")
if DEVICE.type == 'cpu':
    print("!!! [WARNING] 未检测到 CUDA 设备，将使用 CPU 运行。速度会很慢！")

print(">>> [DEBUG] 步骤 2: 配置完成。\n" + "="*60 + "\n")

# ==============================================================================
# 3. Model loading
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
# --- Load the YOLOv8 detector ---
try:
    print(">>> [DEBUG] 正在加载 YOLOv8 模型...")
    yolo_model = YOLO(YOLO_MODEL_PATH)

    TARGET_CLASS_NAME = 'Car'
    # Look the class id up by name; fail with an explicit message (instead of
    # a bare IndexError) when the checkpoint has no such class.
    TARGET_CLASS_ID = next(
        (class_id for class_id, class_name in yolo_model.names.items()
         if class_name == TARGET_CLASS_NAME),
        None,
    )
    if TARGET_CLASS_ID is None:
        raise ValueError(
            f"类别 '{TARGET_CLASS_NAME}' 不在模型类别列表中: {list(yolo_model.names.values())}"
        )
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")

except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型或查找类别ID时失败: {e}")
    raise

# --- Load the Metric3Dv2 depth model ---
try:
    print(">>> [DEBUG] 正在加载 Metric3Dv2 模型...")

    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # Accept checkpoints that wrap the weights under 'model_state_dict' or
    # 'model', as well as a bare state dict.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them instead of
    # silently running with partially initialised weights.
    load_result = metric3d_model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"!!! [WARNING] 权重与模型不完全匹配: 缺失 {len(load_result.missing_keys)} 个键, "
            f"多余 {len(load_result.unexpected_keys)} 个键。"
        )

    metric3d_model.eval()
    # Report the device actually used rather than always claiming "GPU".
    print(f">>> [SUCCESS] Metric3Dv2 模型加载并移动到 {DEVICE} 成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    import traceback
    traceback.print_exc()
    raise

print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. 视频处理主函数
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")
def _central_depth_roi(depth_map, box, shrink=0.5):
    """Crop the centred sub-window of a bounding box from a depth map.

    Args:
        depth_map: 2-D array indexed as [row, column].
        box: (x1, y1, x2, y2) integer pixel corners of the bounding box.
        shrink: Fraction of the box width and height kept around its centre.

    Returns:
        A (possibly empty) view into ``depth_map``.
    """
    frame_h, frame_w = depth_map.shape[:2]
    x1, y1, x2, y2 = box
    box_w, box_h = x2 - x1, y2 - y1
    roi_w, roi_h = int(box_w * shrink), int(box_h * shrink)
    roi_x1 = max(x1 + (box_w - roi_w) // 2, 0)
    roi_y1 = max(y1 + (box_h - roi_h) // 2, 0)
    roi_x2 = min(roi_x1 + roi_w, frame_w)
    roi_y2 = min(roi_y1 + roi_h, frame_h)
    return depth_map[roi_y1:roi_y2, roi_x1:roi_x2]


def _kmeans_foreground_depth(depth_roi):
    """Estimate the foreground depth of an ROI with 2-cluster K-Means.

    The finite depth values are split into two clusters and the smaller
    (nearer) cluster centre is returned. With two or fewer usable samples, or
    if clustering fails, the plain mean is used. Returns 0.0 when the ROI
    holds no finite value.
    """
    values = depth_roi[np.isfinite(depth_roi)]  # 1-D; NaN/inf would make KMeans raise
    if values.size == 0:
        return 0.0
    if values.size > 2:
        try:
            kmeans = KMeans(n_clusters=2, n_init='auto', random_state=0).fit(values.reshape(-1, 1))
            return float(kmeans.cluster_centers_.min())
        except Exception:
            # Deliberate best-effort: any clustering failure falls back to
            # the mean below rather than aborting the whole video.
            pass
    return float(np.mean(values))


def _new_depth_kalman_filter(initial_depth):
    """Build a constant-velocity Kalman filter over [depth, depth rate]."""
    kf = KalmanFilter(dim_x=2, dim_z=1)
    kf.x = np.array([initial_depth, 0.])   # initial state [depth, velocity]
    kf.F = np.array([[1., 1.], [0., 1.]])  # constant-velocity transition, dt = 1 frame
    kf.H = np.array([[1., 0.]])            # only the depth is observed
    kf.P *= 100.                           # large initial uncertainty
    kf.R = 10                              # measurement noise (key tunable)
    # Process noise (key tunable). Must be a matrix: a scalar would be
    # broadcast onto every entry of P, including the off-diagonal terms.
    kf.Q = np.eye(2) * 0.1
    return kf


def _draw_depth_label(image, text, x1, y1):
    """Draw ``text`` on a filled banner above (x1, y1), kept inside the frame."""
    (text_w, _), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    anchor_y = max(y1, 25)  # boxes touching the top edge keep a visible label
    cv2.rectangle(image, (x1, anchor_y - 25), (x1 + text_w + 5, anchor_y - 5), (0, 100, 0), -1)
    cv2.putText(image, text, (x1 + 2, anchor_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


def process_video_debug(input_path, output_path):
    """Track target objects in a video and overlay Kalman-smoothed depth.

    For every frame: run the YOLO tracker, predict a depth map with the
    Metric3D model, bilateral-filter it, estimate each tracked box's depth
    with 2-cluster K-Means over the central half of the box (nearer cluster
    wins), smooth that value with a per-track Kalman filter, and write the
    annotated frame to ``output_path``.

    Uses the module-level ``yolo_model``, ``metric3d_model``, ``cfg``,
    ``DEVICE``, ``TRACKER_CONFIG_PATH`` and ``TARGET_CLASS_ID``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated video to write (mp4v codec).

    Returns:
        None. If the input cannot be opened or the output cannot be created,
        an error is printed and the function returns early.
    """
    print("\n--- 开始视频处理 ---")
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"!!! [ERROR] 无法打开视频文件: {input_path}")
        cap.release()
        return

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize expects (width, height); vit_size is indexed [0], [1]
        # in the opposite order here.
        vit_size = cfg.data_basic['vit_size']
        metric3d_input_size = (vit_size[1], vit_size[0])

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            print(f"!!! [ERROR] 无法创建输出视频文件: {output_path}")
            return

        print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")
        print(f">>> [INFO] 处理后的视频将保存至: {output_path}")

        # One Kalman filter per track id.
        kalman_filters = {}

        # Some containers report no frame count; let tqdm run without a total.
        with tqdm(total=total_frames if total_frames > 0 else None, desc="视频处理进度") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                track_results = yolo_model.track(
                    frame,
                    persist=True,
                    verbose=False,
                    tracker=TRACKER_CONFIG_PATH,
                    classes=[TARGET_CLASS_ID],
                )

                annotated_frame = frame.copy()

                with torch.no_grad():
                    # NOTE(review): the frame is only resized and scaled to
                    # [0, 1]. The upstream Metric3D demo appears to also apply
                    # mean/std normalisation and a focal-length based rescale
                    # of the output — confirm the values here are metric.
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                    rgb_torch = torch.from_numpy(rgb_frame_resized).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
                    pred_output = metric3d_model(data={'input': rgb_torch})
                    pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                    pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)
                    pred_depth_filtered = cv2.bilateralFilter(pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15)

                detections = track_results[0].boxes
                boxes = detections.xyxy.cpu().numpy()
                track_ids = detections.id.int().cpu().tolist() if detections.id is not None else []

                # Track ids seen in this frame; all other filters are dropped below.
                active_track_ids = set()

                for box, track_id in zip(boxes, track_ids):
                    active_track_ids.add(track_id)
                    x1, y1, x2, y2 = map(int, box)

                    cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                    depth_roi = _central_depth_roi(pred_depth_filtered, (x1, y1, x2, y2))
                    if depth_roi.size == 0:
                        continue

                    # Step 1: single-frame observation from the nearer K-Means cluster.
                    observed_depth = _kmeans_foreground_depth(depth_roi)

                    # Skip unusable observations so NaN/inf/non-positive
                    # values never reach the filter state.
                    if not np.isfinite(observed_depth) or observed_depth <= 0:
                        continue

                    # Step 2: temporal smoothing with the track's Kalman filter.
                    kf = kalman_filters.get(track_id)
                    if kf is None:
                        kf = kalman_filters[track_id] = _new_depth_kalman_filter(observed_depth)

                    kf.predict()
                    kf.update(observed_depth)
                    smoothed_depth = float(kf.x[0])

                    depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                    _draw_depth_label(annotated_frame, depth_text, x1, y1)

                # Drop filters of tracks absent from this frame to bound memory.
                for stale_id in set(kalman_filters) - active_track_ids:
                    del kalman_filters[stale_id]

                out.write(annotated_frame)
                pbar.update(1)
    finally:
        # Always release the capture and writer, even if a frame fails.
        cap.release()
        if out is not None:
            out.release()

    print(f"\n--- 视频处理完成！ ---")
    print(f">>> [SUCCESS] 输出视频已成功保存到: {output_path}")

print(">>> [DEBUG] 步骤 4: 视频处理函数定义完成。\n" + "="*60 + "\n")

# ==============================================================================
# 5. Run the pipeline
# ==============================================================================
# Top-level boundary: any failure is reported with a full traceback and a
# hint, and the cell still finishes with the closing banner.
print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_debug(INPUT_VIDEO_PATH, OUTPUT_VIDEO_PATH)
except Exception as run_error:
    import traceback
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {run_error}")
    traceback.print_exc()
    print("!!! [HINT] 请检查上面的错误信息。可能的原因包括：CUDA内存不足、模型与输入数据维度不匹配等。")

print(">>> [DEBUG] 步骤 5: 主程序执行完毕。\n" + "="*60)

>>> [DEBUG] 步骤 0: 检查关键库版本...


  from pkg_resources import packaging  # type: ignore[attr-defined]
  from .autonotebook import tqdm as notebook_tqdm


>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] filterpy 和 scikit-learn 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。
>>> [DEBUG] 已将 '/root/autodl-tmp/Metric3D' 添加到系统路径。
>>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [DEBUG] 正在加载 YOLOv8 模型...
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [DEBUG] 正在加载 Metric3Dv2 模型...
>>> [SUCCESS] Metric3Dv2 模型加载并移动到 GPU 成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 4: 视频处理函数定义完成。

>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 154 帧。
>>> [INFO] 处理后的视频将保存至: /root/autodl-tmp/output_video_kmeans_filtered.mp4


视频处理进度: 100%|██████████| 154/154 [01:09<00:00,  2.21it/s]


--- 视频处理完成！ ---
>>> [SUCCESS] 输出视频已成功保存到: /root/autodl-tmp/output_video_kmeans_filtered.mp4
>>> [DEBUG] 步骤 5: 主程序执行完毕。





In [4]:
# ==============================================================================
# 0. Key dependency check (for debugging)
# ==============================================================================
# Imports the optional/heavy dependencies first so that a missing package is
# reported with an installation hint before any other work starts.
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    from filterpy.kalman import KalmanFilter
    # --- Import the Gaussian mixture model (GMM) from scikit-learn ---
    from sklearn.mixture import GaussianMixture
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(">>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。")
except ImportError as e:
    # Print a hint, then re-raise so the cell stops here.
    print(f"!!! [ERROR] 缺少核心库: {e}")
    print("!!! [HINT] 请确保已安装 filterpy (pip install filterpy) 和 scikit-learn (pip install scikit-learn)。")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")


# ==============================================================================
# 1. Import the required libraries
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    print(">>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    raise

# --- Import the Metric3D modules ---
# The Metric3D checkout is put at the front of sys.path before the import
# below, so `mono` is looked up there first.
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)

try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    raise

print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. Configuration and path validation
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")

# Locations of the model weights, configs, and videos used by this cell.
YOLO_MODEL_PATH = '/root/autodl-tmp/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/0000.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_video_gmm_filtered.mp4'  # output name for this variant
TRACKER_CONFIG_PATH = '/root/autodl-tmp/bytetrack.yaml'


# Display label -> path for every input file that must already exist.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
    "跟踪器配置": TRACKER_CONFIG_PATH,
}

# Report every missing file (not just the first) before aborting.
missing_entries = [
    (label, location)
    for label, location in paths_to_check.items()
    if not os.path.exists(location)
]
for label, location in missing_entries:
    print(f"!!! [ERROR] 路径检查失败: {label} 文件未找到于 '{location}'")

all_paths_ok = not missing_entries
if all_paths_ok:
    print(">>> [DEBUG] 所有文件路径检查通过。")
else:
    raise FileNotFoundError("一个或多个关键文件路径无效。")

# Use CUDA when available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")

print(">>> [DEBUG] 步骤 2: 配置完成。\n" + "="*60 + "\n")

# ==============================================================================
# 3. Model loading
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
# --- Load the YOLOv8 detector ---
try:
    yolo_model = YOLO(YOLO_MODEL_PATH)
    TARGET_CLASS_NAME = 'Car'
    # Look the class id up by name; fail with an explicit message (instead of
    # a bare IndexError) when the checkpoint has no such class.
    TARGET_CLASS_ID = next(
        (class_id for class_id, class_name in yolo_model.names.items()
         if class_name == TARGET_CLASS_NAME),
        None,
    )
    if TARGET_CLASS_ID is None:
        raise ValueError(
            f"类别 '{TARGET_CLASS_NAME}' 不在模型类别列表中: {list(yolo_model.names.values())}"
        )
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")
except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型或查找类别ID时失败: {e}")
    raise

# --- Load the Metric3Dv2 depth model ---
try:
    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # Accept checkpoints that wrap the weights under 'model_state_dict' or
    # 'model', as well as a bare state dict.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them instead of
    # silently running with partially initialised weights.
    load_result = metric3d_model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"!!! [WARNING] 权重与模型不完全匹配: 缺失 {len(load_result.missing_keys)} 个键, "
            f"多余 {len(load_result.unexpected_keys)} 个键。"
        )
    metric3d_model.eval()
    # Report the device actually used rather than always claiming "GPU".
    print(f">>> [SUCCESS] Metric3Dv2 模型加载并移动到 {DEVICE} 成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    raise

print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. 视频处理主函数
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")
def _central_roi(x1, y1, x2, y2, frame_w, frame_h, scale=0.5):
    """Return the centred sub-box of a detection, clipped to the frame.

    The box ``(x1, y1, x2, y2)`` is shrunk to ``scale`` of its width and
    height around its centre.  Returns ``(rx1, ry1, rx2, ry2)``; the region
    may be empty for degenerate boxes.
    """
    box_w, box_h = x2 - x1, y2 - y1
    roi_w, roi_h = int(box_w * scale), int(box_h * scale)
    rx1 = max(x1 + (box_w - roi_w) // 2, 0)
    ry1 = max(y1 + (box_h - roi_h) // 2, 0)
    return rx1, ry1, min(rx1 + roi_w, frame_w), min(ry1 + roi_h, frame_h)


def _observe_depth(depth_roi, predicted_depth=None, min_gmm_pixels=10):
    """Reduce a depth ROI to a single scalar observation.

    With more than ``min_gmm_pixels`` finite pixels, Gaussian mixtures with
    1-3 components are fitted and the one with the lowest BIC is kept.  The
    cluster mean closest to ``predicted_depth`` is returned, or the smallest
    cluster mean when no prediction exists (new track).  With fewer pixels,
    or if the fit fails, the plain mean of the finite pixels is used.

    Returns ``None`` when the ROI holds no finite pixel or the result is
    not finite.
    """
    import warnings

    values = depth_roi[np.isfinite(depth_roi)]
    if values.size == 0:
        return None

    observed = float(values.mean())
    if values.size > min_gmm_pixels:
        samples = values.reshape(-1, 1)
        try:
            best_gmm, lowest_bic = None, float("inf")
            for n_components in range(1, 4):
                gmm = GaussianMixture(n_components=n_components, random_state=0)
                gmm.fit(samples)
                bic_score = gmm.bic(samples)
                if bic_score < lowest_bic:
                    lowest_bic, best_gmm = bic_score, gmm
            cluster_means = best_gmm.means_.ravel()
            if predicted_depth is None:
                observed = float(cluster_means.min())
            else:
                nearest = int(np.argmin(np.abs(cluster_means - predicted_depth)))
                observed = float(cluster_means[nearest])
        except Exception as exc:
            # Deliberate best-effort: keep the mean fallback, but make the
            # failure visible instead of swallowing it silently.
            warnings.warn(
                f"GMM depth clustering failed, falling back to ROI mean: {exc!r}",
                RuntimeWarning,
            )

    return observed if np.isfinite(observed) else None


def _new_depth_filter(initial_depth):
    """Create a two-state Kalman filter initialised at ``initial_depth``.

    The state is ``[depth, depth change per predict step]``; only the depth
    component is measured (see ``H``).
    """
    kf = KalmanFilter(dim_x=2, dim_z=1)
    kf.x = np.array([initial_depth, 0.])
    kf.F = np.array([[1., 1.], [0., 1.]])  # depth advances by the rate each step
    kf.H = np.array([[1., 0.]])
    kf.P *= 100.
    kf.R = 5
    # NOTE(review): a scalar Q appears to be broadcast onto every element of
    # P in predict(); confirm this is intended rather than a diagonal matrix.
    kf.Q = 0.1
    return kf


def process_video_debug(input_path, output_path):
    """Annotate a video with tracked boxes and Kalman-smoothed depth labels.

    For every frame the function runs YOLO tracking for ``TARGET_CLASS_ID``,
    predicts a depth map with ``metric3d_model``, bilateral-filters it, and
    for each tracked box derives one depth observation from the central ROI.
    Each track id owns a Kalman filter that is predicted once per frame in
    which the track is present and updated when a valid observation exists.
    Filters of tracks absent from the current frame are discarded.

    Relies on the module-level ``yolo_model``, ``metric3d_model``, ``cfg``,
    ``DEVICE``, ``TRACKER_CONFIG_PATH`` and ``TARGET_CLASS_ID``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated ``mp4v`` video to write.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created.
    """
    print("\n--- 开始视频处理 ---")
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"无法打开输入视频: '{input_path}'")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize takes (width, height); presumably vit_size is stored as
        # (height, width), hence the swap -- TODO confirm against the config.
        vit_size = cfg.data_basic['vit_size']
        metric3d_input_size = (vit_size[1], vit_size[0])

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError(f"无法创建输出视频: '{output_path}'")

        print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")

        kalman_filters = {}  # track id -> KalmanFilter

        with tqdm(total=total_frames, desc="视频处理进度") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                track_results = yolo_model.track(
                    frame,
                    persist=True,
                    verbose=False,
                    tracker=TRACKER_CONFIG_PATH,
                    classes=[TARGET_CLASS_ID],
                )
                annotated_frame = frame.copy()

                # Dense depth for the whole frame, resized back to the video size.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                with torch.no_grad():
                    rgb_torch = (
                        torch.from_numpy(rgb_frame_resized)
                        .permute(2, 0, 1)
                        .unsqueeze(0)
                        .float()
                        .to(DEVICE)
                        / 255.0
                    )
                    pred_output = metric3d_model(data={'input': rgb_torch})
                    pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)
                pred_depth_filtered = cv2.bilateralFilter(
                    pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15
                )

                frame_boxes = track_results[0].boxes
                boxes = frame_boxes.xyxy.cpu().numpy()
                track_ids = frame_boxes.id.int().cpu().tolist() if frame_boxes.id is not None else []
                active_track_ids = set()

                for box, track_id in zip(boxes, track_ids):
                    active_track_ids.add(track_id)
                    x1, y1, x2, y2 = map(int, box)
                    cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                    # Advance an existing filter exactly once per frame, whichever
                    # observation path is taken below, so that update() is never
                    # applied to a state that was not predicted first.
                    kf = kalman_filters.get(track_id)
                    predicted_depth = None
                    if kf is not None:
                        kf.predict()
                        predicted_depth = float(kf.x[0])

                    roi_x1, roi_y1, roi_x2, roi_y2 = _central_roi(x1, y1, x2, y2, width, height)
                    depth_roi = pred_depth_filtered[roi_y1:roi_y2, roi_x1:roi_x2]
                    observed_depth = _observe_depth(depth_roi, predicted_depth)

                    # No usable measurement this frame: skip the update and label.
                    if observed_depth is None or observed_depth <= 0:
                        continue

                    if kf is None:
                        kf = _new_depth_filter(observed_depth)
                        kalman_filters[track_id] = kf
                    else:
                        kf.update(observed_depth)

                    smoothed_depth = float(kf.x[0])

                    depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                    (text_w, _), _ = cv2.getTextSize(depth_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                    # Keep the label inside the frame for boxes touching the top edge.
                    label_y = max(y1, 25)
                    cv2.rectangle(
                        annotated_frame,
                        (x1, label_y - 25),
                        (x1 + text_w + 5, label_y - 5),
                        (0, 100, 0),
                        -1,
                    )
                    cv2.putText(
                        annotated_frame,
                        depth_text,
                        (x1 + 2, label_y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.6,
                        (255, 255, 255),
                        2,
                    )

                # Drop filters of tracks not present in this frame so the
                # dictionary cannot grow without bound.
                for inactive_id in set(kalman_filters) - active_track_ids:
                    del kalman_filters[inactive_id]

                out.write(annotated_frame)
                pbar.update(1)
    finally:
        # Release the reader/writer even when processing fails midway.
        cap.release()
        if out is not None:
            out.release()

    cv2.destroyAllWindows()
    print(f"\n--- 视频处理完成！ ---")
    print(f">>> [SUCCESS] 输出视频已成功保存到: {output_path}")

# ==============================================================================
# 5. 运行主程序
# ==============================================================================
print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_debug(INPUT_VIDEO_PATH, OUTPUT_VIDEO_PATH)
except Exception as e:
    # Best-effort notebook cell: report the failure with a full traceback
    # instead of re-raising, so the closing banner below is still printed.
    import traceback
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {e}")
    traceback.print_exc()

print(f">>> [DEBUG] 步骤 5: 主程序执行完毕。\n{'=' * 60}")

>>> [DEBUG] 步骤 0: 检查关键库版本...
>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。
>>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [SUCCESS] Metric3Dv2 模型加载并移动到 GPU 成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 154 帧。


视频处理进度: 100%|██████████| 154/154 [01:25<00:00,  1.81it/s]


--- 视频处理完成！ ---
>>> [SUCCESS] 输出视频已成功保存到: /root/autodl-tmp/output_video_gmm_filtered.mp4
>>> [DEBUG] 步骤 5: 主程序执行完毕。





In [4]:
# ==============================================================================
# 0. 关键依赖库检查 (用于调试)
# ==============================================================================
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    from filterpy.kalman import KalmanFilter
    # --- 导入 GMM ---
    from sklearn.mixture import GaussianMixture
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(">>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。")
except ImportError as e:
    print(f"!!! [ERROR] 缺少核心库: {e}")
    print("!!! [HINT] 请确保已安装 filterpy (pip install filterpy) 和 scikit-learn (pip install scikit-learn)。")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")


# ==============================================================================
# 1. 导入必要的库
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    print(">>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    raise

# --- 导入 Metric3D 相关的模块 ---
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)

try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    raise

print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. 配置区域与路径检查
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")

# Locations of the model weights, model/tracker configs and video files.
YOLO_MODEL_PATH = '/root/autodl-tmp/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/0000.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_video_gmm_filtered.mp4'  # <-- output file name changed
TRACKER_CONFIG_PATH = '/root/autodl-tmp/bytetrack.yaml'

# Display label -> path that must exist before any model is loaded.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
    "跟踪器配置": TRACKER_CONFIG_PATH,
}

# Report every missing path (not just the first) before aborting.
missing_labels = []
for name, path in paths_to_check.items():
    if os.path.exists(path):
        continue
    print(f"!!! [ERROR] 路径检查失败: {name} 文件未找到于 '{path}'")
    missing_labels.append(name)
all_paths_ok = not missing_labels
if missing_labels:
    raise FileNotFoundError("一个或多个关键文件路径无效。")
print(">>> [DEBUG] 所有文件路径检查通过。")

# Use CUDA when PyTorch reports it as available; otherwise run on the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")

print(f">>> [DEBUG] 步骤 2: 配置完成。\n{'=' * 60}\n")

# ==============================================================================
# 3. 模型加载
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
# --- YOLO detector: load the weights and resolve the numeric id of the target class.
try:
    yolo_model = YOLO(YOLO_MODEL_PATH)
    TARGET_CLASS_NAME = 'Car'
    matching_class_ids = [k for k, v in yolo_model.names.items() if v == TARGET_CLASS_NAME]
    if not matching_class_ids:
        # Fail with an explicit message instead of an opaque IndexError.
        raise ValueError(
            f"类别 '{TARGET_CLASS_NAME}' 不在模型的类别列表中: {list(yolo_model.names.values())}"
        )
    TARGET_CLASS_ID = matching_class_ids[0]
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")
except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型或查找类别ID时失败: {e}")
    raise

# --- Metric3D depth model: build it from the config, then load the checkpoint.
try:
    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # The weights may be stored under 'model_state_dict', under 'model',
    # or be the checkpoint object itself.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them explicitly:
    # a checkpoint whose keys do not match would otherwise leave parameters
    # at their initial values while the cell still reports success.
    load_result = metric3d_model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys:
        print(
            f"!!! [WARN] 检查点中缺少 {len(load_result.missing_keys)} 个模型参数 "
            f"(示例: {load_result.missing_keys[:5]})"
        )
    if load_result.unexpected_keys:
        print(
            f"!!! [WARN] 检查点中有 {len(load_result.unexpected_keys)} 个未使用的参数 "
            f"(示例: {load_result.unexpected_keys[:5]})"
        )
    metric3d_model.eval()
    print(">>> [SUCCESS] Metric3Dv2 模型加载并移动到 GPU 成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    raise

print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. Main video-processing function
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")
def _central_roi(x1, y1, x2, y2, frame_w, frame_h, scale=0.5):
    """Return the centred sub-box of a detection, clipped to the frame.

    The box ``(x1, y1, x2, y2)`` is shrunk to ``scale`` of its width and
    height around its centre.  Returns ``(rx1, ry1, rx2, ry2)``; the region
    may be empty for degenerate boxes.
    """
    box_w, box_h = x2 - x1, y2 - y1
    roi_w, roi_h = int(box_w * scale), int(box_h * scale)
    rx1 = max(x1 + (box_w - roi_w) // 2, 0)
    ry1 = max(y1 + (box_h - roi_h) // 2, 0)
    return rx1, ry1, min(rx1 + roi_w, frame_w), min(ry1 + roi_h, frame_h)


def _observe_depth(depth_roi, predicted_depth=None, min_gmm_pixels=10):
    """Reduce a depth ROI to a single scalar observation.

    With more than ``min_gmm_pixels`` finite pixels, Gaussian mixtures with
    1-3 components are fitted and the one with the lowest BIC is kept.  The
    cluster mean closest to ``predicted_depth`` is returned, or the smallest
    cluster mean when no prediction exists (new track).  With fewer pixels,
    or if the fit fails, the plain mean of the finite pixels is used.

    Returns ``None`` when the ROI holds no finite pixel or the result is
    not finite.
    """
    import warnings

    values = depth_roi[np.isfinite(depth_roi)]
    if values.size == 0:
        return None

    observed = float(values.mean())
    if values.size > min_gmm_pixels:
        samples = values.reshape(-1, 1)
        try:
            best_gmm, lowest_bic = None, float("inf")
            for n_components in range(1, 4):
                gmm = GaussianMixture(n_components=n_components, random_state=0)
                gmm.fit(samples)
                bic_score = gmm.bic(samples)
                if bic_score < lowest_bic:
                    lowest_bic, best_gmm = bic_score, gmm
            cluster_means = best_gmm.means_.ravel()
            if predicted_depth is None:
                observed = float(cluster_means.min())
            else:
                nearest = int(np.argmin(np.abs(cluster_means - predicted_depth)))
                observed = float(cluster_means[nearest])
        except Exception as exc:
            # Deliberate best-effort: keep the mean fallback, but make the
            # failure visible instead of swallowing it silently.
            warnings.warn(
                f"GMM depth clustering failed, falling back to ROI mean: {exc!r}",
                RuntimeWarning,
            )

    return observed if np.isfinite(observed) else None


def _new_depth_filter(initial_depth):
    """Create a two-state Kalman filter initialised at ``initial_depth``.

    The state is ``[depth, depth change per predict step]``; only the depth
    component is measured (see ``H``).
    """
    kf = KalmanFilter(dim_x=2, dim_z=1)
    kf.x = np.array([initial_depth, 0.])
    kf.F = np.array([[1., 1.], [0., 1.]])  # depth advances by the rate each step
    kf.H = np.array([[1., 0.]])
    kf.P *= 100.
    kf.R = 5
    # NOTE(review): a scalar Q appears to be broadcast onto every element of
    # P in predict(); confirm this is intended rather than a diagonal matrix.
    kf.Q = 0.1
    return kf


def process_video_debug(input_path, output_path):
    """Annotate a video with tracked boxes and Kalman-smoothed depth labels.

    For every frame the function runs YOLO tracking for ``TARGET_CLASS_ID``,
    predicts a depth map with ``metric3d_model``, bilateral-filters it, and
    for each tracked box derives one depth observation from the central ROI.
    Each track id owns a Kalman filter that is predicted once per frame in
    which the track is present and updated when a valid observation exists.
    Filters of tracks absent from the current frame are discarded.

    Relies on the module-level ``yolo_model``, ``metric3d_model``, ``cfg``,
    ``DEVICE``, ``TRACKER_CONFIG_PATH`` and ``TARGET_CLASS_ID``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated ``mp4v`` video to write.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created.
    """
    print("\n--- 开始视频处理 ---")
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"无法打开输入视频: '{input_path}'")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize takes (width, height); presumably vit_size is stored as
        # (height, width), hence the swap -- TODO confirm against the config.
        vit_size = cfg.data_basic['vit_size']
        metric3d_input_size = (vit_size[1], vit_size[0])

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError(f"无法创建输出视频: '{output_path}'")

        print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")

        kalman_filters = {}  # track id -> KalmanFilter

        with tqdm(total=total_frames, desc="视频处理进度") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                track_results = yolo_model.track(
                    frame,
                    persist=True,
                    verbose=False,
                    tracker=TRACKER_CONFIG_PATH,
                    classes=[TARGET_CLASS_ID],
                )
                annotated_frame = frame.copy()

                # Dense depth for the whole frame, resized back to the video size.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                with torch.no_grad():
                    rgb_torch = (
                        torch.from_numpy(rgb_frame_resized)
                        .permute(2, 0, 1)
                        .unsqueeze(0)
                        .float()
                        .to(DEVICE)
                        / 255.0
                    )
                    pred_output = metric3d_model(data={'input': rgb_torch})
                    pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)
                pred_depth_filtered = cv2.bilateralFilter(
                    pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15
                )

                frame_boxes = track_results[0].boxes
                boxes = frame_boxes.xyxy.cpu().numpy()
                track_ids = frame_boxes.id.int().cpu().tolist() if frame_boxes.id is not None else []
                active_track_ids = set()

                for box, track_id in zip(boxes, track_ids):
                    active_track_ids.add(track_id)
                    x1, y1, x2, y2 = map(int, box)
                    cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                    # Advance an existing filter exactly once per frame, whichever
                    # observation path is taken below, so that update() is never
                    # applied to a state that was not predicted first.
                    kf = kalman_filters.get(track_id)
                    predicted_depth = None
                    if kf is not None:
                        kf.predict()
                        predicted_depth = float(kf.x[0])

                    roi_x1, roi_y1, roi_x2, roi_y2 = _central_roi(x1, y1, x2, y2, width, height)
                    depth_roi = pred_depth_filtered[roi_y1:roi_y2, roi_x1:roi_x2]
                    observed_depth = _observe_depth(depth_roi, predicted_depth)

                    # No usable measurement this frame: skip the update and label.
                    if observed_depth is None or observed_depth <= 0:
                        continue

                    if kf is None:
                        kf = _new_depth_filter(observed_depth)
                        kalman_filters[track_id] = kf
                    else:
                        kf.update(observed_depth)

                    smoothed_depth = float(kf.x[0])

                    depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                    (text_w, _), _ = cv2.getTextSize(depth_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                    # Keep the label inside the frame for boxes touching the top edge.
                    label_y = max(y1, 25)
                    cv2.rectangle(
                        annotated_frame,
                        (x1, label_y - 25),
                        (x1 + text_w + 5, label_y - 5),
                        (0, 100, 0),
                        -1,
                    )
                    cv2.putText(
                        annotated_frame,
                        depth_text,
                        (x1 + 2, label_y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.6,
                        (255, 255, 255),
                        2,
                    )

                # Drop filters of tracks not present in this frame so the
                # dictionary cannot grow without bound.
                for inactive_id in set(kalman_filters) - active_track_ids:
                    del kalman_filters[inactive_id]

                out.write(annotated_frame)
                pbar.update(1)
    finally:
        # Release the reader/writer even when processing fails midway.
        cap.release()
        if out is not None:
            out.release()

    cv2.destroyAllWindows()
    print(f"\n--- 视频处理完成！ ---")
    print(f">>> [SUCCESS] 输出视频已成功保存到: {output_path}")

# ==============================================================================
# 5. 运行主程序
# ==============================================================================
print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_debug(INPUT_VIDEO_PATH, OUTPUT_VIDEO_PATH)
except Exception as e:
    # Best-effort notebook cell: report the failure with a full traceback
    # instead of re-raising, so the closing banner below is still printed.
    import traceback
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {e}")
    traceback.print_exc()

print(f">>> [DEBUG] 步骤 5: 主程序执行完毕。\n{'=' * 60}")

>>> [DEBUG] 步骤 0: 检查关键库版本...
>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。
>>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [SUCCESS] Metric3Dv2 模型加载并移动到 GPU 成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 154 帧。


视频处理进度: 100%|██████████| 154/154 [01:25<00:00,  1.81it/s]


--- 视频处理完成！ ---
>>> [SUCCESS] 输出视频已成功保存到: /root/autodl-tmp/output_video_gmm_filtered.mp4
>>> [DEBUG] 步骤 5: 主程序执行完毕。





In [4]:
# ==============================================================================
# 0. 关键依赖库检查 (用于调试)
# ==============================================================================
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    from filterpy.kalman import KalmanFilter
    # --- 导入 GMM ---
    from sklearn.mixture import GaussianMixture
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(">>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。")
except ImportError as e:
    print(f"!!! [ERROR] 缺少核心库: {e}")
    print("!!! [HINT] 请确保已安装 filterpy (pip install filterpy) 和 scikit-learn (pip install scikit-learn)。")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")


# ==============================================================================
# 1. 导入必要的库
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    print(">>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    raise

# --- 导入 Metric3D 相关的模块 ---
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)

try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    raise

print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. 配置区域与路径检查
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")

# Locations of the model weights, model/tracker configs and video files.
YOLO_MODEL_PATH = '/root/autodl-tmp/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/0000.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_video_gmm_filtered.mp4'  # <-- output file name changed
TRACKER_CONFIG_PATH = '/root/autodl-tmp/bytetrack.yaml'

# Display label -> path that must exist before any model is loaded.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
    "跟踪器配置": TRACKER_CONFIG_PATH,
}

# Report every missing path (not just the first) before aborting.
missing_labels = []
for name, path in paths_to_check.items():
    if os.path.exists(path):
        continue
    print(f"!!! [ERROR] 路径检查失败: {name} 文件未找到于 '{path}'")
    missing_labels.append(name)
all_paths_ok = not missing_labels
if missing_labels:
    raise FileNotFoundError("一个或多个关键文件路径无效。")
print(">>> [DEBUG] 所有文件路径检查通过。")

# Use CUDA when PyTorch reports it as available; otherwise run on the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")

print(f">>> [DEBUG] 步骤 2: 配置完成。\n{'=' * 60}\n")

# ==============================================================================
# 3. 模型加载
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
# --- YOLO detector: load the weights and resolve the numeric id of the target class.
try:
    yolo_model = YOLO(YOLO_MODEL_PATH)
    TARGET_CLASS_NAME = 'Car'
    matching_class_ids = [k for k, v in yolo_model.names.items() if v == TARGET_CLASS_NAME]
    if not matching_class_ids:
        # Fail with an explicit message instead of an opaque IndexError.
        raise ValueError(
            f"类别 '{TARGET_CLASS_NAME}' 不在模型的类别列表中: {list(yolo_model.names.values())}"
        )
    TARGET_CLASS_ID = matching_class_ids[0]
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")
except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型或查找类别ID时失败: {e}")
    raise

# --- Metric3D depth model: build it from the config, then load the checkpoint.
try:
    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # The weights may be stored under 'model_state_dict', under 'model',
    # or be the checkpoint object itself.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them explicitly:
    # a checkpoint whose keys do not match would otherwise leave parameters
    # at their initial values while the cell still reports success.
    load_result = metric3d_model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys:
        print(
            f"!!! [WARN] 检查点中缺少 {len(load_result.missing_keys)} 个模型参数 "
            f"(示例: {load_result.missing_keys[:5]})"
        )
    if load_result.unexpected_keys:
        print(
            f"!!! [WARN] 检查点中有 {len(load_result.unexpected_keys)} 个未使用的参数 "
            f"(示例: {load_result.unexpected_keys[:5]})"
        )
    metric3d_model.eval()
    print(">>> [SUCCESS] Metric3Dv2 模型加载并移动到 GPU 成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    raise

print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. Main video-processing function
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")
def process_video_debug(input_path, output_path):
    print("\n--- 开始视频处理 ---")
    cap = cv2.VideoCapture(input_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    metric3d_input_size = (cfg.data_basic['vit_size'][1], cfg.data_basic['vit_size'][0])
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
    print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")

    kalman_filters = {}

    with tqdm(total=total_frames, desc="视频处理进度") as pbar:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            track_results = yolo_model.track(frame, persist=True, verbose=False, tracker=TRACKER_CONFIG_PATH, classes=[TARGET_CLASS_ID])
            annotated_frame = frame.copy()

            with torch.no_grad():
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                rgb_torch = torch.from_numpy(rgb_frame_resized).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
                pred_output = metric3d_model(data={'input': rgb_torch})
                pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)
                pred_depth_filtered = cv2.bilateralFilter(pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15)

            boxes = track_results[0].boxes.xyxy.cpu().numpy()
            track_ids = track_results[0].boxes.id.int().cpu().tolist() if track_results[0].boxes.id is not None else []
            active_track_ids = set()

            if len(track_ids) > 0:
                for box, track_id in zip(boxes, track_ids):
                    active_track_ids.add(track_id)
                    x1, y1, x2, y2 = map(int, box)
                    
                    cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    
                    # --- 修复语法错误：恢复ROI计算逻辑 ---
                    box_w, box_h = x2 - x1, y2 - y1
                    roi_w, roi_h = int(box_w * 0.5), int(box_h * 0.5)
                    roi_x1 = max(x1 + (box_w - roi_w) // 2, 0)
                    roi_y1 = max(y1 + (box_h - roi_h) // 2, 0)
                    roi_x2 = min(roi_x1 + roi_w, width)
                    roi_y2 = min(roi_y1 + roi_h, height)
                    # --- 修复结束 ---

                    depth_roi = pred_depth_filtered[roi_y1:roi_y2, roi_x1:roi_x2]
                    
                    observed_depth = 0.0
                    if depth_roi.size > 10: # GMM需要一定数量的点才能可靠工作
                        # ============================================================
                        # === 核心改进: 使用 GMM + BIC 动态寻找最佳深度 ===
                        # ============================================================
                        try:
                            pixels = depth_roi.flatten().reshape(-1, 1)
                            
                            # 1. 自动寻找最佳聚类数量 (1, 2, or 3)
                            n_components_range = range(1, 4)
                            lowest_bic = np.infty
                            best_gmm = None
                            for n_components in n_components_range:
                                gmm = GaussianMixture(n_components=n_components, random_state=0)
                                gmm.fit(pixels)
                                bic_score = gmm.bic(pixels)
                                if bic_score < lowest_bic:
                                    lowest_bic = bic_score
                                    best_gmm = gmm
                            
                            cluster_means = best_gmm.means_.flatten()
                            
                            # --- 修正与卡尔曼滤波的交互逻辑 ---
                            if track_id in kalman_filters:
                                kf = kalman_filters[track_id]
                                # 步骤A: 首先, 基于上一状态进行预测
                                kf.predict()
                                predicted_depth = kf.x[0] # 预测出的当前深度
                                
                                # 步骤B: 然后, 在GMM找到的多个聚类中心里, 找到与预测值最接近的一个
                                observed_depth = min(cluster_means, key=lambda x: abs(x - predicted_depth))
                            else:
                                # 如果是新目标, 没有历史信息, 只能假设最近的物体是目标
                                observed_depth = min(cluster_means)

                        except Exception:
                            # 如果GMM失败, 回退到均值
                            observed_depth = np.mean(depth_roi)
                        # ============================================================
                    elif depth_roi.size > 0:
                        observed_depth = np.mean(depth_roi)
                    else:
                        continue
                        
                    if observed_depth <= 0:
                        continue

                    if track_id not in kalman_filters:
                        # 初始化新的卡尔曼滤波器
                        kf = KalmanFilter(dim_x=2, dim_z=1)
                        kf.x = np.array([observed_depth, 0.])
                        kf.F = np.array([[1., 1.], [0., 1.]])
                        kf.H = np.array([[1., 0.]])
                        kf.P *= 100.
                        kf.R = 5 # GMM的结果更可信, 可以适当调低测量噪声
                        kf.Q = 0.1
                        kalman_filters[track_id] = kf
                    else:
                        # 对于已有目标, predict已在上面完成, 这里直接update
                        kf = kalman_filters[track_id]
                        kf.update(observed_depth)

                    smoothed_depth = kf.x[0]
                    
                    depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                    (text_w, text_h), _ = cv2.getTextSize(depth_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                    cv2.rectangle(annotated_frame, (x1, y1 - 25), (x1 + text_w + 5, y1 - 5), (0, 100, 0), -1)
                    cv2.putText(annotated_frame, depth_text, (x1 + 2, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
            
            inactive_ids = set(kalman_filters.keys()) - active_track_ids
            for inactive_id in inactive_ids:
                del kalman_filters[inactive_id]

            out.write(annotated_frame)
            pbar.update(1)

    cap.release()
    out.release()
    cv2.destroyAllWindows()
    print(f"\n--- 视频处理完成！ ---")
    print(f">>> [SUCCESS] 输出视频已成功保存到: {output_path}")

# ==============================================================================
# 5. 运行主程序
# ==============================================================================
# Step 5 driver: run the pipeline once. Any failure is reported with its
# traceback instead of propagating, so the closing banner is always printed.
print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_debug(INPUT_VIDEO_PATH, OUTPUT_VIDEO_PATH)
except Exception as run_error:
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {run_error}")
    # Imported lazily: only needed on the failure path.
    import traceback
    traceback.print_exc()

closing_rule = "=" * 60
print(f">>> [DEBUG] 步骤 5: 主程序执行完毕。\n{closing_rule}")

>>> [DEBUG] 步骤 0: 检查关键库版本...
>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库 cv2, torch, numpy, ultralytics, tqdm, mmcv.Config 导入成功。
>>> [DEBUG] Metric3D 模块 'DepthModel' (作为 MonoDepthModel) 导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [SUCCESS] Metric3Dv2 模型加载并移动到 GPU 成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 154 帧。


视频处理进度: 100%|██████████| 154/154 [01:25<00:00,  1.81it/s]


--- 视频处理完成！ ---
>>> [SUCCESS] 输出视频已成功保存到: /root/autodl-tmp/output_video_gmm_filtered.mp4
>>> [DEBUG] 步骤 5: 主程序执行完毕。



