In [2]:
# ==============================================================================
# 0. 关键依赖库检查 (用于调试)
# ==============================================================================
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    import ultralytics
    from filterpy.kalman import KalmanFilter
    from sklearn.mixture import GaussianMixture
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(f">>> [INFO] ultralytics version: {ultralytics.__version__}")
    print(">>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。")
except ImportError as e:
    print(f"!!! [ERROR] 缺少核心库: {e}")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")

# ==============================================================================
# 1. 导入必要的库
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    from types import SimpleNamespace
    from filterpy.kalman import KalmanFilter as FilterPyKalmanFilter
    from sklearn.mixture import GaussianMixture
    # 导入我们自定义的、深度融合的跟踪器
    from custom_byte_tracker import ByteTracker
    print(">>> [DEBUG] 核心库导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    raise

# --- 导入 Metric3D 相关的模块 ---
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)
try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    raise
print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. 配置区域与路径检查
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")
# Locations of the model weights, the Metric3D config and the input/output videos.
YOLO_MODEL_PATH = '/root/autodl-tmp/weights/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/kitti_videos/0002.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_final_robust_fusion2.mp4'

# Human-readable label -> path for every file that must exist before any
# model is loaded. The output path is intentionally absent: it is created later.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
}
# Collect every missing entry so the error names the offending files instead
# of only stating that "something" is missing.
missing_paths = {
    label: path for label, path in paths_to_check.items() if not os.path.exists(path)
}
if missing_paths:
    missing_details = "; ".join(f"{label}: {path}" for label, path in missing_paths.items())
    raise FileNotFoundError(f"一个或多个关键文件路径无效。缺失: {missing_details}")

print(">>> [DEBUG] 所有文件路径检查通过。")
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")
print(">>> [DEBUG] 步骤 2: 配置完成。\n" + "="*60 + "\n")

# ==============================================================================
# 3. 模型加载
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
# --- Detector: load the YOLO weights and resolve the numeric id of the target class ---
try:
    yolo_model = YOLO(YOLO_MODEL_PATH)
    TARGET_CLASS_NAME = 'Car'
    if not (hasattr(yolo_model, 'names') and isinstance(yolo_model.names, dict)):
        raise ValueError("YOLO 模型没有有效的 'names' 属性或格式不正确")
    # First id whose name matches; None when the checkpoint has no such class,
    # which is reported explicitly instead of surfacing as a bare IndexError.
    TARGET_CLASS_ID = next(
        (k for k, v in yolo_model.names.items() if v == TARGET_CLASS_NAME), None
    )
    if TARGET_CLASS_ID is None:
        raise ValueError(
            f"YOLO 模型中不存在类别 '{TARGET_CLASS_NAME}'，可用类别: {list(yolo_model.names.values())}"
        )
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")
except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型失败: {e}")
    raise

# --- Depth network: build Metric3D from its config and load the checkpoint ---
try:
    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # The weights may sit under 'model_state_dict', under 'model', or be the
    # checkpoint object itself; try them in that order.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them: a checkpoint that
    # matches nothing would otherwise "load" silently and leave random weights.
    load_result = metric3d_model.load_state_dict(state_dict, strict=False)
    missing_keys = getattr(load_result, 'missing_keys', [])
    unexpected_keys = getattr(load_result, 'unexpected_keys', [])
    if missing_keys or unexpected_keys:
        print(
            f">>> [WARN] Metric3D 权重未完全匹配: 缺失 {len(missing_keys)} 个键, "
            f"多余 {len(unexpected_keys)} 个键。"
        )
    metric3d_model.eval()
    print(">>> [SUCCESS] Metric3Dv2 模型加载成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    raise
print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. 视频处理主函数 (最终整合版)
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")
def _central_depth_roi(depth_map, x1, y1, x2, y2, fraction):
    """Return the centred sub-window of a box, clipped to the depth map.

    The window spans ``fraction`` of the box width and height and shares the
    box centre. An empty array is returned for a degenerate box or a window
    that lies outside the map.
    """
    map_h, map_w = depth_map.shape[:2]
    box_w, box_h = x2 - x1, y2 - y1
    if box_w <= 0 or box_h <= 0:
        return depth_map[0:0, 0:0]
    roi_w, roi_h = int(box_w * fraction), int(box_h * fraction)
    # Clamp the origin at 0 so a box hanging off the frame cannot produce a
    # negative index, which NumPy would interpret as "count from the end".
    roi_x1 = max(x1 + (box_w - roi_w) // 2, 0)
    roi_y1 = max(y1 + (box_h - roi_h) // 2, 0)
    roi_x2 = min(roi_x1 + roi_w, map_w)
    roi_y2 = min(roi_y1 + roi_h, map_h)
    return depth_map[roi_y1:roi_y2, roi_x1:roi_x2]


def _pick_depth_observation(depth_roi, predicted_depth=None):
    """Reduce a depth ROI to one depth observation.

    With more than 10 pixels, Gaussian mixtures with 1-3 components are fitted
    and the one with the lowest BIC is kept. The cluster mean closest to
    ``predicted_depth`` is returned, or the smallest mean when no prediction
    exists (new track). Smaller ROIs, or a failed fit, fall back to the median.
    Returns 0.0 for an empty ROI.
    """
    if depth_roi.size == 0:
        return 0.0
    if depth_roi.size > 10:
        try:
            pixels = depth_roi.reshape(-1, 1)
            best_gmm, lowest_bic = None, np.inf
            for n_components in range(1, 4):
                gmm = GaussianMixture(n_components=n_components, random_state=0)
                gmm.fit(pixels)
                bic_score = gmm.bic(pixels)
                if bic_score < lowest_bic:
                    lowest_bic, best_gmm = bic_score, gmm
            cluster_means = best_gmm.means_.flatten()
            if predicted_depth is None:
                return float(min(cluster_means))
            return float(min(cluster_means, key=lambda m: abs(m - predicted_depth)))
        except Exception:
            # Deliberate best effort: any clustering failure degrades to the
            # median below rather than aborting the whole video.
            pass
    return float(np.median(depth_roi))


def _new_depth_filter(initial_depth):
    """Create a per-track Kalman filter seeded at ``initial_depth``.

    The state is [depth, depth change per frame]; only depth is observed.
    """
    kf = FilterPyKalmanFilter(dim_x=2, dim_z=1)
    kf.x = np.array([initial_depth, 0.])
    kf.F = np.array([[1., 1.], [0., 1.]])
    kf.H = np.array([[1., 0.]])
    kf.P *= 100.
    kf.R = 5
    kf.Q = 0.1
    return kf


def process_video_with_robust_depth_fusion(input_path, output_path):
    """Write a copy of a video annotated with tracked boxes and smoothed depth.

    Per frame: detect the target class with YOLO, predict a depth map with
    Metric3D, attach a median depth to every detection, associate detections
    with ByteTracker, then refine each track's depth (GMM over the box centre)
    and smooth it with a per-track Kalman filter before drawing the result.

    Uses the module-level ``yolo_model``, ``TARGET_CLASS_ID``,
    ``metric3d_model``, ``cfg`` and ``DEVICE``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated 'mp4v' video to write.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created.
    """
    print("\n--- 开始视频处理 (最终鲁棒深度融合追踪) ---")
    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"无法打开输入视频: {input_path}")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize takes (width, height); the two vit_size entries are swapped
        # here — presumably vit_size is stored as (height, width), TODO confirm.
        metric3d_input_size = (cfg.data_basic['vit_size'][1], cfg.data_basic['vit_size'][0])
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError(f"无法创建输出视频: {output_path}")
        print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")

        # Custom tracker used for data association.
        tracker_args = SimpleNamespace(
            track_high_thresh=0.5,
            track_low_thresh=0.1,
            new_track_thresh=0.6,
            track_buffer=30,
            match_thresh=0.8,
            mot20=False
        )
        tracker = ByteTracker(args=tracker_args, frame_rate=fps)

        # track id -> Kalman filter used only for the post-tracking depth refinement.
        robust_depth_filters = {}

        with tqdm(total=total_frames, desc="视频处理进度") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                annotated_frame = frame.copy()

                # Step 1: object detection, restricted to the target class.
                det_results = yolo_model(frame, classes=[TARGET_CLASS_ID], verbose=False)[0]

                # Step 2: depth map for the whole frame.
                with torch.no_grad():
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                    rgb_torch = torch.from_numpy(rgb_frame_resized).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
                    pred_output = metric3d_model(data={'input': rgb_torch})
                    pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                    # Back to frame resolution so box coordinates index it directly.
                    pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)
                    pred_depth_filtered = cv2.bilateralFilter(pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15)

                # Step 3 (before tracking): median depth of the central quarter of each box.
                detections_with_depth = []
                if det_results.boxes.shape[0] > 0:
                    for box in det_results.boxes:
                        x1, y1, x2, y2 = map(int, box.xyxy[0])
                        score = box.conf[0].item()
                        cls_id = box.cls[0].item()

                        if x2 - x1 <= 0 or y2 - y1 <= 0:
                            continue

                        depth_roi = _central_depth_roi(pred_depth_filtered, x1, y1, x2, y2, 0.25)
                        # Median rather than mean: less sensitive to outlier pixels.
                        initial_depth = np.median(depth_roi) if depth_roi.size > 0 else 0.0

                        detections_with_depth.append([x1, y1, x2, y2, score, cls_id, initial_depth])

                # Step 4 (tracking): data association by the custom tracker.
                # NOTE(review): the tracker is not called on frames without
                # detections, so it never sees those frames — confirm intended.
                tracks = tracker.update(np.array(detections_with_depth)) if len(detections_with_depth) > 0 else np.empty((0, 8))

                # Step 5 (after tracking): refined depth and visualisation per track.
                active_track_ids = set()
                if tracks.shape[0] > 0:
                    for track in tracks:
                        x1, y1, x2, y2 = map(int, track[:4])
                        track_id = int(track[4])
                        active_track_ids.add(track_id)
                        cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                        if x2 - x1 <= 0 or y2 - y1 <= 0:
                            continue

                        # Advance an existing filter exactly once per frame,
                        # whichever observation path is taken below, so every
                        # update() is preceded by a predict().
                        kf = robust_depth_filters.get(track_id)
                        predicted_depth = None
                        if kf is not None:
                            kf.predict()
                            predicted_depth = kf.x[0]

                        # Wider central window (half the box) for the GMM clustering.
                        depth_roi = _central_depth_roi(pred_depth_filtered, x1, y1, x2, y2, 0.5)
                        observed_depth = _pick_depth_observation(depth_roi, predicted_depth)

                        # Skip unusable observations; NaN/inf would corrupt the filter state.
                        if not np.isfinite(observed_depth) or observed_depth <= 0:
                            continue

                        # Temporal smoothing with the track's own Kalman filter.
                        if kf is None:
                            kf = _new_depth_filter(observed_depth)
                            robust_depth_filters[track_id] = kf
                        else:
                            kf.update(observed_depth)

                        smoothed_depth = kf.x[0]

                        # Draw the id/depth label on a filled background above the box.
                        depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                        (text_w, text_h), _ = cv2.getTextSize(depth_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                        cv2.rectangle(annotated_frame, (x1, y1 - 25), (x1 + text_w + 5, y1 - 5), (0, 100, 0), -1)
                        cv2.putText(annotated_frame, depth_text, (x1 + 2, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                # Drop filters of tracks that were not returned for this frame.
                for inactive_id in set(robust_depth_filters) - active_track_ids:
                    del robust_depth_filters[inactive_id]

                out.write(annotated_frame)
                pbar.update(1)
    finally:
        # Release on every exit path (including interruption) so the partially
        # written output file is finalised and the capture handle is freed.
        cap.release()
        if out is not None:
            out.release()

    cv2.destroyAllWindows()
    print(f"\n--- 视频处理完成！输出保存在: {output_path} ---")

print(">>> [DEBUG] 步骤 4: 视频处理函数定义完成。\n" + "=" * 60 + "\n")

# ==============================================================================
# 5. Run the main program
# ==============================================================================
print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_with_robust_depth_fusion(
        input_path=INPUT_VIDEO_PATH,
        output_path=OUTPUT_VIDEO_PATH,
    )
except Exception as err:
    # Top-level boundary: report the failure with its traceback, then let the
    # cell finish instead of propagating the exception.
    from traceback import print_exc
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {err}")
    print_exc()
print(">>> [DEBUG] 步骤 5: 主程序执行完毕。\n" + "=" * 60)

>>> [DEBUG] 步骤 0: 检查关键库版本...
>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] ultralytics version: 8.3.213
>>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库导入成功。
>>> [DEBUG] Metric3D 模块导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [SUCCESS] Metric3Dv2 模型加载成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 4: 视频处理函数定义完成。

>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 (最终鲁棒深度融合追踪) ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 233 帧。


视频处理进度:   4%|▍         | 9/233 [00:02<01:12,  3.11it/s]


KeyboardInterrupt: 

In [2]:
# ==============================================================================
# 0. 关键依赖库检查 (用于调试)
# ==============================================================================
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    import ultralytics
    from filterpy.kalman import KalmanFilter
    from sklearn.mixture import GaussianMixture
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(f">>> [INFO] ultralytics version: {ultralytics.__version__}")
    print(">>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。")
except ImportError as e:
    print(f"!!! [ERROR] 缺少核心库: {e}")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")

# ==============================================================================
# 1. 导入必要的库
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    from types import SimpleNamespace
    from filterpy.kalman import KalmanFilter as FilterPyKalmanFilter
    from sklearn.mixture import GaussianMixture
    # 导入我们自定义的、深度融合的跟踪器
    from custom_byte_tracker import ByteTracker
    print(">>> [DEBUG] 核心库导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    raise

# --- 导入 Metric3D 相关的模块 ---
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)
try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    raise
print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. 配置区域与路径检查
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")
# Locations of the model weights, the Metric3D config and the input/output videos.
YOLO_MODEL_PATH = '/root/autodl-tmp/weights/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/kitti_videos/0002.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_final_robust_fusion2.mp4'

# Human-readable label -> path for every file that must exist before any
# model is loaded. The output path is intentionally absent: it is created later.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
}
# Collect every missing entry so the error names the offending files instead
# of only stating that "something" is missing.
missing_paths = {
    label: path for label, path in paths_to_check.items() if not os.path.exists(path)
}
if missing_paths:
    missing_details = "; ".join(f"{label}: {path}" for label, path in missing_paths.items())
    raise FileNotFoundError(f"一个或多个关键文件路径无效。缺失: {missing_details}")

print(">>> [DEBUG] 所有文件路径检查通过。")
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")
print(">>> [DEBUG] 步骤 2: 配置完成。\n" + "="*60 + "\n")

# ==============================================================================
# 3. 模型加载
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
# --- Detector: load the YOLO weights and resolve the numeric id of the target class ---
try:
    yolo_model = YOLO(YOLO_MODEL_PATH)
    TARGET_CLASS_NAME = 'Car'
    if not (hasattr(yolo_model, 'names') and isinstance(yolo_model.names, dict)):
        raise ValueError("YOLO 模型没有有效的 'names' 属性或格式不正确")
    # First id whose name matches; None when the checkpoint has no such class,
    # which is reported explicitly instead of surfacing as a bare IndexError.
    TARGET_CLASS_ID = next(
        (k for k, v in yolo_model.names.items() if v == TARGET_CLASS_NAME), None
    )
    if TARGET_CLASS_ID is None:
        raise ValueError(
            f"YOLO 模型中不存在类别 '{TARGET_CLASS_NAME}'，可用类别: {list(yolo_model.names.values())}"
        )
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")
except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型失败: {e}")
    raise

# --- Depth network: build Metric3D from its config and load the checkpoint ---
try:
    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # The weights may sit under 'model_state_dict', under 'model', or be the
    # checkpoint object itself; try them in that order.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them: a checkpoint that
    # matches nothing would otherwise "load" silently and leave random weights.
    load_result = metric3d_model.load_state_dict(state_dict, strict=False)
    missing_keys = getattr(load_result, 'missing_keys', [])
    unexpected_keys = getattr(load_result, 'unexpected_keys', [])
    if missing_keys or unexpected_keys:
        print(
            f">>> [WARN] Metric3D 权重未完全匹配: 缺失 {len(missing_keys)} 个键, "
            f"多余 {len(unexpected_keys)} 个键。"
        )
    metric3d_model.eval()
    print(">>> [SUCCESS] Metric3Dv2 模型加载成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    raise
print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. 视频处理主函数 (最终整合版)
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")
def _central_depth_roi(depth_map, x1, y1, x2, y2, fraction):
    """Return the centred sub-window of a box, clipped to the depth map.

    The window spans ``fraction`` of the box width and height and shares the
    box centre. An empty array is returned for a degenerate box or a window
    that lies outside the map.
    """
    map_h, map_w = depth_map.shape[:2]
    box_w, box_h = x2 - x1, y2 - y1
    if box_w <= 0 or box_h <= 0:
        return depth_map[0:0, 0:0]
    roi_w, roi_h = int(box_w * fraction), int(box_h * fraction)
    # Clamp the origin at 0 so a box hanging off the frame cannot produce a
    # negative index, which NumPy would interpret as "count from the end".
    roi_x1 = max(x1 + (box_w - roi_w) // 2, 0)
    roi_y1 = max(y1 + (box_h - roi_h) // 2, 0)
    roi_x2 = min(roi_x1 + roi_w, map_w)
    roi_y2 = min(roi_y1 + roi_h, map_h)
    return depth_map[roi_y1:roi_y2, roi_x1:roi_x2]


def _pick_depth_observation(depth_roi, predicted_depth=None):
    """Reduce a depth ROI to one depth observation.

    With more than 10 pixels, Gaussian mixtures with 1-3 components are fitted
    and the one with the lowest BIC is kept. The cluster mean closest to
    ``predicted_depth`` is returned, or the smallest mean when no prediction
    exists (new track). Smaller ROIs, or a failed fit, fall back to the median.
    Returns 0.0 for an empty ROI.
    """
    if depth_roi.size == 0:
        return 0.0
    if depth_roi.size > 10:
        try:
            pixels = depth_roi.reshape(-1, 1)
            best_gmm, lowest_bic = None, np.inf
            for n_components in range(1, 4):
                gmm = GaussianMixture(n_components=n_components, random_state=0)
                gmm.fit(pixels)
                bic_score = gmm.bic(pixels)
                if bic_score < lowest_bic:
                    lowest_bic, best_gmm = bic_score, gmm
            cluster_means = best_gmm.means_.flatten()
            if predicted_depth is None:
                return float(min(cluster_means))
            return float(min(cluster_means, key=lambda m: abs(m - predicted_depth)))
        except Exception:
            # Deliberate best effort: any clustering failure degrades to the
            # median below rather than aborting the whole video.
            pass
    return float(np.median(depth_roi))


def _new_depth_filter(initial_depth):
    """Create a per-track Kalman filter seeded at ``initial_depth``.

    The state is [depth, depth change per frame]; only depth is observed.
    """
    kf = FilterPyKalmanFilter(dim_x=2, dim_z=1)
    kf.x = np.array([initial_depth, 0.])
    kf.F = np.array([[1., 1.], [0., 1.]])
    kf.H = np.array([[1., 0.]])
    kf.P *= 100.
    kf.R = 5
    kf.Q = 0.1
    return kf


def process_video_with_robust_depth_fusion(input_path, output_path):
    """Write a copy of a video annotated with tracked boxes and smoothed depth.

    Per frame: detect the target class with YOLO, predict a depth map with
    Metric3D, attach a median depth to every detection, associate detections
    with ByteTracker, then refine each track's depth (GMM over the box centre)
    and smooth it with a per-track Kalman filter before drawing the result.

    Uses the module-level ``yolo_model``, ``TARGET_CLASS_ID``,
    ``metric3d_model``, ``cfg`` and ``DEVICE``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated 'mp4v' video to write.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created.
    """
    print("\n--- 开始视频处理 (最终鲁棒深度融合追踪) ---")
    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"无法打开输入视频: {input_path}")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize takes (width, height); the two vit_size entries are swapped
        # here — presumably vit_size is stored as (height, width), TODO confirm.
        metric3d_input_size = (cfg.data_basic['vit_size'][1], cfg.data_basic['vit_size'][0])
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError(f"无法创建输出视频: {output_path}")
        print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")

        # Custom tracker used for data association.
        tracker_args = SimpleNamespace(
            track_high_thresh=0.5,
            track_low_thresh=0.1,
            new_track_thresh=0.6,
            track_buffer=30,
            match_thresh=0.8,
            mot20=False
        )
        tracker = ByteTracker(args=tracker_args, frame_rate=fps)

        # track id -> Kalman filter used only for the post-tracking depth refinement.
        robust_depth_filters = {}

        with tqdm(total=total_frames, desc="视频处理进度") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                annotated_frame = frame.copy()

                # Step 1: object detection, restricted to the target class.
                det_results = yolo_model(frame, classes=[TARGET_CLASS_ID], verbose=False)[0]

                # Step 2: depth map for the whole frame.
                with torch.no_grad():
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                    rgb_torch = torch.from_numpy(rgb_frame_resized).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
                    pred_output = metric3d_model(data={'input': rgb_torch})
                    pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                    # Back to frame resolution so box coordinates index it directly.
                    pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)
                    pred_depth_filtered = cv2.bilateralFilter(pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15)

                # Step 3 (before tracking): median depth of the central quarter of each box.
                detections_with_depth = []
                if det_results.boxes.shape[0] > 0:
                    for box in det_results.boxes:
                        x1, y1, x2, y2 = map(int, box.xyxy[0])
                        score = box.conf[0].item()
                        cls_id = box.cls[0].item()

                        if x2 - x1 <= 0 or y2 - y1 <= 0:
                            continue

                        depth_roi = _central_depth_roi(pred_depth_filtered, x1, y1, x2, y2, 0.25)
                        # Median rather than mean: less sensitive to outlier pixels.
                        initial_depth = np.median(depth_roi) if depth_roi.size > 0 else 0.0

                        detections_with_depth.append([x1, y1, x2, y2, score, cls_id, initial_depth])

                # Step 4 (tracking): data association by the custom tracker.
                # NOTE(review): the tracker is not called on frames without
                # detections, so it never sees those frames — confirm intended.
                tracks = tracker.update(np.array(detections_with_depth)) if len(detections_with_depth) > 0 else np.empty((0, 8))

                # Step 5 (after tracking): refined depth and visualisation per track.
                active_track_ids = set()
                if tracks.shape[0] > 0:
                    for track in tracks:
                        x1, y1, x2, y2 = map(int, track[:4])
                        track_id = int(track[4])
                        active_track_ids.add(track_id)
                        cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                        if x2 - x1 <= 0 or y2 - y1 <= 0:
                            continue

                        # Advance an existing filter exactly once per frame,
                        # whichever observation path is taken below, so every
                        # update() is preceded by a predict().
                        kf = robust_depth_filters.get(track_id)
                        predicted_depth = None
                        if kf is not None:
                            kf.predict()
                            predicted_depth = kf.x[0]

                        # Wider central window (half the box) for the GMM clustering.
                        depth_roi = _central_depth_roi(pred_depth_filtered, x1, y1, x2, y2, 0.5)
                        observed_depth = _pick_depth_observation(depth_roi, predicted_depth)

                        # Skip unusable observations; NaN/inf would corrupt the filter state.
                        if not np.isfinite(observed_depth) or observed_depth <= 0:
                            continue

                        # Temporal smoothing with the track's own Kalman filter.
                        if kf is None:
                            kf = _new_depth_filter(observed_depth)
                            robust_depth_filters[track_id] = kf
                        else:
                            kf.update(observed_depth)

                        smoothed_depth = kf.x[0]

                        # Draw the id/depth label on a filled background above the box.
                        depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                        (text_w, text_h), _ = cv2.getTextSize(depth_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                        cv2.rectangle(annotated_frame, (x1, y1 - 25), (x1 + text_w + 5, y1 - 5), (0, 100, 0), -1)
                        cv2.putText(annotated_frame, depth_text, (x1 + 2, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                # Drop filters of tracks that were not returned for this frame.
                for inactive_id in set(robust_depth_filters) - active_track_ids:
                    del robust_depth_filters[inactive_id]

                out.write(annotated_frame)
                pbar.update(1)
    finally:
        # Release on every exit path (including interruption) so the partially
        # written output file is finalised and the capture handle is freed.
        cap.release()
        if out is not None:
            out.release()

    cv2.destroyAllWindows()
    print(f"\n--- 视频处理完成！输出保存在: {output_path} ---")

print(">>> [DEBUG] 步骤 4: 视频处理函数定义完成。\n" + "=" * 60 + "\n")

# ==============================================================================
# 5. Run the main program
# ==============================================================================
print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_with_robust_depth_fusion(
        input_path=INPUT_VIDEO_PATH,
        output_path=OUTPUT_VIDEO_PATH,
    )
except Exception as err:
    # Top-level boundary: report the failure with its traceback, then let the
    # cell finish instead of propagating the exception.
    from traceback import print_exc
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {err}")
    print_exc()
print(">>> [DEBUG] 步骤 5: 主程序执行完毕。\n" + "=" * 60)

>>> [DEBUG] 步骤 0: 检查关键库版本...
>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] ultralytics version: 8.3.213
>>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库导入成功。
>>> [DEBUG] Metric3D 模块导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [SUCCESS] Metric3Dv2 模型加载成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 4: 视频处理函数定义完成。

>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 (最终鲁棒深度融合追踪) ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 233 帧。


视频处理进度:   4%|▍         | 9/233 [00:02<01:12,  3.11it/s]


KeyboardInterrupt: 

In [2]:
# ==============================================================================
# 0. 关键依赖库检查 (用于调试)
# ==============================================================================
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    import ultralytics
    from filterpy.kalman import KalmanFilter
    from sklearn.mixture import GaussianMixture
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(f">>> [INFO] ultralytics version: {ultralytics.__version__}")
    print(">>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。")
except ImportError as e:
    print(f"!!! [ERROR] 缺少核心库: {e}")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")

# ==============================================================================
# 1. 导入必要的库
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    from types import SimpleNamespace
    from filterpy.kalman import KalmanFilter as FilterPyKalmanFilter
    from sklearn.mixture import GaussianMixture
    # 导入我们自定义的、深度融合的跟踪器
    from custom_byte_tracker import ByteTracker
    print(">>> [DEBUG] 核心库导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    raise

# --- 导入 Metric3D 相关的模块 ---
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)
try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    raise
print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. 配置区域与路径检查
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")
# Locations of the model weights, the Metric3D config and the input/output videos.
YOLO_MODEL_PATH = '/root/autodl-tmp/weights/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/kitti_videos/0002.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_final_robust_fusion2.mp4'

# Human-readable label -> path for every file that must exist before any
# model is loaded. The output path is intentionally absent: it is created later.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
}
# Collect every missing entry so the error names the offending files instead
# of only stating that "something" is missing.
missing_paths = {
    label: path for label, path in paths_to_check.items() if not os.path.exists(path)
}
if missing_paths:
    missing_details = "; ".join(f"{label}: {path}" for label, path in missing_paths.items())
    raise FileNotFoundError(f"一个或多个关键文件路径无效。缺失: {missing_details}")

print(">>> [DEBUG] 所有文件路径检查通过。")
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")
print(">>> [DEBUG] 步骤 2: 配置完成。\n" + "="*60 + "\n")

# ==============================================================================
# 3. 模型加载
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
# --- Detector: load the YOLO weights and resolve the numeric id of the target class ---
try:
    yolo_model = YOLO(YOLO_MODEL_PATH)
    TARGET_CLASS_NAME = 'Car'
    if not (hasattr(yolo_model, 'names') and isinstance(yolo_model.names, dict)):
        raise ValueError("YOLO 模型没有有效的 'names' 属性或格式不正确")
    # First id whose name matches; None when the checkpoint has no such class,
    # which is reported explicitly instead of surfacing as a bare IndexError.
    TARGET_CLASS_ID = next(
        (k for k, v in yolo_model.names.items() if v == TARGET_CLASS_NAME), None
    )
    if TARGET_CLASS_ID is None:
        raise ValueError(
            f"YOLO 模型中不存在类别 '{TARGET_CLASS_NAME}'，可用类别: {list(yolo_model.names.values())}"
        )
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")
except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型失败: {e}")
    raise

# --- Depth network: build Metric3D from its config and load the checkpoint ---
try:
    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # The weights may sit under 'model_state_dict', under 'model', or be the
    # checkpoint object itself; try them in that order.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them: a checkpoint that
    # matches nothing would otherwise "load" silently and leave random weights.
    load_result = metric3d_model.load_state_dict(state_dict, strict=False)
    missing_keys = getattr(load_result, 'missing_keys', [])
    unexpected_keys = getattr(load_result, 'unexpected_keys', [])
    if missing_keys or unexpected_keys:
        print(
            f">>> [WARN] Metric3D 权重未完全匹配: 缺失 {len(missing_keys)} 个键, "
            f"多余 {len(unexpected_keys)} 个键。"
        )
    metric3d_model.eval()
    print(">>> [SUCCESS] Metric3Dv2 模型加载成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    raise
print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. Main video-processing function (final integrated version)
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")


def _select_depth_observation(depth_roi, predicted_depth=None):
    """Pick one depth observation for a track from its crop of the depth map.

    For crops with more than 10 pixels, Gaussian mixtures with 1-3 components
    are fitted and the one with the lowest BIC is kept. The result is the
    component mean closest to ``predicted_depth`` or, when no prediction is
    available (new track), the smallest component mean. Smaller crops, and
    crops on which the mixture fit fails, fall back to the median.

    Args:
        depth_roi: 2-D array cropped from the depth map (may be empty).
        predicted_depth: Kalman-predicted depth for the track, or ``None``.

    Returns:
        The selected depth as a float; ``0.0`` when the crop is empty.
    """
    if depth_roi.size == 0:
        return 0.0
    if depth_roi.size > 10:
        try:
            pixels = depth_roi.reshape(-1, 1)
            best_gmm, lowest_bic = None, np.inf
            for n_components in range(1, 4):
                gmm = GaussianMixture(n_components=n_components, random_state=0)
                gmm.fit(pixels)
                bic_score = gmm.bic(pixels)
                if bic_score < lowest_bic:
                    lowest_bic, best_gmm = bic_score, gmm
            if best_gmm is not None:
                cluster_means = best_gmm.means_.ravel()
                if predicted_depth is None:
                    return float(cluster_means.min())
                nearest = int(np.argmin(np.abs(cluster_means - predicted_depth)))
                return float(cluster_means[nearest])
        except (ValueError, ArithmeticError):
            # Only numerical/fit failures are tolerated here; programming
            # errors must not be masked by the median fallback below.
            pass
    return float(np.median(depth_roi))


def process_video_with_robust_depth_fusion(input_path, output_path):
    """Detect, track and depth-annotate the target class in a video.

    Per frame: run YOLO detection, predict a depth map with Metric3D, attach a
    median depth to every detection, associate detections with the custom
    ByteTracker, then refine each returned track's depth (GMM cluster
    selection + a per-track Kalman filter) and draw box, id and depth on the
    frame written to ``output_path``.

    Relies on the module-level ``yolo_model``, ``metric3d_model``, ``cfg``,
    ``TARGET_CLASS_ID`` and ``DEVICE``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated mp4 to write.

    Raises:
        IOError: If the input video or the output writer cannot be opened.
    """
    print("\n--- 开始视频处理 (最终鲁棒深度融合追踪) ---")
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"无法打开输入视频: {input_path}")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize takes (width, height); the config entry is indexed in the
        # opposite order.
        metric3d_input_size = (cfg.data_basic['vit_size'][1], cfg.data_basic['vit_size'][0])
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError(f"无法创建输出视频: {output_path}")
        print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")

        # Initialise the custom tracker.
        tracker_args = SimpleNamespace(
            track_high_thresh=0.5,
            track_low_thresh=0.1,
            new_track_thresh=0.6,
            track_buffer=30,
            match_thresh=0.8,
            mot20=False
        )
        tracker = ByteTracker(args=tracker_args, frame_rate=fps)

        # Independent Kalman filters (track id -> filter) used only for the
        # refined post-tracking depth.
        robust_depth_filters = {}

        with tqdm(total=total_frames, desc="视频处理进度") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                annotated_frame = frame.copy()

                # Step 1: object detection.
                det_results = yolo_model(frame, classes=[TARGET_CLASS_ID], verbose=False)[0]

                # Step 2: depth map for the whole frame.
                # NOTE(review): the frame is only scaled to [0, 1] and resized
                # without keeping the aspect ratio; confirm this matches the
                # preprocessing the Metric3D checkpoint expects.
                with torch.no_grad():
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                    rgb_torch = torch.from_numpy(rgb_frame_resized).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
                    pred_output = metric3d_model(data={'input': rgb_torch})
                    pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                    pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)
                    pred_depth_filtered = cv2.bilateralFilter(pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15)

                # Step 3 (before tracking): attach an initial depth to every detection.
                detections_with_depth = []
                if det_results.boxes.shape[0] > 0:
                    for box in det_results.boxes:
                        x1, y1, x2, y2 = map(int, box.xyxy[0])
                        score = box.conf[0].item()
                        cls_id = box.cls[0].item()

                        box_w, box_h = x2 - x1, y2 - y1
                        if box_w <= 0 or box_h <= 0:
                            continue

                        # Central 25% x 25% of the box.
                        roi_w, roi_h = int(box_w * 0.25), int(box_h * 0.25)
                        roi_x1, roi_y1 = x1 + (box_w - roi_w) // 2, y1 + (box_h - roi_h) // 2
                        roi_x2, roi_y2 = roi_x1 + roi_w, roi_y1 + roi_h

                        depth_roi = pred_depth_filtered[roi_y1:roi_y2, roi_x1:roi_x2]

                        # Median instead of mean for robustness to outliers.
                        initial_depth = np.median(depth_roi) if depth_roi.size > 0 else 0.0

                        detections_with_depth.append([x1, y1, x2, y2, score, cls_id, initial_depth])

                # Step 4 (tracking): data association in the custom tracker.
                tracks = tracker.update(np.array(detections_with_depth)) if len(detections_with_depth) > 0 else np.empty((0, 8))

                # Step 5 (after tracking): refined depth and visualisation.
                active_track_ids = set()
                if tracks.shape[0] > 0:
                    for track in tracks:
                        x1, y1, x2, y2 = map(int, track[:4])
                        track_id = int(track[4])
                        active_track_ids.add(track_id)
                        cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                        box_w, box_h = x2 - x1, y2 - y1
                        if box_w <= 0 or box_h <= 0:
                            continue

                        # Wider central region (50% x 50%), clamped to the frame.
                        roi_w, roi_h = int(box_w * 0.5), int(box_h * 0.5)
                        roi_x1, roi_y1 = max(x1 + (box_w - roi_w) // 2, 0), max(y1 + (box_h - roi_h) // 2, 0)
                        roi_x2, roi_y2 = min(roi_x1 + roi_w, width), min(roi_y1 + roi_h, height)
                        depth_roi = pred_depth_filtered[roi_y1:roi_y2, roi_x1:roi_x2]

                        # Advance an existing filter exactly once per frame,
                        # whichever observation path is taken afterwards, so
                        # every update() is preceded by a predict().
                        kf = robust_depth_filters.get(track_id)
                        predicted_depth = None
                        if kf is not None:
                            kf.predict()
                            predicted_depth = kf.x[0]

                        observed_depth = _select_depth_observation(depth_roi, predicted_depth)
                        # Reject empty / non-positive / non-finite observations
                        # so they cannot corrupt the filter state.
                        if not np.isfinite(observed_depth) or observed_depth <= 0:
                            continue

                        # Temporal smoothing with the per-track Kalman filter.
                        if kf is None:
                            kf = FilterPyKalmanFilter(dim_x=2, dim_z=1)
                            kf.x = np.array([observed_depth, 0.])
                            kf.F = np.array([[1., 1.], [0., 1.]])
                            kf.H = np.array([[1., 0.]])
                            kf.P *= 100.
                            kf.R = 5
                            kf.Q = 0.1
                            robust_depth_filters[track_id] = kf
                        else:
                            kf.update(observed_depth)

                        smoothed_depth = kf.x[0]

                        # Draw the label on a filled background above the box.
                        depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                        (text_w, _text_h), _ = cv2.getTextSize(depth_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                        cv2.rectangle(annotated_frame, (x1, y1 - 25), (x1 + text_w + 5, y1 - 5), (0, 100, 0), -1)
                        cv2.putText(annotated_frame, depth_text, (x1 + 2, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                # Drop filters of tracks that were not returned for this frame.
                for inactive_id in set(robust_depth_filters) - active_track_ids:
                    del robust_depth_filters[inactive_id]

                out.write(annotated_frame)
                pbar.update(1)
    finally:
        # Always finalise the files, also on errors or KeyboardInterrupt, so
        # the frames written so far end up in a playable file. No HighGUI
        # window is ever created here, so there is nothing else to destroy.
        cap.release()
        if out is not None:
            out.release()

    print(f"\n--- 视频处理完成！输出保存在: {output_path} ---")


print(">>> [DEBUG] 步骤 4: 视频处理函数定义完成。\n" + "="*60 + "\n")

# ==============================================================================
# 5. Run the main program
# ==============================================================================
import traceback

print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_with_robust_depth_fusion(INPUT_VIDEO_PATH, OUTPUT_VIDEO_PATH)
except Exception as err:
    # Best effort: report the failure with its traceback, then let the cell finish.
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {err}")
    traceback.print_exc()
print(">>> [DEBUG] 步骤 5: 主程序执行完毕。\n" + "="*60)

>>> [DEBUG] 步骤 0: 检查关键库版本...
>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] ultralytics version: 8.3.213
>>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库导入成功。
>>> [DEBUG] Metric3D 模块导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [SUCCESS] Metric3Dv2 模型加载成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 4: 视频处理函数定义完成。

>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 (最终鲁棒深度融合追踪) ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 233 帧。


视频处理进度:   4%|▍         | 9/233 [00:02<01:12,  3.11it/s]


KeyboardInterrupt: 

In [2]:
# ==============================================================================
# 0. Key dependency check (for debugging) // batching
# ==============================================================================
# Imports each key third-party dependency and prints the versions of mmcv,
# timm and ultralytics; a missing package is reported and the ImportError is
# re-raised.
print(">>> [DEBUG] 步骤 0: 检查关键库版本...")
try:
    import mmcv
    import timm
    import ultralytics
    from filterpy.kalman import KalmanFilter
    from sklearn.mixture import GaussianMixture
    print(f">>> [INFO] mmcv version: {mmcv.__version__}")
    print(f">>> [INFO] timm version: {timm.__version__}")
    print(f">>> [INFO] ultralytics version: {ultralytics.__version__}")
    print(">>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。")
except ImportError as e:
    print(f"!!! [ERROR] 缺少核心库: {e}")
    raise
print(">>> [DEBUG] 步骤 0: 检查完成。\n" + "="*60 + "\n")

# ==============================================================================
# 1. Import the required libraries
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 开始导入核心库...")
try:
    import cv2
    import torch
    import numpy as np
    from ultralytics import YOLO
    import sys
    import os
    from tqdm import tqdm
    from mmcv import Config
    from types import SimpleNamespace
    from filterpy.kalman import KalmanFilter as FilterPyKalmanFilter
    from sklearn.mixture import GaussianMixture
    # Import our custom, depth-fused tracker
    from custom_byte_tracker import ByteTracker
    print(">>> [DEBUG] 核心库导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 导入核心库失败: {e}")
    raise

# --- Import the Metric3D modules ---
# The Metric3D checkout is put at the front of sys.path so that its `mono`
# package can be imported below.
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)
try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [DEBUG] Metric3D 模块导入成功。")
except ImportError as e:
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    raise
print(">>> [DEBUG] 步骤 1: 所有库导入完成。\n" + "="*60 + "\n")

# ==============================================================================
# 2. Configuration and path checks
# ==============================================================================
print(">>> [DEBUG] 步骤 2: 配置模型和文件路径...")
YOLO_MODEL_PATH = '/root/autodl-tmp/weights/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'
INPUT_VIDEO_PATH = '/root/autodl-tmp/kitti_videos/0002.mp4'
OUTPUT_VIDEO_PATH = '/root/autodl-tmp/output_final_robust_fusion2.mp4'

# Label -> path map of every input the pipeline needs. The labels are used to
# report exactly which inputs are missing instead of a generic failure.
paths_to_check = {
    "YOLOv8 权重": YOLO_MODEL_PATH,
    "Metric3D 权重": METRIC3D_MODEL_PATH,
    "Metric3D 配置": METRIC3D_CONFIG_PATH,
    "输入视频": INPUT_VIDEO_PATH,
}
missing_paths = {
    label: path for label, path in paths_to_check.items() if not os.path.exists(path)
}
if missing_paths:
    missing_details = "; ".join(f"{label}: {path}" for label, path in missing_paths.items())
    raise FileNotFoundError(f"一个或多个关键文件路径无效。 缺失: {missing_details}")

print(">>> [DEBUG] 所有文件路径检查通过。")
# Use the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f">>> [DEBUG] 将要使用的设备: {DEVICE}")
print(">>> [DEBUG] 步骤 2: 配置完成。\n" + "="*60 + "\n")

# ==============================================================================
# 3. Model loading
# ==============================================================================
print(">>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
try:
    yolo_model = YOLO(YOLO_MODEL_PATH)
    TARGET_CLASS_NAME = 'Car'
    if not (hasattr(yolo_model, 'names') and isinstance(yolo_model.names, dict)):
        raise ValueError("YOLO 模型没有有效的 'names' 属性或格式不正确")
    # Resolve the class id by name; fail with an explicit message (rather than
    # a bare IndexError) when the weights do not contain the target class.
    _matching_class_ids = [k for k, v in yolo_model.names.items() if v == TARGET_CLASS_NAME]
    if not _matching_class_ids:
        raise ValueError(
            f"YOLO 模型的类别表中不存在 '{TARGET_CLASS_NAME}': {yolo_model.names}"
        )
    TARGET_CLASS_ID = _matching_class_ids[0]
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' 已找到, ID为: {TARGET_CLASS_ID}")
except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型失败: {e}")
    raise

try:
    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)
    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # The weights may be stored under 'model_state_dict', under 'model', or at top level.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them: a wrongly keyed
    # checkpoint would otherwise leave layers at their initial values unnoticed.
    load_report = metric3d_model.load_state_dict(state_dict, strict=False)
    missing_keys = list(getattr(load_report, 'missing_keys', []))
    unexpected_keys = list(getattr(load_report, 'unexpected_keys', []))
    if missing_keys or unexpected_keys:
        print(
            f">>> [WARNING] Metric3D 权重未完全匹配: 缺失 {len(missing_keys)} 个键, "
            f"多余 {len(unexpected_keys)} 个键。"
        )
    metric3d_model.eval()
    print(">>> [SUCCESS] Metric3Dv2 模型加载成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    raise
print(">>> [DEBUG] 步骤 3: 所有模型加载完成。\n" + "="*60 + "\n")

# ==============================================================================
# 4. Main video-processing function (final integrated version)
# ==============================================================================
print(">>> [DEBUG] 步骤 4: 定义视频处理函数...")


def _select_depth_observation(depth_roi, predicted_depth=None):
    """Pick one depth observation for a track from its crop of the depth map.

    For crops with more than 10 pixels, Gaussian mixtures with 1-3 components
    are fitted and the one with the lowest BIC is kept. The result is the
    component mean closest to ``predicted_depth`` or, when no prediction is
    available (new track), the smallest component mean. Smaller crops, and
    crops on which the mixture fit fails, fall back to the median.

    Args:
        depth_roi: 2-D array cropped from the depth map (may be empty).
        predicted_depth: Kalman-predicted depth for the track, or ``None``.

    Returns:
        The selected depth as a float; ``0.0`` when the crop is empty.
    """
    if depth_roi.size == 0:
        return 0.0
    if depth_roi.size > 10:
        try:
            pixels = depth_roi.reshape(-1, 1)
            best_gmm, lowest_bic = None, np.inf
            for n_components in range(1, 4):
                gmm = GaussianMixture(n_components=n_components, random_state=0)
                gmm.fit(pixels)
                bic_score = gmm.bic(pixels)
                if bic_score < lowest_bic:
                    lowest_bic, best_gmm = bic_score, gmm
            if best_gmm is not None:
                cluster_means = best_gmm.means_.ravel()
                if predicted_depth is None:
                    return float(cluster_means.min())
                nearest = int(np.argmin(np.abs(cluster_means - predicted_depth)))
                return float(cluster_means[nearest])
        except (ValueError, ArithmeticError):
            # Only numerical/fit failures are tolerated here; programming
            # errors must not be masked by the median fallback below.
            pass
    return float(np.median(depth_roi))


def process_video_with_robust_depth_fusion(input_path, output_path):
    """Detect, track and depth-annotate the target class in a video.

    Per frame: run YOLO detection, predict a depth map with Metric3D, attach a
    median depth to every detection, associate detections with the custom
    ByteTracker, then refine each returned track's depth (GMM cluster
    selection + a per-track Kalman filter) and draw box, id and depth on the
    frame written to ``output_path``.

    Relies on the module-level ``yolo_model``, ``metric3d_model``, ``cfg``,
    ``TARGET_CLASS_ID`` and ``DEVICE``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated mp4 to write.

    Raises:
        IOError: If the input video or the output writer cannot be opened.
    """
    print("\n--- 开始视频处理 (最终鲁棒深度融合追踪) ---")
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"无法打开输入视频: {input_path}")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.resize takes (width, height); the config entry is indexed in the
        # opposite order.
        metric3d_input_size = (cfg.data_basic['vit_size'][1], cfg.data_basic['vit_size'][0])
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise IOError(f"无法创建输出视频: {output_path}")
        print(f">>> [INFO] 输入视频信息: {width}x{height} @ {fps:.2f} FPS, 共 {total_frames} 帧。")

        # Initialise the custom tracker.
        tracker_args = SimpleNamespace(
            track_high_thresh=0.5,
            track_low_thresh=0.1,
            new_track_thresh=0.6,
            track_buffer=30,
            match_thresh=0.8,
            mot20=False
        )
        tracker = ByteTracker(args=tracker_args, frame_rate=fps)

        # Independent Kalman filters (track id -> filter) used only for the
        # refined post-tracking depth.
        robust_depth_filters = {}

        with tqdm(total=total_frames, desc="视频处理进度") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                annotated_frame = frame.copy()

                # Step 1: object detection.
                det_results = yolo_model(frame, classes=[TARGET_CLASS_ID], verbose=False)[0]

                # Step 2: depth map for the whole frame.
                # NOTE(review): the frame is only scaled to [0, 1] and resized
                # without keeping the aspect ratio; confirm this matches the
                # preprocessing the Metric3D checkpoint expects.
                with torch.no_grad():
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                    rgb_torch = torch.from_numpy(rgb_frame_resized).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
                    pred_output = metric3d_model(data={'input': rgb_torch})
                    pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                    pred_depth_resized = cv2.resize(pred_depth_np, (width, height)).astype(np.float32)
                    pred_depth_filtered = cv2.bilateralFilter(pred_depth_resized, d=5, sigmaColor=0.2, sigmaSpace=15)

                # Step 3 (before tracking): attach an initial depth to every detection.
                detections_with_depth = []
                if det_results.boxes.shape[0] > 0:
                    for box in det_results.boxes:
                        x1, y1, x2, y2 = map(int, box.xyxy[0])
                        score = box.conf[0].item()
                        cls_id = box.cls[0].item()

                        box_w, box_h = x2 - x1, y2 - y1
                        if box_w <= 0 or box_h <= 0:
                            continue

                        # Central 25% x 25% of the box.
                        roi_w, roi_h = int(box_w * 0.25), int(box_h * 0.25)
                        roi_x1, roi_y1 = x1 + (box_w - roi_w) // 2, y1 + (box_h - roi_h) // 2
                        roi_x2, roi_y2 = roi_x1 + roi_w, roi_y1 + roi_h

                        depth_roi = pred_depth_filtered[roi_y1:roi_y2, roi_x1:roi_x2]

                        # Median instead of mean for robustness to outliers.
                        initial_depth = np.median(depth_roi) if depth_roi.size > 0 else 0.0

                        detections_with_depth.append([x1, y1, x2, y2, score, cls_id, initial_depth])

                # Step 4 (tracking): data association in the custom tracker.
                tracks = tracker.update(np.array(detections_with_depth)) if len(detections_with_depth) > 0 else np.empty((0, 8))

                # Step 5 (after tracking): refined depth and visualisation.
                active_track_ids = set()
                if tracks.shape[0] > 0:
                    for track in tracks:
                        x1, y1, x2, y2 = map(int, track[:4])
                        track_id = int(track[4])
                        active_track_ids.add(track_id)
                        cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                        box_w, box_h = x2 - x1, y2 - y1
                        if box_w <= 0 or box_h <= 0:
                            continue

                        # Wider central region (50% x 50%), clamped to the frame.
                        roi_w, roi_h = int(box_w * 0.5), int(box_h * 0.5)
                        roi_x1, roi_y1 = max(x1 + (box_w - roi_w) // 2, 0), max(y1 + (box_h - roi_h) // 2, 0)
                        roi_x2, roi_y2 = min(roi_x1 + roi_w, width), min(roi_y1 + roi_h, height)
                        depth_roi = pred_depth_filtered[roi_y1:roi_y2, roi_x1:roi_x2]

                        # Advance an existing filter exactly once per frame,
                        # whichever observation path is taken afterwards, so
                        # every update() is preceded by a predict().
                        kf = robust_depth_filters.get(track_id)
                        predicted_depth = None
                        if kf is not None:
                            kf.predict()
                            predicted_depth = kf.x[0]

                        observed_depth = _select_depth_observation(depth_roi, predicted_depth)
                        # Reject empty / non-positive / non-finite observations
                        # so they cannot corrupt the filter state.
                        if not np.isfinite(observed_depth) or observed_depth <= 0:
                            continue

                        # Temporal smoothing with the per-track Kalman filter.
                        if kf is None:
                            kf = FilterPyKalmanFilter(dim_x=2, dim_z=1)
                            kf.x = np.array([observed_depth, 0.])
                            kf.F = np.array([[1., 1.], [0., 1.]])
                            kf.H = np.array([[1., 0.]])
                            kf.P *= 100.
                            kf.R = 5
                            kf.Q = 0.1
                            robust_depth_filters[track_id] = kf
                        else:
                            kf.update(observed_depth)

                        smoothed_depth = kf.x[0]

                        # Draw the label on a filled background above the box.
                        depth_text = f"ID:{track_id} D:{smoothed_depth:.2f}m"
                        (text_w, _text_h), _ = cv2.getTextSize(depth_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                        cv2.rectangle(annotated_frame, (x1, y1 - 25), (x1 + text_w + 5, y1 - 5), (0, 100, 0), -1)
                        cv2.putText(annotated_frame, depth_text, (x1 + 2, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                # Drop filters of tracks that were not returned for this frame.
                for inactive_id in set(robust_depth_filters) - active_track_ids:
                    del robust_depth_filters[inactive_id]

                out.write(annotated_frame)
                pbar.update(1)
    finally:
        # Always finalise the files, also on errors or KeyboardInterrupt, so
        # the frames written so far end up in a playable file. No HighGUI
        # window is ever created here, so there is nothing else to destroy.
        cap.release()
        if out is not None:
            out.release()

    print(f"\n--- 视频处理完成！输出保存在: {output_path} ---")


print(">>> [DEBUG] 步骤 4: 视频处理函数定义完成。\n" + "="*60 + "\n")

# ==============================================================================
# 5. Run the main program
# ==============================================================================
import traceback

print(">>> [DEBUG] 步骤 5: 开始执行主程序...")
try:
    process_video_with_robust_depth_fusion(INPUT_VIDEO_PATH, OUTPUT_VIDEO_PATH)
except Exception as err:
    # Best effort: report the failure with its traceback, then let the cell finish.
    print(f"!!! [FATAL ERROR] 在视频处理过程中发生严重错误: {err}")
    traceback.print_exc()
print(">>> [DEBUG] 步骤 5: 主程序执行完毕。\n" + "="*60)

>>> [DEBUG] 步骤 0: 检查关键库版本...
>>> [INFO] mmcv version: 1.7.2
>>> [INFO] timm version: 0.6.12
>>> [INFO] ultralytics version: 8.3.213
>>> [INFO] filterpy 和 scikit-learn (GMM) 库已成功导入。
>>> [DEBUG] 步骤 0: 检查完成。

>>> [DEBUG] 步骤 1: 开始导入核心库...
>>> [DEBUG] 核心库导入成功。
>>> [DEBUG] Metric3D 模块导入成功。
>>> [DEBUG] 步骤 1: 所有库导入完成。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [DEBUG] 所有文件路径检查通过。
>>> [DEBUG] 将要使用的设备: cuda
>>> [DEBUG] 步骤 2: 配置完成。

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [INFO] 目标类别 'Car' 已找到, ID为: 0
>>> [SUCCESS] Metric3Dv2 模型加载成功！
>>> [DEBUG] 步骤 3: 所有模型加载完成。

>>> [DEBUG] 步骤 4: 定义视频处理函数...
>>> [DEBUG] 步骤 4: 视频处理函数定义完成。

>>> [DEBUG] 步骤 5: 开始执行主程序...

--- 开始视频处理 (最终鲁棒深度融合追踪) ---
>>> [INFO] 输入视频信息: 1242x374 @ 1.00 FPS, 共 233 帧。


视频处理进度:   4%|▍         | 9/233 [00:02<01:12,  3.11it/s]


KeyboardInterrupt: 

In [2]:
# /root/autodl-tmp/batch_process.py
import cv2
import torch
import numpy as np
from ultralytics import YOLO
import sys
import os
import glob
from tqdm import tqdm
from mmcv import Config
from types import SimpleNamespace
# We still need STrack to reset the ID, but not for format conversion
from custom_byte_tracker import ByteTracker, STrack

# ==============================================================================
# 1. Import the Metric3D module
# ==============================================================================
print(">>> [DEBUG] 步骤 1: 导入 Metric3D 模块...")
# The Metric3D checkout is put at the front of sys.path so that its `mono`
# package can be imported below.
METRIC3D_PATH = '/root/autodl-tmp/Metric3D'
if METRIC3D_PATH not in sys.path:
    sys.path.insert(0, METRIC3D_PATH)
try:
    from mono.model.monodepth_model import DepthModel as MonoDepthModel
    print(">>> [INFO] Metric3D 模块导入成功。")
except ImportError as e:
    # Report the failure, then re-raise: nothing below works without the model class.
    print(f"!!! [ERROR] 从 Metric3D 导入模块失败: {e}")
    raise

# ==============================================================================
# 2. Configuration and path definitions
# ==============================================================================
print("\n>>> [DEBUG] 步骤 2: 配置模型和文件路径...")

# Model weights and config.
YOLO_MODEL_PATH = '/root/autodl-tmp/weights/epoch30.pt'
METRIC3D_MODEL_PATH = '/root/autodl-tmp/weights/metric_depth_vit_large_800k.pth'
METRIC3D_CONFIG_PATH = '/root/autodl-tmp/Metric3D/mono/configs/HourglassDecoder/vit.raft5.large.py'

# Batch input directory (make sure this path is correct) and result directory.
INPUT_VIDEOS_DIR = '/root/autodl-tmp/kitti_videos/'
OUTPUT_EVAL_DIR = '/root/autodl-tmp/eval_outputs/'

# Prefer the GPU when torch reports one.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# The result directory is created up front; an existing one is fine.
os.makedirs(OUTPUT_EVAL_DIR, exist_ok=True)
print(f">>> [INFO] 将要使用的设备: {DEVICE}")

# ==============================================================================
# 3. Model loading (done once, globally)
# ==============================================================================
print("\n>>> [DEBUG] 步骤 3: 开始加载深度学习模型...")
try:
    yolo_model = YOLO(YOLO_MODEL_PATH)
    TARGET_CLASS_NAME = 'Car'
    # Resolve the class id by name; fail with an explicit message (rather than
    # a bare IndexError) when the weights do not contain the target class.
    _matching_class_ids = [k for k, v in yolo_model.names.items() if v == TARGET_CLASS_NAME]
    if not _matching_class_ids:
        raise ValueError(
            f"YOLO 模型的类别表中不存在 '{TARGET_CLASS_NAME}': {yolo_model.names}"
        )
    TARGET_CLASS_ID = _matching_class_ids[0]
    print(f">>> [INFO] 目标类别 '{TARGET_CLASS_NAME}' ID为: {TARGET_CLASS_ID}")
except Exception as e:
    print(f"!!! [ERROR] 加载 YOLOv8 模型失败: {e}")
    raise

try:
    cfg = Config.fromfile(METRIC3D_CONFIG_PATH)
    cfg.model.backbone.use_mask_token = False
    metric3d_model = MonoDepthModel(cfg).to(DEVICE)
    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    checkpoint = torch.load(METRIC3D_MODEL_PATH, map_location=DEVICE)
    # The weights may be stored under 'model_state_dict', under 'model', or at top level.
    state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint))
    # strict=False tolerates key mismatches, so surface them: a wrongly keyed
    # checkpoint would otherwise leave layers at their initial values unnoticed.
    load_report = metric3d_model.load_state_dict(state_dict, strict=False)
    missing_keys = list(getattr(load_report, 'missing_keys', []))
    unexpected_keys = list(getattr(load_report, 'unexpected_keys', []))
    if missing_keys or unexpected_keys:
        print(
            f">>> [WARNING] Metric3D 权重未完全匹配: 缺失 {len(missing_keys)} 个键, "
            f"多余 {len(unexpected_keys)} 个键。"
        )
    metric3d_model.eval()
    print(">>> [SUCCESS] Metric3Dv2 模型加载成功！")
except Exception as e:
    print(f"!!! [FATAL ERROR] 加载 Metric3Dv2 模型时出错: {e}")
    raise

# ==============================================================================
# 4. Main video-processing function
# ==============================================================================
def process_video_for_eval(input_path, output_txt_path):
    """Track the target class through one video and write the results to a text file.

    Per frame: run YOLO detection, predict a depth map with Metric3D, attach a
    median depth to every detection, update the custom ByteTracker and write
    one line per returned track to ``output_txt_path``. Frame indices in the
    file start at 0.

    Relies on the module-level ``yolo_model``, ``metric3d_model``, ``cfg``,
    ``TARGET_CLASS_ID``, ``TARGET_CLASS_NAME`` and ``DEVICE``.

    Args:
        input_path: Path of the video to read.
        output_txt_path: Path of the result text file to (over)write.

    Raises:
        IOError: If the input video cannot be opened.
    """
    print(f"\n--- 开始处理视频: {os.path.basename(input_path)} ---")
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"无法打开输入视频: {input_path}")

    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # cv2.resize takes (width, height); the config entry is indexed in the
        # opposite order.
        metric3d_input_size = (cfg.data_basic['vit_size'][1], cfg.data_basic['vit_size'][0])

        tracker_args = SimpleNamespace(track_high_thresh=0.5, track_low_thresh=0.1, new_track_thresh=0.6,
                                       track_buffer=30, match_thresh=0.8, mot20=False)
        tracker = ByteTracker(args=tracker_args, frame_rate=fps)
        # We still need STrack to reset the ID counter for every video.
        STrack.release_id()

        # Frame count starts at 0 for the KITTI format.
        frame_count = 0
        with open(output_txt_path, 'w') as f_out:
            with tqdm(total=total_frames, desc=f"处理 {os.path.basename(input_path)}") as pbar:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # a. Object detection.
                    det_results = yolo_model(frame, classes=[TARGET_CLASS_ID], verbose=False)[0]

                    # b. Depth estimation (resized back to the frame size, no extra filtering).
                    with torch.no_grad():
                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        rgb_frame_resized = cv2.resize(rgb_frame, metric3d_input_size)
                        rgb_torch = torch.from_numpy(rgb_frame_resized).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
                        pred_output = metric3d_model(data={'input': rgb_torch})
                        pred_depth_np = pred_output[0].squeeze().cpu().numpy()
                        pred_depth_filtered = cv2.resize(pred_depth_np, (width, height))

                    # c. Build the detections with an attached depth value.
                    detections_with_depth = []
                    if det_results.boxes.shape[0] > 0:
                        for box in det_results.boxes:
                            x1, y1, x2, y2 = map(int, box.xyxy[0])
                            score = box.conf[0].item()
                            cls_id = box.cls[0].item()

                            # Median depth over the central 25% x 25% of the box.
                            roi_w, roi_h = int((x2 - x1) * 0.25), int((y2 - y1) * 0.25)
                            roi_x1, roi_y1 = x1 + ((x2-x1) - roi_w) // 2, y1 + ((y2-y1) - roi_h) // 2
                            depth_roi = pred_depth_filtered[roi_y1:roi_y1+roi_h, roi_x1:roi_x1+roi_w]
                            initial_depth = np.median(depth_roi) if depth_roi.size > 0 else 0.0
                            detections_with_depth.append([x1, y1, x2, y2, score, cls_id, initial_depth])

                    # d. Update the tracker.
                    # The output format is [x1, y1, x2, y2, track_id, score, class_id, depth]
                    tracks = tracker.update(np.array(detections_with_depth)) if len(detections_with_depth) > 0 else np.empty((0, 8))

                    # Write the results in the KITTI tracking format.
                    if tracks.shape[0] > 0:
                        for track in tracks:
                            bb_left, bb_top, bb_right, bb_bottom = track[:4]
                            track_id = int(track[4])
                            score = track[5]

                            # 18 fields per line: the 17 KITTI tracking columns
                            # (placeholders for the unused ones) followed by
                            # the score.
                            f_out.write(
                                f"{frame_count} {track_id} {TARGET_CLASS_NAME} -1 -1 -10 "
                                f"{bb_left:.2f} {bb_top:.2f} {bb_right:.2f} {bb_bottom:.2f} "
                                f"-1 -1 -1 -1000 -1000 -1000 -10 {score:.4f}\n"
                            )

                    # Advance the frame index at the end of the loop body.
                    frame_count += 1
                    pbar.update(1)
    finally:
        # Release the capture even when a frame fails: the batch loop catches
        # the exception and moves on to the next video, so a leaked handle
        # would otherwise stay open for the rest of the run.
        cap.release()

    print(f"--- 处理完成！输出已保存至: {output_txt_path} ---")

# ==============================================================================
# 5. Batch-processing entry point
# ==============================================================================
if __name__ == '__main__':
    import traceback

    print("\n>>> [DEBUG] 步骤 5: 开始执行批量处理主程序...")

    # Every .mp4 directly inside the input directory, processed in sorted order.
    video_files = glob.glob(os.path.join(INPUT_VIDEOS_DIR, '*.mp4'))
    if video_files:
        print(f">>> [INFO] 找到 {len(video_files)} 个视频文件进行处理。")
    else:
        # Nothing matched: double-check INPUT_VIDEOS_DIR.
        print(f"!!! [WARNING] 在目录 {INPUT_VIDEOS_DIR} 中未找到任何 .mp4 视频文件。")

    for video_path in sorted(video_files):
        try:
            # <video stem>.txt inside the evaluation output directory.
            stem, _ext = os.path.splitext(os.path.basename(video_path))
            result_path = os.path.join(OUTPUT_EVAL_DIR, stem + '.txt')
            process_video_for_eval(video_path, result_path)
        except Exception as err:
            # One failing video must not stop the batch: report it and move on.
            print(f"!!! [FATAL ERROR] 处理视频 {video_path} 时发生严重错误: {err}")
            traceback.print_exc()

    print("\n>>> [DEBUG] 所有视频处理完毕。\n" + "="*60)

  from pkg_resources import packaging  # type: ignore[attr-defined]


>>> [DEBUG] 步骤 1: 导入 Metric3D 模块...
>>> [INFO] Metric3D 模块导入成功。

>>> [DEBUG] 步骤 2: 配置模型和文件路径...
>>> [INFO] 将要使用的设备: cuda

>>> [DEBUG] 步骤 3: 开始加载深度学习模型...
>>> [INFO] 目标类别 'Car' ID为: 0


  from .autonotebook import tqdm as notebook_tqdm


>>> [SUCCESS] Metric3Dv2 模型加载成功！

>>> [DEBUG] 步骤 5: 开始执行批量处理主程序...
>>> [INFO] 找到 21 个视频文件进行处理。

--- 开始处理视频: 0000.mp4 ---


处理 0000.mp4: 100%|██████████| 154/154 [00:39<00:00,  3.89it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0000.txt ---

--- 开始处理视频: 0001.mp4 ---


处理 0001.mp4: 100%|██████████| 447/447 [01:53<00:00,  3.94it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0001.txt ---

--- 开始处理视频: 0002.mp4 ---


处理 0002.mp4: 100%|██████████| 233/233 [00:59<00:00,  3.94it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0002.txt ---

--- 开始处理视频: 0003.mp4 ---


处理 0003.mp4: 100%|██████████| 144/144 [00:36<00:00,  3.93it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0003.txt ---

--- 开始处理视频: 0004.mp4 ---


处理 0004.mp4: 100%|██████████| 314/314 [01:19<00:00,  3.93it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0004.txt ---

--- 开始处理视频: 0005.mp4 ---


处理 0005.mp4: 100%|██████████| 297/297 [01:15<00:00,  3.91it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0005.txt ---

--- 开始处理视频: 0006.mp4 ---


处理 0006.mp4: 100%|██████████| 270/270 [01:09<00:00,  3.91it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0006.txt ---

--- 开始处理视频: 0007.mp4 ---


处理 0007.mp4: 100%|██████████| 800/800 [03:23<00:00,  3.93it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0007.txt ---

--- 开始处理视频: 0008.mp4 ---


处理 0008.mp4: 100%|██████████| 390/390 [01:39<00:00,  3.93it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0008.txt ---

--- 开始处理视频: 0009.mp4 ---


处理 0009.mp4: 100%|██████████| 803/803 [03:24<00:00,  3.92it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0009.txt ---

--- 开始处理视频: 0010.mp4 ---


处理 0010.mp4: 100%|██████████| 294/294 [01:15<00:00,  3.91it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0010.txt ---

--- 开始处理视频: 0011.mp4 ---


处理 0011.mp4: 100%|██████████| 373/373 [01:35<00:00,  3.89it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0011.txt ---

--- 开始处理视频: 0012.mp4 ---


处理 0012.mp4: 100%|██████████| 78/78 [00:19<00:00,  3.94it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0012.txt ---

--- 开始处理视频: 0013.mp4 ---


处理 0013.mp4: 100%|██████████| 340/340 [01:25<00:00,  3.96it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0013.txt ---

--- 开始处理视频: 0014.mp4 ---


处理 0014.mp4: 100%|██████████| 106/106 [00:27<00:00,  3.90it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0014.txt ---

--- 开始处理视频: 0015.mp4 ---


处理 0015.mp4: 100%|██████████| 376/376 [01:35<00:00,  3.94it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0015.txt ---

--- 开始处理视频: 0016.mp4 ---


处理 0016.mp4: 100%|██████████| 209/209 [00:53<00:00,  3.92it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0016.txt ---

--- 开始处理视频: 0017.mp4 ---


处理 0017.mp4: 100%|██████████| 145/145 [00:36<00:00,  3.93it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0017.txt ---

--- 开始处理视频: 0018.mp4 ---


处理 0018.mp4: 100%|██████████| 339/339 [01:26<00:00,  3.91it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0018.txt ---

--- 开始处理视频: 0019.mp4 ---


处理 0019.mp4: 100%|██████████| 1059/1059 [04:29<00:00,  3.93it/s]


--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0019.txt ---

--- 开始处理视频: 0020.mp4 ---


处理 0020.mp4: 100%|██████████| 837/837 [03:34<00:00,  3.91it/s]

--- 处理完成！输出已保存至: /root/autodl-tmp/eval_outputs/0020.txt ---

>>> [DEBUG] 所有视频处理完毕。



