In [1]:
# 第一块：抽帧 + YOLO检测写入文件（自包含，不导入 PS_yolov3）
import os, json, subprocess, shutil, tempfile
from pathlib import Path
from typing import Dict, List, Tuple

from PIL import Image as PILImage
import numpy as np

# Path configuration (adjust to your device environment if needed)
DARKNET_BIN = "darknet"   # Darknet executable
SRC_DIR     = "."                   # root directory for images/videos and results
MODELS_DIR  = "models"    # model directory

# YOLOv3-tiny model configuration
CFG_PATH     = os.path.join(MODELS_DIR, "yolov3-tiny.cfg")
WEIGHTS_PATH = os.path.join(MODELS_DIR, "yolov3-tiny.weights")
NAMES_PATH   = os.path.join(MODELS_DIR, "coco.names")
DATA_PATH    = os.path.join(MODELS_DIR, "coco.data")    # auto-generated; binds the names file
CONF_THRESH  = 0.55                                     # confidence threshold

def _write_coco_data(data_path: str, names_path: str, classes: int = 80):
    """Write a Darknet ``.data`` file that points at a class-names file.

    The class count is the number of non-blank lines in ``names_path``; the
    ``classes`` argument is only used when that file has no non-blank lines.
    The ``train``/``valid``/``backup`` entries are fixed placeholder values.
    """
    with open(names_path, "r", encoding="utf-8") as names_file:
        labels = [row.strip() for row in names_file if row.strip()]
    if labels:
        classes = len(labels)

    # Trailing empty item yields the final newline.
    body = "\n".join((
        f"classes={classes}",
        f"names={names_path}",
        "train=ignored.txt",
        "valid=ignored.txt",
        "backup=backup/",
        "",
    ))
    with open(data_path, "w", encoding="utf-8") as out:
        out.write(body)

def run_darknet_v3_inmem(image_path: str, thresh: float = CONF_THRESH):
    """Run ``darknet detector test`` on a single image and return its JSON output.

    Darknet is started with its own directory as the working directory and is
    asked (``-out``) to write its detections to a temporary JSON file, which is
    parsed and then always deleted.

    Args:
        image_path: Image to run detection on.
        thresh: Confidence threshold passed to Darknet via ``-thresh``.

    Returns:
        ``(True, parsed_json)`` on success; ``(False, None)`` when Darknet exits
        with a non-zero status, produces no JSON file, or produces a file that
        cannot be parsed as JSON.

    Raises:
        AssertionError: If the Darknet binary, cfg, weights, names file or the
            image does not exist.
    """
    assert os.path.exists(DARKNET_BIN), f"未找到 Darknet: {DARKNET_BIN}"
    assert os.path.exists(CFG_PATH),     f"未找到 cfg: {CFG_PATH}"
    assert os.path.exists(WEIGHTS_PATH), f"未找到 weights: {WEIGHTS_PATH}"
    assert os.path.exists(NAMES_PATH),   f"未找到 coco.names: {NAMES_PATH}"
    assert os.path.exists(image_path),   f"未找到图片: {image_path}"

    # Resolve every path up front: the subprocess runs with a different cwd, so
    # relative paths would otherwise be interpreted against the wrong directory,
    # and a bare executable name would be searched on PATH instead of being the
    # file that was just checked above.
    darknet_bin = os.path.abspath(DARKNET_BIN)
    exec_dir = os.path.dirname(darknet_bin)
    data_path = os.path.abspath(DATA_PATH)
    cfg_path = os.path.abspath(CFG_PATH)
    weights_path = os.path.abspath(WEIGHTS_PATH)
    names_path = os.path.abspath(NAMES_PATH)
    image_abs = os.path.abspath(image_path)
    _write_coco_data(data_path, names_path)

    # Only the name is needed; Darknet itself writes the content.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as tf:
        out_json_path = tf.name

    try:
        cmd = [
            darknet_bin, "detector", "test",
            data_path, cfg_path, weights_path, image_abs,
            "-thresh", str(thresh), "-dont_show", "-ext_output", "-out", str(out_json_path)
        ]
        print("Running:", " ".join(cmd))
        proc = subprocess.run(cmd, cwd=exec_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
        print(proc.stdout)
        if proc.returncode != 0:
            print(proc.stderr or proc.stdout)
            print(f"Darknet 执行失败：{image_path}")
            return False, None

        if not os.path.exists(out_json_path):
            print(f"未生成 JSON：{out_json_path}")
            return False, None

        try:
            with open(out_json_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as err:
            # An empty/partial file (e.g. Darknet never wrote to it) must not
            # abort the whole batch; report it like any other failed frame.
            print(f"JSON 解析失败：{out_json_path}（{err}）")
            return False, None
        return True, data
    finally:
        # Single cleanup point for every exit path, including exceptions.
        try:
            os.unlink(out_json_path)
        except OSError:
            pass

def list_videos(dir_path: str, exts=("mp4", "avi", "mov", "mkv")) -> List[str]:
    """List video files directly inside ``dir_path``.

    Extensions are matched case-insensitively and only regular files are
    returned. Results are grouped in the order of ``exts`` and sorted by path
    within each group.

    Args:
        dir_path: Directory to scan (not recursive).
        exts: Extensions to accept, with or without a leading dot.

    Returns:
        List of matching paths as strings; empty if the directory is missing.
    """
    root = Path(dir_path)
    if not root.is_dir():
        return []

    entries = [entry for entry in root.iterdir() if entry.is_file()]
    found: List[str] = []
    # dict.fromkeys de-duplicates while keeping the caller's extension order.
    for ext in dict.fromkeys(e.lower().lstrip(".") for e in exts):
        tail = f".{ext}"
        found += sorted(str(entry) for entry in entries if entry.name.lower().endswith(tail))
    return found

def extract_video_frames(video_path: str, out_dir: str, sample_every: int = 3, resize_to: tuple = (416, 416)) -> Tuple[List[str], float]:
    """Save every ``sample_every``-th frame of a video as a JPEG.

    Saved files are named ``frame_000000.jpg``, ``frame_000001.jpg``, ... in
    ``out_dir`` (numbered by saved frame, not by source frame index).

    Args:
        video_path: Video file to read.
        out_dir: Directory for the JPEGs; created if missing.
        sample_every: Keep one frame out of this many; values below 1 are
            treated as 1 (keep every frame).
        resize_to: ``(width, height)`` passed to ``cv2.resize``; falsy to keep
            the original size.

    Returns:
        ``(saved_paths, fps)`` where ``fps`` is the value OpenCV reports for the
        source (0.0 if unavailable).

    Raises:
        AssertionError: If the video cannot be opened.
    """
    import cv2

    # Guard against sample_every == 0, which would make the modulo below fail.
    step = sample_every if sample_every >= 1 else 1
    target_dir = Path(out_dir)
    saved = []

    cap = cv2.VideoCapture(video_path)
    try:
        assert cap.isOpened(), f"无法打开视频: {video_path}"
        fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
        target_dir.mkdir(parents=True, exist_ok=True)

        idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            keep = (idx % step == 0)
            idx += 1
            if not keep:
                continue
            if resize_to:
                frame = cv2.resize(frame, resize_to, interpolation=cv2.INTER_LINEAR)
            fname = target_dir / f"frame_{len(saved):06d}.jpg"
            # imwrite reports failure via its return value; do not hand a
            # non-existent file to the detection stage.
            if not cv2.imwrite(str(fname), frame):
                print(f"写入帧失败：{fname}")
                continue
            saved.append(str(fname))
    finally:
        # Release the capture on every path, including errors mid-stream.
        cap.release()
    return saved, float(fps)

def _result_root_for_video(video_path: str) -> Path:
    """Return ``SRC_DIR/result/<video file stem>`` as the result directory for a video."""
    return Path(SRC_DIR, "result", Path(video_path).stem)

def _ensure_dir(p: Path):
    """Create directory ``p`` (and any missing parents) unless it already exists."""
    if p.is_dir():
        return
    p.mkdir(parents=True, exist_ok=True)

def _write_json(path: Path, obj):
    """Serialize ``obj`` to ``path`` as UTF-8 JSON (non-ASCII kept as-is, 2-space indent)."""
    with open(path, "w", encoding="utf-8") as sink:
        sink.write(json.dumps(obj, ensure_ascii=False, indent=2))

def _select_top1_detection(data_obj):
    """Reduce a detection result to its single highest-scoring detection.

    Supported layouts:
      * dict with a list under ``"objects"``, ``"detections"`` or ``"boxes"``
        (first such key wins); otherwise the first dict value that is a
        non-empty list of dicts;
      * list of per-image dicts each carrying an ``"objects"`` list;
      * plain list of detections.

    Dict inputs and per-image entries are modified in place and returned; a
    plain list yields a new one-element (or empty) list. Anything else is
    returned unchanged.

    The score of a detection is its ``"prob"``, else ``"score"``, else
    ``"confidence"``; items without a usable score rank lowest (-1.0).
    """
    def _score(d):
        try:
            return float(d.get("prob", d.get("score", d.get("confidence", -1.0))))
        except Exception:
            return -1.0

    def _best_only(items):
        # Emptiness of the list decides the result, not the truthiness of the
        # winning item, so a falsy winner (e.g. {}) is still kept.
        return [max(items, key=_score)] if items else []

    if isinstance(data_obj, dict):
        for key in ("objects", "detections", "boxes"):
            if isinstance(data_obj.get(key), list):
                data_obj[key] = _best_only(data_obj[key])
                return data_obj
        for value in data_obj.values():
            if isinstance(value, list) and value and isinstance(value[0], dict):
                value[:] = _best_only(value)
                return data_obj
        return data_obj

    if isinstance(data_obj, list):
        if data_obj and isinstance(data_obj[0], dict) and "objects" in data_obj[0]:
            for entry in data_obj:
                if isinstance(entry, dict) and isinstance(entry.get("objects"), list):
                    entry["objects"] = _best_only(entry["objects"])
            return data_obj
        return _best_only(data_obj)

    return data_obj

def yolo_stage_dump(
    video_dir: str,
    sample_every: int = 3,
    resize_to: tuple = (416, 416),
    thresh: float = CONF_THRESH,
) -> Dict[str, Dict]:
    """Stage 1: extract frames from each video and dump per-frame YOLO results.

    For every video found in ``video_dir``:
      * frames are written to ``<result root>/frames``;
      * each frame is run through Darknet and its (top-1 filtered) JSON is
        written to ``<result root>/detections/frame_XXXXXX.json``;
      * ``index.json`` (frame/detection file pairs) and ``meta.json``
        (sampling parameters and frame counts) are written to the result root.

    Frames whose detection run fails are skipped and do not appear in
    ``index.json``.

    Args:
        video_dir: Directory scanned for videos.
        sample_every: Keep one frame out of this many.
        resize_to: ``(width, height)`` for extracted frames, or falsy to keep size.
        thresh: Darknet confidence threshold.

    Returns:
        ``{video_path: {"result_dir": str, "frames": int, "fps": float}}`` where
        ``frames`` is the number of frames with a written detection file.
    """
    session = {}
    videos = list_videos(video_dir)
    if not videos:
        print(f"目录内未找到视频：{video_dir}")
        return session

    for v in videos:
        result_root = _result_root_for_video(v)
        frames_dir = result_root / "frames"
        dets_dir = result_root / "detections"
        _ensure_dir(frames_dir)
        _ensure_dir(dets_dir)

        # Frame extraction
        frames, fps = extract_video_frames(v, str(frames_dir), sample_every=sample_every, resize_to=resize_to)
        if not frames:
            print(f"跳过（未抽到帧）：{v}")
            session[v] = {"result_dir": str(result_root), "frames": 0, "fps": fps}
            continue

        # Inference, one JSON file per frame
        index_entries = []
        for idx, fpath in enumerate(frames):
            ok, data_obj = run_darknet_v3_inmem(fpath, thresh=thresh)
            if not ok:
                continue
            try:
                data_obj = _select_top1_detection(data_obj)
            except Exception as err:
                # Best effort: keep the unfiltered detections, but say so
                # instead of hiding the failure.
                print(f"Top-1 筛选失败，保留原始检测结果：{fpath}（{err}）")
            det_file = dets_dir / f"frame_{idx:06d}.json"
            _write_json(det_file, data_obj)
            index_entries.append({
                "frame_id": idx,
                "frame_file": os.path.relpath(fpath, str(result_root)),
                "det_file": os.path.relpath(str(det_file), str(result_root)),
            })

        # Effective frame rate of the sampled sequence, floored at 1 fps.
        fps_out = max(1.0, float(fps) / max(1, float(sample_every)))
        _write_json(result_root / "meta.json", {
            "video_path": v,
            "sample_every": sample_every,
            "resize_to": list(resize_to) if resize_to else None,
            "fps_in": fps,
            "fps_out": fps_out,
            "frames_total": len(frames),
            "frames_detected": len(index_entries),
        })
        _write_json(result_root / "index.json", index_entries)

        print(f"阶段一完成：{v} → 结果目录 {result_root}（写入 {len(index_entries)} 帧的检测JSON）")
        session[v] = {"result_dir": str(result_root), "frames": len(index_entries), "fps": fps}
    return session

# Example invocation.
# NOTE(review): the original note said to place videos under SRC/videos, but the
# directory actually scanned below is SRC_DIR itself — confirm which is intended.
video_dir = SRC_DIR
s1 = yolo_stage_dump(video_dir, sample_every=10, resize_to=(416, 416), thresh=CONF_THRESH)
print("阶段一结果：", {vp: info.get("frames", 0) for vp, info in s1.items()})


Running: /opt/darknet_ab/darknet detector test /home/xilinx/jupyter_notebooks/models/coco.data /home/xilinx/jupyter_notebooks/models/yolov3-tiny.cfg /home/xilinx/jupyter_notebooks/models/yolov3-tiny.weights /home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000000.jpg -thresh 0.55 -dont_show -ext_output -out /tmp/tmptaj0ta5u.json
 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000000.jpg: Predicted in 64245.043000 milli-seconds.

Running: /opt/darknet_ab/darknet detector test /home/xilinx/jupyter_notebooks/models/coco.data /home/xilinx/jupyter_notebooks/models/yolov3-tiny.cfg /home/xilinx/jupyter_notebooks/models/yolov3-tiny.weights /home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000001.jpg -thresh 0.55 -dont_show -ext_output -out /t

 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000011.jpg: Predicted in 64243.112000 milli-seconds.

Running: /opt/darknet_ab/darknet detector test /home/xilinx/jupyter_notebooks/models/coco.data /home/xilinx/jupyter_notebooks/models/yolov3-tiny.cfg /home/xilinx/jupyter_notebooks/models/yolov3-tiny.weights /home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000012.jpg -thresh 0.55 -dont_show -ext_output -out /tmp/tmpb_cexzaa.json
 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000012.jpg: Predicted in 64184.768000 milli-seconds.

Running: /

 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000024.jpg: Predicted in 64172.156000 milli-seconds.

Running: /opt/darknet_ab/darknet detector test /home/xilinx/jupyter_notebooks/models/coco.data /home/xilinx/jupyter_notebooks/models/yolov3-tiny.cfg /home/xilinx/jupyter_notebooks/models/yolov3-tiny.weights /home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000025.jpg -thresh 0.55 -dont_show -ext_output -out /tmp/tmpfboi80lj.json
 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000025.jpg: Predicted in 64168.333000 milli-seconds.
person: 84%

 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000036.jpg: Predicted in 64175.696000 milli-seconds.

Running: /opt/darknet_ab/darknet detector test /home/xilinx/jupyter_notebooks/models/coco.data /home/xilinx/jupyter_notebooks/models/yolov3-tiny.cfg /home/xilinx/jupyter_notebooks/models/yolov3-tiny.weights /home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000037.jpg -thresh 0.55 -dont_show -ext_output -out /tmp/tmpwxxzn2__.json
 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000037.jpg: Predicted in 64171.699000 milli-seconds.

Running: /

 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000048.jpg: Predicted in 64199.567000 milli-seconds.
person: 97%	(left_x:  142   top_y:   95   width:   86   height:  234)

Running: /opt/darknet_ab/darknet detector test /home/xilinx/jupyter_notebooks/models/coco.data /home/xilinx/jupyter_notebooks/models/yolov3-tiny.cfg /home/xilinx/jupyter_notebooks/models/yolov3-tiny.weights /home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000049.jpg -thresh 0.55 -dont_show -ext_output -out /tmp/tmp74yogp65.json
 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/

 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000060.jpg: Predicted in 64149.301000 milli-seconds.

Running: /opt/darknet_ab/darknet detector test /home/xilinx/jupyter_notebooks/models/coco.data /home/xilinx/jupyter_notebooks/models/yolov3-tiny.cfg /home/xilinx/jupyter_notebooks/models/yolov3-tiny.weights /home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000061.jpg -thresh 0.55 -dont_show -ext_output -out /tmp/tmpx5e15n4a.json
 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000061.jpg: Predicted in 64128.457000 milli-seconds.

Running: /

 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000072.jpg: Predicted in 64167.038000 milli-seconds.
person: 79%	(left_x:  335   top_y:  115   width:   83   height:  240)

Running: /opt/darknet_ab/darknet detector test /home/xilinx/jupyter_notebooks/models/coco.data /home/xilinx/jupyter_notebooks/models/yolov3-tiny.cfg /home/xilinx/jupyter_notebooks/models/yolov3-tiny.weights /home/xilinx/jupyter_notebooks/src/result/personwalking/frames/frame_000073.jpg -thresh 0.55 -dont_show -ext_output -out /tmp/tmpsq6r8k8w.json
 GPU isn't used 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 

 seen 64, trained: 32013 K-images (500 Kilo-batches_64) 
 Detection layer: 16 - type = 28 
 Detection layer: 23 - type = 28 
/home/xilinx/jupyter_notebooks/src/result/personwalking/frames/

In [3]:
import os
import json
import time
import math
import traceback
import struct
from pathlib import Path
from typing import List, Tuple, Dict, Any

import numpy as np
import cv2
from PIL import Image as PILImage, ImageDraw, ImageFont

# ---------------- CONFIG ----------------
SRC_DIR = "."
VIDEO_DIR = os.path.join(SRC_DIR, "videos")
DEBUG_ROOT = os.path.join(SRC_DIR, "debug_frames")

# Parameters aligned with the hardware version
TARGET_CLASSES = None  # None: no class filtering
UKF_Q_STD = 0.05       # process-noise standard deviation (squared to build Q)
UKF_R_STD = 0.05       # measurement-noise standard deviation (squared to build R)

# ---------------- UTILS FROM HARDWARE VERSION ----------------

def _try_open_video_writer(out_base: str, frame_size: tuple, fps: float):
    """Open a ``cv2.VideoWriter``, trying several codec/container pairs in turn.

    Attempts, in order: ``mp4v`` -> ``<out_base>.mp4``, ``XVID`` ->
    ``<out_base>.avi``, ``MJPG`` -> ``<out_base>.avi``.

    Args:
        out_base: Output path without extension.
        frame_size: ``(width, height)`` of the frames to be written.
        fps: Output frame rate; values below 1.0 are raised to 1.0.

    Returns:
        ``(writer, path)`` for the first writer that opens, or ``(None, None)``
        if none does.
    """
    w, h = int(frame_size[0]), int(frame_size[1])
    rate = float(max(1.0, fps))
    attempts = [
        ("mp4v", out_base + ".mp4"),
        ("XVID", out_base + ".avi"),
        ("MJPG", out_base + ".avi"),
    ]
    for fourcc_name, path in attempts:
        fourcc = cv2.VideoWriter_fourcc(*fourcc_name)
        writer = cv2.VideoWriter(path, fourcc, rate, (w, h))
        if writer is not None and writer.isOpened():
            print(f"视频写出已打开：{path}（编码器 {fourcc_name}）")
            return writer, path
        if writer is not None:
            # Drop the failed writer before trying the next codec so it does
            # not linger holding backend resources.
            writer.release()
    return None, None

def _draw_rect_compat(draw, x0, y0, x1, y1, color=(0, 255, 0), thickness=2):
    """Draw a rectangle outline of the given thickness.

    Uses ``draw.rectangle(..., width=...)`` when it is accepted; if that call
    raises ``TypeError``, falls back to drawing ``thickness`` one-pixel
    outlines, each grown outward by one more pixel.
    """
    try:
        draw.rectangle([x0, y0, x1, y1], outline=color, width=thickness)
    except TypeError:
        rings = [[x0 - grow, y0 - grow, x1 + grow, y1 + grow] for grow in range(thickness)]
        for ring in rings:
            draw.rectangle(ring, outline=color)

def _draw_circle_compat(draw, cx, cy, radius, color=(255, 0, 0), thickness=2):
    """Draw a circle outline of the given thickness centred on ``(cx, cy)``.

    Uses ``draw.ellipse(..., width=...)`` when it is accepted; if that call
    raises ``TypeError``, falls back to drawing ``thickness`` concentric
    one-pixel circles with radii ``radius``, ``radius + 1``, ...
    """
    def _bbox(r):
        return [(cx - r, cy - r), (cx + r, cy + r)]

    try:
        draw.ellipse(_bbox(radius), outline=color, width=thickness)
    except TypeError:
        for extra in range(thickness):
            draw.ellipse(_bbox(radius + extra), outline=color)

def _result_root_for_video(video_path: str) -> Path:
    """Map a video path to its result directory, ``SRC_DIR/result/<stem>``."""
    video_name = Path(video_path).stem
    return Path(SRC_DIR).joinpath("result", video_name)

def _ensure_dir(p: Path):
    """Make sure directory ``p`` exists, creating missing parents as needed."""
    if not p.is_dir():
        p.mkdir(exist_ok=True, parents=True)

def _read_json(path: Path):
    """Load a UTF-8 JSON file; return ``None`` on any failure (missing file, bad JSON, ...)."""
    try:
        text = Path(str(path)).read_text(encoding="utf-8")
        return json.loads(text)
    except Exception:
        return None

# ---------------- NORMALIZATION & SELECTION (Hardware Logic) ----------------

def _normalize_detections(data_obj):
    """Flatten a detection JSON object into a uniform list of detections.

    Accepted layouts:
      * list whose dict items either carry an ``"objects"`` list (flattened) or
        are themselves detections;
      * dict with a list under ``"objects"`` or, failing that, ``"detections"``.

    Each output item is
    ``{"class", "confidence", "cx", "cy", "w", "h"}`` where the box values are
    read from ``"relative_coordinates"`` or ``"bbox"`` (keys
    ``center_x/center_y/width/height`` with ``cx/cy/w/h`` as aliases) and
    clamped to ``[0, 1]``. Missing values default to centre ``0.5`` and size
    ``0.0``; missing confidence to ``0.0``; missing name to ``"obj"``.

    Entries that are not dicts are skipped, and a coordinate container that is
    not a dict, or a value that cannot be converted to float, is treated as
    missing.
    """
    def _num(value, default):
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _unit(value, default):
        return max(0.0, min(1.0, _num(value, default)))

    objs = []
    if isinstance(data_obj, list):
        for item in data_obj:
            if not isinstance(item, dict):
                continue
            nested = item.get("objects")
            if isinstance(nested, list):
                objs.extend(nested)
            else:
                objs.append(item)
    elif isinstance(data_obj, dict):
        for key in ("objects", "detections"):
            if isinstance(data_obj.get(key), list):
                objs = data_obj[key]
                break

    norm = []
    for obj in objs:
        if not isinstance(obj, dict):
            continue
        name = obj.get("name") or obj.get("class") or "obj"
        conf = _num(obj.get("confidence", 0.0), 0.0)
        rc = obj.get("relative_coordinates") or obj.get("bbox") or {}
        if not isinstance(rc, dict):
            rc = {}
        norm.append({
            "class": name,
            "confidence": conf,
            "cx": _unit(rc.get("center_x", rc.get("cx", 0.5)), 0.5),
            "cy": _unit(rc.get("center_y", rc.get("cy", 0.5)), 0.5),
            "w": _unit(rc.get("width", rc.get("w", 0.0)), 0.0),
            "h": _unit(rc.get("height", rc.get("h", 0.0)), 0.0),
        })
    return norm

def _pick_measurement(norm_list, target_classes=None):
    """Pick the ``[cx, cy]`` of the most confident detection.

    When ``target_classes`` is given, only detections of those classes are
    considered, unless none match, in which case all detections are used.
    Ties go to the earliest detection. Returns ``None`` for an empty input.
    """
    if not norm_list:
        return None

    pool = norm_list
    if target_classes:
        wanted = set(target_classes)
        matching = [det for det in norm_list if det.get("class") in wanted]
        if matching:
            pool = matching

    # max() keeps the first of equally confident detections.
    winner = max(pool, key=lambda det: float(det.get("confidence", 0.0)))
    return [float(winner.get("cx", 0.5)), float(winner.get("cy", 0.5))]

# ---------------- VISUALIZATION (Hardware Logic) ----------------

def _render_frame_bgr(image_path: str, data_obj, ukf_x=None, target_classes=None):
    """Draw detections (and optionally the filter estimate) onto a frame.

    Args:
        image_path: Frame image to load.
        data_obj: Detection JSON, in the same layouts accepted by
            ``_normalize_detections`` (list of items / dict with ``"objects"``
            or ``"detections"``). Box coordinates are relative (0..1) and read
            from ``center_x/center_y/width/height`` with ``cx/cy/w/h`` as
            aliases.
        ukf_x: Optional state whose first two entries are the relative
            ``(x, y)`` position to mark with a circle.
        target_classes: If not ``None``, only detections whose name is in this
            collection are drawn.

    Returns:
        ``(pil_image_rgb, ndarray_bgr)`` of the annotated frame.
    """
    img = PILImage.open(image_path).convert("RGB")
    w, h = img.size
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.load_default()
    except Exception:
        font = None

    objs = []
    if isinstance(data_obj, list):
        for item in data_obj:
            if isinstance(item, dict):
                if "objects" in item and isinstance(item["objects"], list):
                    objs.extend(item["objects"])
                else:
                    objs.append(item)
    elif isinstance(data_obj, dict):
        if "objects" in data_obj and isinstance(data_obj["objects"], list):
            objs = data_obj["objects"]
        elif "detections" in data_obj and isinstance(data_obj["detections"], list):
            objs = data_obj["detections"]

    for obj in objs:
        if not isinstance(obj, dict):
            continue
        name = obj.get("name") or obj.get("class") or "obj"
        if target_classes is not None and name not in target_classes:
            continue
        conf = float(obj.get("confidence", 0.0))
        rc = obj.get("relative_coordinates") or obj.get("bbox") or {}
        if not isinstance(rc, dict):
            rc = {}
        # Same key aliases as _normalize_detections, so what is drawn matches
        # what is fed to the tracker.
        cx = float(rc.get("center_x", rc.get("cx", 0.5)))
        cy = float(rc.get("center_y", rc.get("cy", 0.5)))
        bw = float(rc.get("width", rc.get("w", 0.0)))
        bh = float(rc.get("height", rc.get("h", 0.0)))
        cxp = int(cx * w); cyp = int(cy * h)
        bwp = int(bw * w); bhp = int(bh * h)
        # Clamp into the image and keep x0 <= x1, y0 <= y1: a box centred on
        # or beyond the border would otherwise come out inverted, which
        # Pillow's rectangle() can reject.
        x0 = min(w - 1, max(0, cxp - bwp // 2))
        y0 = min(h - 1, max(0, cyp - bhp // 2))
        x1 = max(x0, min(w - 1, cxp + bwp // 2))
        y1 = max(y0, min(h - 1, cyp + bhp // 2))
        _draw_rect_compat(draw, x0, y0, x1, y1, (0, 255, 0), thickness=2)
        label = f"{name} {conf:.2f}"
        if font:
            draw.text((x0 + 2, y0 + 2), label, fill=(255, 0, 0), font=font)
        else:
            draw.text((x0 + 2, y0 + 2), label, fill=(255, 0, 0))

    if ukf_x is not None and len(ukf_x) >= 2:
        # Flatten first: the state may be a column vector, and float() on a
        # 1-element array row is deprecated in recent NumPy.
        state = np.asarray(ukf_x, dtype=float).ravel()
        cxp = int(state[0] * w)
        cyp = int(state[1] * h)
        _draw_circle_compat(draw, cxp, cyp, radius=4, color=(255, 0, 0), thickness=2)

    arr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    return img, arr

# ---------------- UKF ALGORITHM (Python Implementation Matching Hardware Logic) ----------------

def cholupdate(R, x, sign='+'):
    """Rank-one update or downdate of an upper-triangular Cholesky factor.

    For upper-triangular ``R`` with ``R.T @ R = A``, returns an upper-triangular
    ``R'`` such that ``R'.T @ R' = A + x x^T`` (``sign='+'``) or
    ``A - x x^T`` (``sign='-'``). Inputs are copied, never modified.

    Degenerate cases are handled without raising:
      * a (near-)zero diagonal entry leaves that row unrotated;
      * in a downdate, a negative ``r_kk^2 - x_k^2`` is clamped to zero, and
        when the resulting rotation is singular the rest of that row and of the
        working vector are zeroed.

    NOTE(review): the original docstring stated this matches the hardware
    ``cholupdate_upper``; the working-vector update below now uses the freshly
    rotated row (the standard algorithm) — confirm the hardware does the same.

    Args:
        R: ``(n, n)`` upper-triangular factor.
        x: Length-``n`` vector (any shape; flattened).
        sign: ``'-'`` for a downdate; anything else is an update.

    Returns:
        The new ``(n, n)`` factor as a float array.
    """
    R = np.array(R, dtype=float, copy=True)
    x = np.array(x, dtype=float, copy=True).flatten()
    n = len(x)
    downdate = (sign == '-')

    for k in range(n):
        rkk = R[k, k]
        if downdate:
            r = math.sqrt(max(rkk * rkk - x[k] * x[k], 0.0))
        else:
            r = math.hypot(rkk, x[k])

        if abs(rkk) < 1e-9:
            c = 1.0; s = 0.0
            if downdate:
                r = 0.0
        else:
            c = r / rkk; s = x[k] / rkk

        R[k, k] = r
        if k + 1 >= n:
            continue

        if downdate and abs(c) < 1e-9:
            # Singular rotation: positive-definiteness was lost on this row.
            R[k, k+1:] = 0.0
            x[k+1:] = 0.0
            continue

        old_row = R[k, k+1:].copy()
        tail = x[k+1:].copy()
        if downdate:
            new_row = (old_row - s * tail) / c
        else:
            new_row = (old_row + s * tail) / c
        R[k, k+1:] = new_row
        # Must use the rotated row here; with the pre-rotation row the two
        # outputs are not the result of one consistent rotation and the
        # factor identity above no longer holds for n >= 2.
        x[k+1:] = c * tail - s * new_row
    return R

def sigmas(x, S, c):
    """Build the sigma-point matrix ``[x, x + A, x - A]`` with ``A = c * S.T``.

    ``x`` is a column vector; the offsets added to and subtracted from it are
    the columns of the scaled, transposed factor. The result has ``2n + 1``
    columns for an ``n x n`` factor.
    """
    spread = S.T * c
    return np.concatenate((x, x + spread, x - spread), axis=1)

def ukf_step_sw(x, S, z, Q, R, f_func, h_func):
    """Run one predict + update step of a square-root unscented Kalman filter.

    Args:
        x: ``(N, 1)`` state estimate.
        S: ``(N, N)`` lower-triangular factor of the state covariance,
            ``P = S @ S.T`` (the form this function also returns).
        z: ``(M, 1)`` measurement.
        Q: ``(N, N)`` process-noise covariance.
        R: ``(M, M)`` measurement-noise covariance.
        f_func: State transition; maps an ``(N, 1)`` column to ``N`` values.
        h_func: Measurement model; maps an ``(N, 1)`` column to ``M`` values.

    Returns:
        ``(x_out, S_out)``: updated ``(N, 1)`` state and lower-triangular
        ``(N, N)`` covariance factor.

    NOTE(review): the original comments say the hardware implementation
    (``ukf.hpp``) may weight the covariance sums with ``|Wc|``; this version
    uses the signed ``Wc`` of the standard UKF — confirm against the hardware.
    """
    def _factor(mat):
        # Lower Cholesky factor; if the matrix is not positive definite, fall
        # back to the square root of its (floored) diagonal.
        try:
            return np.linalg.cholesky(mat)
        except np.linalg.LinAlgError:
            return np.diag(np.sqrt(np.maximum(np.diag(mat), 1e-6)))

    N = x.shape[0]
    M = z.shape[0]
    n_sig = 2 * N + 1

    # Scaling parameters (taken from the hardware ukf.hpp per the original code).
    alpha = 0.3
    ki = 1.0
    beta = 2.0
    lam = alpha**2 * (N + ki) - N
    c = N + lam

    # Mean (Wm) and covariance (Wc) weights.
    Wm = np.full(n_sig, 0.5 / c)
    Wm[0] = lam / c
    Wc = Wm.copy()
    Wc[0] += (1 - alpha**2 + beta)

    # 1. Sigma points. `sigmas` offsets by the columns of c * (its argument).T,
    #    and those columns must come from a matrix A with A @ A.T = P. With
    #    P = S @ S.T that matrix is S itself, hence S.T is passed in.
    X = sigmas(x, S.T, math.sqrt(c))

    # 2. Propagate through the process model and form the predicted mean/cov.
    Y = np.zeros((N, n_sig))
    for k in range(n_sig):
        Y[:, k:k+1] = np.reshape(f_func(X[:, k:k+1]), (N, 1))
    x1 = Y @ Wm.reshape(-1, 1)
    X2 = Y - x1
    P = (X2 * Wc) @ X2.T + Q
    S1 = _factor(P)

    # 3. Predicted measurement and its covariance.
    Z = np.zeros((M, n_sig))
    for k in range(n_sig):
        Z[:, k:k+1] = np.reshape(h_func(Y[:, k:k+1]), (M, 1))
    z1 = Z @ Wm.reshape(-1, 1)
    Z2 = Z - z1
    Pz = (Z2 * Wc) @ Z2.T + R
    S2 = _factor(Pz)

    # 4. State/measurement cross covariance.
    P12 = (X2 * Wc) @ Z2.T

    # 5. Kalman gain and state update.
    try:
        K = P12 @ np.linalg.inv(Pz)
    except np.linalg.LinAlgError:
        K = P12 @ np.linalg.pinv(Pz)
    x_out = x1 + K @ (z - z1)

    # Covariance update in factored form:
    #   P_post = P - K Pz K.T = S1 S1.T - (K S2)(K S2).T
    # i.e. one rank-one downdate per column of U = K S2. `cholupdate` works on
    # upper-triangular factors, so downdate S1.T and transpose back.
    S_curr = S1.T.copy()
    U = K @ S2
    for i in range(M):
        S_curr = cholupdate(S_curr, U[:, i], '-')

    S_out = S_curr.T
    return x_out, S_out

# ---------------- WRAPPER (Modified to match Hardware Logic) ----------------

class SRUKFTrack:
    """Single-target tracker built on ``ukf_step_sw``.

    State is a 4-element constant-velocity vector ``[x, y, vx, vy]``; the
    measurement is the 2-element position ``[x, y]``.
    NOTE(review): described in the original as matching the hardware UKF
    (``ukf.hpp``); that cannot be verified from this file.
    """

    def __init__(self, init_meas, q_std=UKF_Q_STD, r_std=UKF_R_STD):
        """Start a track at ``init_meas`` (``[cx, cy]``, normalized) with zero velocity."""
        px, py = init_meas[0], init_meas[1]
        self.x = np.array([[px], [py], [0.0], [0.0]], dtype=float)

        # Initial covariance factor: 4x4 identity.
        self.S = np.eye(4, dtype=float)

        # Diagonal noise covariances built from the given standard deviations.
        self.Q = (q_std**2) * np.eye(4)
        self.R = (r_std**2) * np.eye(2)

    def f_hw(self, x):
        """Constant-velocity transition: position advances by velocity, velocity is kept."""
        nxt = np.zeros_like(x)
        nxt[:2] = x[:2] + x[2:4]
        nxt[2:4] = x[2:4]
        return nxt

    def h_hw(self, x):
        """Measurement model: return the position part of the state as a (2, 1) column."""
        meas = np.zeros((2, 1))
        for row in (0, 1):
            meas[row] = x[row]
        return meas

    def update(self, z_meas):
        """Advance the filter by one step with measurement ``[cx, cy]``; return the new state."""
        z = np.array([[component] for component in (z_meas[0], z_meas[1])])
        new_state, new_factor = ukf_step_sw(
            self.x, self.S, z, self.Q, self.R, self.f_hw, self.h_hw
        )
        self.x, self.S = new_state, new_factor
        return self.x

# ---------------- MAIN LOOP (Software Version matching Hardware) ----------------

def _stage2_sw_read_fps(result_root, default=10.0):
    """Return ``fps_out`` from ``meta.json`` under *result_root*, floored at 1.0."""
    fps_out = default
    meta_path = result_root / "meta.json"
    if meta_path.exists():
        meta = _read_json(meta_path) or {}
        try:
            fps_out = float(meta.get("fps_out", fps_out))
        except (AttributeError, TypeError, ValueError, OverflowError):
            # Malformed metadata must not abort the run; keep the default.
            pass
    return max(1.0, fps_out)


def _stage2_sw_load_entries(result_root, frames_dir, dets_dir):
    """Return the per-frame entries for one video.

    Uses ``index.json`` when it yields a non-empty value. Otherwise the list is
    rebuilt by scanning *frames_dir* for image files; a frame named
    ``frame_<n>.<ext>`` is paired with ``frame_<n:06d>.json`` in *dets_dir*
    when that file exists. Paths in the rebuilt entries are relative to
    *result_root*.
    """
    import re  # local import: only the fallback scan needs it

    index_path = result_root / "index.json"
    if index_path.exists():
        entries = _read_json(index_path) or []
        if entries:
            return entries

    frame_name = re.compile(r"^frame_(\d+)\.(jpg|jpeg|png)$", re.IGNORECASE)
    frame_files = sorted(
        f for f in frames_dir.iterdir()
        if f.suffix.lower() in (".jpg", ".jpeg", ".png")
    )
    entries = []
    for frame_file in frame_files:
        entry = {"frame_file": str(frame_file.relative_to(result_root))}
        m = frame_name.match(frame_file.name)
        if m:
            cand = dets_dir / f"frame_{int(m.group(1)):06d}.json"
            if cand.exists():
                entry["det_file"] = str(cand.relative_to(result_root))
        entries.append(entry)
    return entries


def _stage2_sw_output_size(img_pil, frame_path):
    """Return ``(width, height)`` for the output video, defaulting to 416x416."""
    w_out, h_out = (img_pil.size if img_pil is not None else (None, None))
    if w_out is None:
        try:
            _img = cv2.imread(frame_path, cv2.IMREAD_COLOR)
            if _img is not None:
                h_out, w_out = _img.shape[:2]
            else:
                w_out, h_out = 416, 416
        except Exception:
            # Best effort: any decode problem falls back to the default size.
            w_out, h_out = 416, 416
    return w_out, h_out


def stage2_consume_sw(
    video_dir: str,
    save_video: bool = True,
    ukf_q: float = UKF_Q_STD,
    ukf_r: float = UKF_R_STD,
    target_classes=None,
):
    """Replay stored frames and detections through the software UKF.

    For every video file found directly in *video_dir*, the frames and
    detection files under that video's result directory are read, one
    measurement per frame is selected and fed to a fresh `SRUKFTrack`, and the
    rendered frames are optionally written to ``out/video_result`` inside the
    result directory.

    Args:
        video_dir: Directory scanned (non-recursively) for .mp4/.avi/.mov/.mkv
            files.
        save_video: Write the rendered frames to a video file. If the writer
            cannot be opened for one video, saving is disabled for that video
            and for all remaining videos of this call.
        ukf_q: Process-noise standard deviation handed to `SRUKFTrack`.
        ukf_r: Measurement-noise standard deviation handed to `SRUKFTrack`.
        target_classes: Classes used for measurement selection and rendering;
            falls back to `TARGET_CLASSES` when falsy.

    Returns:
        dict mapping each video path to ``{"result_dir": str, "frames": int}``.
        ``frames`` counts frames actually written to the output video, so it
        is 0 for skipped videos and whenever no writer is active.
    """
    session = {}
    ukf_time_sum_total = 0.0
    ukf_calls_total = 0
    ukf_skipped_no_meas = 0
    classes = target_classes or TARGET_CLASSES

    print(f"[Stage2 SW] use_ukf=True (Software), Q={ukf_q}, R={ukf_r}", flush=True)

    video_dir_p = Path(video_dir)
    try:
        videos = sorted(
            str(p)
            for p in video_dir_p.iterdir()
            if p.is_file() and p.suffix.lower() in (".mp4", ".avi", ".mov", ".mkv")
        )
    except OSError:
        # A missing or unreadable directory is reported as "no videos" below.
        videos = []

    if not videos:
        print(f"目录内未找到视频：{video_dir}", flush=True)
        return session

    for v in videos:
        # Each video gets its own tracker so state never leaks between videos.
        tracker = None

        result_root = _result_root_for_video(v)
        frames_dir = result_root / "frames"
        dets_dir = result_root / "detections"
        if not frames_dir.exists():
            print(f"跳过 {v}：缺少 frames 目录 {frames_dir}", flush=True)
            session[v] = {"result_dir": str(result_root), "frames": 0}
            continue

        fps_out = _stage2_sw_read_fps(result_root)
        entries = _stage2_sw_load_entries(result_root, frames_dir, dets_dir)
        if not entries:
            print(f"跳过 {v}：未找到 index.json 且 frames 为空", flush=True)
            session[v] = {"result_dir": str(result_root), "frames": 0}
            continue

        writer = None
        out_vid_path = None
        frames_count = 0

        try:
            for entry in entries:
                frame_rel = entry.get("frame_file")
                det_rel = entry.get("det_file")
                frame_path = str(result_root / frame_rel)
                det_path = str(result_root / det_rel) if det_rel else None

                # Load this frame's detections (empty when there is no file).
                if det_path and os.path.exists(det_path):
                    data_obj = _read_json(det_path) or []
                else:
                    data_obj = []

                # 1. Normalize the detections; a bad file counts as "no detections".
                try:
                    norm = _normalize_detections(data_obj)
                except Exception:
                    norm = []

                # 2. Pick the single measurement for this frame (or None).
                z = _pick_measurement(norm, classes)

                ukf_out_x = None
                if z is None:
                    ukf_skipped_no_meas += 1
                    # No measurement: the state is not updated, and the last
                    # estimate keeps being drawn, as in the hardware version.
                    if tracker is not None:
                        ukf_out_x = [tracker.x[0, 0], tracker.x[1, 0]]
                elif tracker is None:
                    # First measurement initialises the track; no UKF step is run.
                    tracker = SRUKFTrack(z, q_std=ukf_q, r_std=ukf_r)
                    ukf_out_x = [tracker.x[0, 0], tracker.x[1, 0]]
                else:
                    try:
                        t0 = time.perf_counter()
                        tracker.update(z)
                        t1 = time.perf_counter()
                        dt_ms = (t1 - t0) * 1000.0
                        ukf_time_sum_total += (t1 - t0)
                        ukf_calls_total += 1
                        print(f"UKF 调用耗时：{dt_ms:.3f} ms", flush=True)
                        ukf_out_x = [tracker.x[0, 0], tracker.x[1, 0]]
                    except Exception as e:
                        # A failed step leaves this frame without a UKF overlay.
                        print(f"UKF 调用失败：{e}", flush=True)

                # Render the detection / UKF overlay.
                try:
                    img_pil, frame_bgr = _render_frame_bgr(
                        frame_path, data_obj, ukf_x=ukf_out_x, target_classes=classes
                    )
                except Exception as e:
                    print(f"渲染失败：{e}", flush=True)
                    img_pil, frame_bgr = None, None

                # Write the frame, opening the writer lazily on the first one.
                if save_video and frame_bgr is not None:
                    if writer is None:
                        w_out, h_out = _stage2_sw_output_size(img_pil, frame_path)
                        out_dir = result_root / "out"
                        _ensure_dir(out_dir)
                        out_base = str(out_dir / "video_result")
                        writer, out_vid_path = _try_open_video_writer(
                            out_base, (w_out, h_out), fps_out
                        )
                        if writer is None:
                            print("视频写出不可用（编码器打开失败），将跳过保存视频", flush=True)
                            # Stop retrying on every following frame.
                            save_video = False
                    if writer is not None:
                        try:
                            writer.write(frame_bgr)
                            frames_count += 1
                        except Exception as e:
                            print(f"视频写出失败：{e}", flush=True)
        finally:
            # Release the writer even if an unexpected error aborts this video.
            if writer is not None:
                try:
                    writer.release()
                except Exception:
                    pass

        if writer is not None:
            print(f"{v} 输出视频已保存：{out_vid_path}，帧率约 {fps_out:.2f} fps", flush=True)
        print(f"{v} 完成：消费 {frames_count} 帧", flush=True)
        session[v] = {"result_dir": str(result_root), "frames": frames_count}

    # Summary of UKF timing across all videos.
    if ukf_calls_total > 0:
        avg_ms = (ukf_time_sum_total / ukf_calls_total) * 1000.0
        print(f"UKF 平均耗时：{avg_ms:.3f} ms（{ukf_calls_total} 次调用；无测量跳过 {ukf_skipped_no_meas} 帧）", flush=True)
    else:
        print(f"UKF 未产生有效调用（可能无检测或过滤导致；无测量跳过 {ukf_skipped_no_meas} 帧）", flush=True)

    return session

if __name__ == "__main__":
    # Entry point: run stage two (software UKF) over every video in SRC_DIR
    # using the default noise settings, then show the per-video summary.
    print("运行阶段二（软件版）：")
    video_dir = SRC_DIR
    res = stage2_consume_sw(video_dir, save_video=True, ukf_q=UKF_Q_STD, ukf_r=UKF_R_STD)
    print("阶段二结果:", res)


运行阶段二（软件版）：
[Stage2 SW] use_ukf=True (Software), Q=0.05, R=0.05
视频写出已打开：/home/xilinx/jupyter_notebooks/src/result/personwalking/out/video_result.mp4（编码器 mp4v）
UKF 调用耗时：10.745 ms
UKF 调用耗时：10.534 ms
UKF 调用耗时：10.740 ms
UKF 调用耗时：10.883 ms
UKF 调用耗时：10.754 ms
UKF 调用耗时：10.774 ms
UKF 调用耗时：10.708 ms
UKF 调用耗时：10.769 ms
UKF 调用耗时：10.763 ms
UKF 调用耗时：10.817 ms
UKF 调用耗时：10.710 ms
UKF 调用耗时：16.961 ms
UKF 调用耗时：10.755 ms
UKF 调用耗时：10.768 ms
UKF 调用耗时：10.746 ms
UKF 调用耗时：10.792 ms
UKF 调用耗时：10.772 ms
UKF 调用耗时：11.020 ms
UKF 调用耗时：10.741 ms
UKF 调用耗时：10.791 ms
UKF 调用耗时：10.744 ms
UKF 调用耗时：10.803 ms
UKF 调用耗时：10.774 ms
UKF 调用耗时：10.873 ms
UKF 调用耗时：10.757 ms
UKF 调用耗时：10.744 ms
/home/xilinx/jupyter_notebooks/src/videos3/personwalking.avi 输出视频已保存：/home/xilinx/jupyter_notebooks/src/result/personwalking/out/video_result.mp4，帧率约 2.50 fps
/home/xilinx/jupyter_notebooks/src/videos3/personwalking.avi 完成：消费 75 帧
UKF 平均耗时：11.009 ms（26 次调用；无测量跳过 48 帧）
阶段二结果: {'/home/xilinx/jupyter_notebooks/src/videos3/personwalking.avi': {'resu

opened: True frames: 15


ImportError: cannot import name 'Video'