In [46]:
import cv2

# Sanity check for the cv2.aruco submodule, which the overlay cell below uses
# for dictionary lookup and marker detection.  Both outcomes are reported so a
# missing module is noticed here instead of as an AttributeError later on.
if hasattr(cv2, "aruco"):
    print("Aruco is available.")
else:
    print("Aruco is NOT available in this OpenCV build.")


Aruco is available.


In [47]:
# %%  ----------------------  QR-DRIVEN ArUco overlay  ----------------------
import cv2, json, numpy as np, logging, math, os, sys

VIDEO_IN = "aruco.mp4"
VIDEO_OUT = "aruco_green_overlay.mp4"

# ── logging ────────────────────────────────────────────────────────────────
# Debug messages go to a file (overwritten when the handler is first created)
# so they do not flood the notebook output.
logging.basicConfig(
    filename="aruco_debug.log", filemode="w", level=logging.DEBUG, format="%(message)s"
)
log = logging.getLogger("aruco")

# ═════ PASS-0 : scan whole video for the metadata QR ═══════════════════════
cap = cv2.VideoCapture(VIDEO_IN)
if not cap.isOpened():
    # Without this check an unreadable/missing file would be reported further
    # down as "No metadata QR code found", hiding the real cause.
    raise RuntimeError(f"Cannot open input video: {VIDEO_IN}")
qr = cv2.QRCodeDetector()

meta = None
frame_no = -1
while True:
    ok, frm = cap.read()
    if not ok:
        break
    frame_no += 1
    payload, *_ = qr.detectAndDecode(frm)
    if not payload:
        continue
    # A decoded QR that is not our metadata (unrelated code in the scene, or a
    # payload that is not a JSON object) must not abort the scan: keep looking
    # in the following frames and only fail if nothing usable is ever found.
    try:
        candidate = json.loads(payload)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        log.debug("frame %d: QR payload is not valid JSON, skipped", frame_no)
        continue
    if not isinstance(candidate, dict):
        log.debug("frame %d: QR payload is not a JSON object, skipped", frame_no)
        continue
    meta = candidate
    log.debug("frame %d: metadata QR decoded", frame_no)
    break

if meta is None:
    raise RuntimeError("No metadata QR code found in the video")

needed = {
    "dictionary",
    "marker_size_px",
    "margin_px",
    "corner_markers",
    "screen_aspect",
}
missing = needed - meta.keys()
if missing:
    raise RuntimeError(f"QR metadata missing keys: {missing}")

# ── constants coming *only* from the QR ────────────────────────────────────
dict_name = meta["dictionary"]
# The name comes from the video itself, so only resolve attributes that look
# like predefined-dictionary constants (integer DICT_* values) rather than any
# attribute of cv2.aruco.  Compare against None: a valid constant may be 0.
dict_id = None
if isinstance(dict_name, str) and dict_name.startswith("DICT_"):
    dict_id = getattr(cv2.aruco, dict_name, None)
if not isinstance(dict_id, int):
    raise RuntimeError(f"Unknown ArUco dictionary in QR metadata: {dict_name!r}")
dictionary = cv2.aruco.getPredefinedDictionary(dict_id)

marker_px = int(meta["marker_size_px"])
margin_px = int(meta["margin_px"])
BASE_RATIO = float(meta["screen_aspect"])  # expected width / height of the quad

corner_json = meta["corner_markers"]  # TL / TR / BR / BL
# marker id -> index (0-3) of that marker's corner, as given in the QR
corner_map = {
    corner_json[lbl]["id"]: corner_json[lbl]["corner_index"]
    for lbl in ("TL", "TR", "BR", "BL")
}

# thresholds that scale with marker size
JUMP_PX = max(marker_px // 2, 20)  # pass 1: larger moves vs. last accepted point are dropped
OUTLIER_PX = max(marker_px // 3, 10)  # pass 1: max distance from the neighbours' midpoint
MAX_STEP_PX = max(marker_px // 10, 4)  # pass 2: per-frame movement clamp of a drawn corner
RATIO_TOL = 0.10  # pass 2: relative aspect-ratio deviation that triggers the snap-back
ALPHA = 0.30  # pass 2: opacity of the green fill

# ── helper to shift the *inner* marker corner to the real screen corner ────
# For corner index i of a detected marker, the index of the adjacent corner
# along each of the two edges meeting at i (named "horizontal" / "vertical",
# presumably for a TL, TR, BR, BL corner order — confirm against the detector).
NEIGH_H = dict(zip(range(4), (1, 0, 3, 2)))
NEIGH_V = dict(zip(range(4), (3, 2, 1, 0)))


def screen_corner_from_marker(c4, inner, margin):
    """
    Project one marker corner outward to estimate the screen corner.

    c4    : 4×2 array of the marker's detected corners
    inner : index (0-3) of the corner that touches the screen *inner* edge
    margin: printed white margin (px) between marker and screen edge

    Returns the 2-vector obtained by moving ``c4[inner]`` by ``margin`` along
    the normalised sum of the two unit edge directions that point from the
    adjacent corners towards ``c4[inner]`` (i.e. away from the marker body).
    """
    corner = c4[inner]

    # Edge vectors pointing from each adjacent corner towards `corner`.
    edge_h = corner - c4[NEIGH_H[inner]]
    edge_v = corner - c4[NEIGH_V[inner]]

    unit_h = edge_h / np.linalg.norm(edge_h)
    unit_v = edge_v / np.linalg.norm(edge_v)

    # Unit bisector of the two edge directions.
    outward = unit_h + unit_v
    outward = outward / np.linalg.norm(outward)

    return corner + margin * outward


# ═════ PASS-1 : detect & clean tracks ══════════════════════════════════════
fps = cap.get(cv2.CAP_PROP_FPS)
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Clamp so that a backend reporting a negative frame count yields an empty
# track array instead of an exception from np.full.
N = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind

detector = cv2.aruco.ArucoDetector(dictionary, cv2.aruco.DetectorParameters())
# tracks[f, s] = estimated screen corner s in frame f; NaN = no accepted detection
tracks = np.full((N, 4, 2), np.nan, np.float32)
last_good = [None] * 4  # previous accepted point per corner
last_good_frame = [None] * 4  # frame index at which last_good was recorded

# Track slot per marker id, taken from the label order TL, TR, BR, BL.  PASS-2
# treats quad[0..3] as exactly that order, so the slot must not depend on
# which of the marker's own corners (corner_map) faces the screen.
slot_of = {
    corner_json[lbl]["id"]: slot for slot, lbl in enumerate(("TL", "TR", "BR", "BL"))
}

for f in range(N):
    ok, frame = cap.read()
    if not ok:
        break  # container reported more frames than it actually holds
    corners, ids, _ = detector.detectMarkers(frame)
    if ids is None:
        continue

    for idx, mid in enumerate(ids.flatten()):
        if mid not in corner_map:
            continue
        inner = corner_map[mid]  # 0-3 index from QR
        slot = slot_of[mid]
        sc = screen_corner_from_marker(corners[idx][0], inner, margin_px)
        if not np.isfinite(sc).all():
            continue  # degenerate marker (zero-length edge) → no usable point

        if last_good[slot] is not None:
            # JUMP_PX is a *per-frame* limit: scale it by the number of frames
            # since the last accepted point.  Comparing against a stale point
            # with a fixed limit would reject every detection forever once the
            # corner has really moved during a detection gap.
            gap = max(f - last_good_frame[slot], 1)
            if np.linalg.norm(sc - last_good[slot]) > JUMP_PX * gap:
                continue  # ignore large single-frame jump
        tracks[f, slot] = sc
        last_good[slot] = sc
        last_good_frame[slot] = f

# kill isolated spikes & linearly fill gaps
for k in range(4):
    seq = tracks[:, k, :]  # view – edits below land directly in `tracks`
    for f in range(1, N - 1):
        if (
            np.isnan(seq[f]).any()
            or np.isnan(seq[f - 1]).any()
            or np.isnan(seq[f + 1]).any()
        ):
            continue  # a spike can only be judged against two valid neighbours
        if np.linalg.norm(seq[f] - (seq[f - 1] + seq[f + 1]) / 2) > OUTLIER_PX:
            seq[f] = np.nan
    if np.isnan(seq).all():
        # Nothing to interpolate from; the column stays NaN.
        log.debug("corner slot %d was never detected", k)
        continue
    for d in range(2):
        s, m = tracks[:, k, d], ~np.isnan(tracks[:, k, d])
        if m.any():
            # np.interp holds the first/last valid value beyond the ends
            tracks[:, k, d] = np.interp(np.arange(N), np.where(m)[0], s[m])

# ═════ PASS-2 : stabilise & draw overlay ═══════════════════════════════════
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(VIDEO_OUT, fourcc, fps, (w, h))
if not out.isOpened():
    # Otherwise every write below is silently dropped and the cell would still
    # report success.
    cap.release()
    raise RuntimeError(f"Cannot open output video for writing: {VIDEO_OUT}")
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind again
prev_draw = None  # quad drawn on the previous frame (after clamping)

try:
    for f in range(N):
        ok, frame = cap.read()
        if not ok:
            break
        quad = tracks[f].copy()

        if not np.isfinite(quad).all():
            # A corner without any track stays NaN after PASS-1; casting NaN to
            # int32 would draw a garbage polygon.  Keep the frame, skip overlay.
            out.write(frame)
            continue

        # aspect-ratio snap-back (fix TL when needed)
        w_now = (
            np.linalg.norm(quad[1] - quad[0]) + np.linalg.norm(quad[2] - quad[3])
        ) * 0.5
        h_now = (
            np.linalg.norm(quad[3] - quad[0]) + np.linalg.norm(quad[2] - quad[1])
        ) * 0.5
        if h_now > 0 and abs(w_now / h_now - BASE_RATIO) > BASE_RATIO * RATIO_TOL:
            # candidate TL that completes a parallelogram with the other three
            alt_tl = quad[1] + quad[3] - quad[2]
            w_alt = (
                np.linalg.norm(quad[1] - alt_tl) + np.linalg.norm(quad[2] - quad[3])
            ) * 0.5
            h_alt = (
                np.linalg.norm(quad[3] - alt_tl) + np.linalg.norm(quad[2] - quad[1])
            ) * 0.5
            # adopt it only if it brings the ratio closer to the expected one
            if h_alt > 0 and abs(w_alt / h_alt - BASE_RATIO) < abs(
                w_now / h_now - BASE_RATIO
            ):
                quad[0] = alt_tl

        # gentle per-frame clamp
        if prev_draw is not None:
            for i in range(4):
                move = quad[i] - prev_draw[i]
                d = np.linalg.norm(move)
                if d > MAX_STEP_PX:
                    # limit the step length, keep its direction
                    quad[i] = prev_draw[i] + move / d * MAX_STEP_PX
        prev_draw = quad

        # draw
        poly = quad.astype(np.int32)
        mask = np.zeros_like(frame)
        cv2.fillPoly(mask, [poly], (0, 255, 0))
        blended = cv2.addWeighted(frame, 1 - ALPHA, mask, ALPHA, 0)
        cv2.polylines(blended, [poly], True, (0, 255, 0), 2)
        out.write(blended)
finally:
    # Release both handles even if a frame fails, so the output file is
    # finalised and the input is not left open in the kernel.
    cap.release()
    out.release()
print("✅  Finished – overlay written to", VIDEO_OUT)


✅  Finished – overlay written to aruco_green_overlay.mp4
