In [39]:
import cv2

# Probe the OpenCV build for the ArUco module before any tracking code runs.
# The tracking cell calls cv2.aruco.getPredefinedDictionary and
# cv2.aruco.ArucoDetector, so a build without the module would otherwise fail
# much later with an opaque AttributeError.
if hasattr(cv2, "aruco"):
    print("Aruco is available.")
else:
    raise ImportError(
        "cv2.aruco is not available in this OpenCV build; "
        "install a build that bundles it (e.g. opencv-contrib-python)."
    )


Aruco is available.


In [41]:
# Cell: find-QR-then-overlay
import cv2, json, numpy as np, logging, math, os

# ── files ───────────────────────────────────────────────────────────
# Input clip that is scanned for the metadata QR and the ArUco markers.
VIDEO_IN = "aruco.mp4"
# Destination of the rendered clip with the green quad drawn on top.
VIDEO_OUT = "aruco_green_overlay.mp4"

# ── logging ─────────────────────────────────────────────────────────
# Message-only DEBUG log written to a file that is truncated on every run
# (filemode "w").
logging.basicConfig(
    level=logging.DEBUG,
    format="%(message)s",
    filename="aruco_debug.log",
    filemode="w",
)
log = logging.getLogger("aruco")

# ═════════ 1st pass : hunt for the QR with metadata ═════════════════
# Scan the clip frame by frame until a QR code decodes to a non-empty
# payload, then parse that payload as the JSON metadata object `meta`.
#
# Raises RuntimeError when:
#   * the input video cannot be opened,
#   * the first decodable QR does not contain valid JSON,
#   * the JSON is valid but is not an object (dict),
#   * no QR code is found in the whole clip.
# On any failure the capture is released; on success it stays open because
# the second pass below reuses `cap`.
cap = cv2.VideoCapture(VIDEO_IN)
if not cap.isOpened():
    # Without this check a missing/unreadable file would be misreported as
    # "No metadata QR code found".
    cap.release()
    raise RuntimeError(f"Cannot open input video: {VIDEO_IN!r}")
qr   = cv2.QRCodeDetector()

meta = None
try:
    while True:
        ok, frame = cap.read()
        if not ok:                                  # end-of-video
            break
        payload, *_ = qr.detectAndDecode(frame)
        if not payload:                             # nothing decoded in this frame
            continue
        try:
            meta = json.loads(payload)
        except json.JSONDecodeError as exc:
            # keep the original parse error attached for debugging
            raise RuntimeError("QR decoded but payload is not valid JSON") from exc
        if not isinstance(meta, dict):
            # e.g. a QR holding "123" or "[1, 2]": valid JSON, unusable metadata
            raise RuntimeError("QR decoded but payload is not a JSON object")
        break                                       # stop at first good QR

    if meta is None:
        raise RuntimeError("No metadata QR code found in the entire video")
except Exception:
    cap.release()                                   # do not leak the capture on failure
    raise

# ── verify mandatory keys in the JSON ───────────────────────────────
# Every key the rest of the cell reads from the QR metadata must be present;
# abort with the set of absent keys otherwise.
need = {
    "dictionary",
    "marker_size_px",
    "margin_px",
    "corner_markers",
    "screen_aspect",
}
missing = need.difference(meta.keys())
if len(missing) > 0:
    raise RuntimeError(f"QR metadata missing keys: {missing}")

# ═════════ constants derived ONLY from the QR ═══════════════════════
aruco_dict_name = meta["dictionary"]
# The name comes from a QR code inside the video, i.e. untrusted input.
# Only accept names of predefined-dictionary constants (they all start with
# "DICT_") that this OpenCV build actually exposes; anything else would make
# getattr() hand an arbitrary cv2.aruco attribute to getPredefinedDictionary.
if not (isinstance(aruco_dict_name, str)
        and aruco_dict_name.startswith("DICT_")
        and hasattr(cv2.aruco, aruco_dict_name)):
    raise RuntimeError(
        f"QR metadata names an unknown ArUco dictionary: {aruco_dict_name!r}")
dictionary      = cv2.aruco.getPredefinedDictionary(
                      getattr(cv2.aruco, aruco_dict_name))
margin_px  = int(meta["margin_px"])
BASE_RATIO = float(meta["screen_aspect"])   # expected width/height of the tracked quad

# corner mapping  — ID → (internal-corner-idx, (dx,dy shift))
# `sign` gives the direction in which the chosen marker corner is pushed by
# margin_px for each screen corner label (image coordinates: +y is down).
sign = {"TL":(-1,-1), "TR":(1,-1), "BR":(1,1), "BL":(-1,1)}
corner_map = {}
for lbl in ("TL","TR","BR","BL"):
    info = meta["corner_markers"][lbl]
    sx, sy = sign[lbl]
    corner_map[info["id"]] = (info["corner_index"],
                              (sx*margin_px, sy*margin_px))

# heuristics scale with marker size (each has a fixed lower bound in pixels)
marker_sz  = int(meta["marker_size_px"])
JUMP_PX    = max(marker_sz//2, 20)        # max accepted move between measurements
OUTLIER_PX = max(marker_sz//3, 10)        # single-frame spike threshold
MAX_STEP_PX= max(marker_sz//10, 4)        # per-frame clamp when drawing
RATIO_TOL  = 0.10                         # keep ±10 %

# ═════════ 2nd pass : full tracking & drawing ═══════════════════════
# Stream properties as reported by the capture backend: frame rate, frame
# size in pixels, and frame count.
fps = cap.get(cv2.CAP_PROP_FPS)
w, h, N = (int(cap.get(prop)) for prop in (cv2.CAP_PROP_FRAME_WIDTH,
                                           cv2.CAP_PROP_FRAME_HEIGHT,
                                           cv2.CAP_PROP_FRAME_COUNT))
# The QR hunt consumed frames from `cap`; go back to the first frame.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# Marker detector for the dictionary named in the QR, default parameters.
detector_params = cv2.aruco.DetectorParameters()
detector = cv2.aruco.ArucoDetector(dictionary, detector_params)

def shift(pt, dx, dy):
    """Return the 2-D point ``pt`` translated by ``(dx, dy)``.

    Only ``pt[0]`` and ``pt[1]`` are read; the result is a new float32
    array of shape ``(2,)``.
    """
    x, y = pt[0], pt[1]
    moved = [x + dx, y + dy]
    return np.array(moved, np.float32)

# tracks[f, slot] = measured screen corner for frame f, NaN where unmeasured.
# Slots are ordered TL, TR, BR, BL — the order the drawing pass relies on.
tracks    = np.full((N,4,2), np.nan, np.float32)
last_good = [None]*4          # last accepted point per slot
last_seen = [None]*4          # frame index of that accepted point

# Screen-corner slot for each marker ID.  This is deliberately separate from
# the marker's *internal* corner index stored in corner_map: the two only
# coincide when the metadata picks corner 0/1/2/3 for TL/TR/BR/BL.
slot_of = {meta["corner_markers"][label]["id"]: slot
           for slot, label in enumerate(("TL","TR","BR","BL"))}

# ── pass 2a : collect / clean measurements ──────────────────────────
for f in range(N):
    ok, frm = cap.read()
    if not ok: break
    corners, ids, _ = detector.detectMarkers(frm)
    if ids is None: continue
    for idx, mid in enumerate(ids.flatten()):
        if mid not in corner_map: continue
        c,(dx,dy) = corner_map[mid]               # c = corner of the marker to use
        slot = slot_of[mid]                       # where it goes in the quad
        raw = shift(corners[idx][0][c], dx, dy)
        if last_good[slot] is not None:
            # Allow JUMP_PX of motion per elapsed frame.  A fixed gate would
            # reject every measurement forever once the marker has moved
            # further than JUMP_PX during a detection gap (or if the first
            # accepted point was a false detection).
            gap = max(f - last_seen[slot], 1)
            if np.linalg.norm(raw-last_good[slot]) > JUMP_PX*gap:
                continue                          # spike → ignore
        tracks[f,slot]  = raw
        last_good[slot] = raw
        last_seen[slot] = f

# kill single-frame spikes, then linear-interpolate gaps
frame_idx = np.arange(N)
for corner in range(4):
    # `path` is a view into `tracks`, so edits below land in `tracks` directly.
    path = tracks[:, corner, :]
    # Walk forward in time: a sample is dropped when it and both neighbours
    # are measured and it lies more than OUTLIER_PX from the neighbours'
    # midpoint.  Order matters — a dropped sample makes the next test skip.
    for t in range(1, N - 1):
        window = path[t - 1:t + 2]
        if np.isnan(window).any():
            continue
        midpoint = (window[0] + window[2]) / 2
        if np.linalg.norm(window[1] - midpoint) > OUTLIER_PX:
            path[t] = np.nan
    # Fill every gap per coordinate by linear interpolation over frame index;
    # a coordinate with no measurements at all is left as NaN.
    for axis in range(2):
        column = tracks[:, corner, axis]
        valid = ~np.isnan(column)
        if not valid.any():
            continue
        tracks[:, corner, axis] = np.interp(frame_idx,
                                            np.where(valid)[0],
                                            column[valid])

# ── pass 2b : draw overlay with snap-back & clamp ───────────────────
# Render the cleaned tracks onto the clip: for each frame, optionally repair
# the TL corner from the other three, limit per-corner motion to MAX_STEP_PX
# per frame, then blend a filled green quad plus outline into the frame.
# The capture and the writer are released even if rendering fails.
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out    = cv2.VideoWriter(VIDEO_OUT, fourcc, fps, (w,h))
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)       # rewind again
prev_draw = None
ALPHA = 0.30                              # overlay opacity

try:
    if not out.isOpened():
        # otherwise out.write() silently drops every frame
        raise RuntimeError(f"Cannot open output video for writing: {VIDEO_OUT!r}")

    for f in range(N):
        ok, frame = cap.read()
        if not ok: break
        quad = tracks[f].copy()               # rows: TL, TR, BR, BL

        # A corner that was never detected stays NaN after interpolation.
        # Casting NaN to int32 yields garbage coordinates, so pass the frame
        # through untouched instead of drawing a bogus polygon.
        if not np.isfinite(quad).all():
            out.write(frame)
            continue

        # aspect-ratio snap using TL = TR + BL – BR
        w_now = (np.linalg.norm(quad[1]-quad[0])+np.linalg.norm(quad[2]-quad[3]))*0.5
        h_now = (np.linalg.norm(quad[3]-quad[0])+np.linalg.norm(quad[2]-quad[1]))*0.5
        if h_now>0 and abs(w_now/h_now - BASE_RATIO) > BASE_RATIO*RATIO_TOL:
            alt_tl = quad[1] + quad[3] - quad[2]
            w_alt  = (np.linalg.norm(quad[1]-alt_tl)+np.linalg.norm(quad[2]-quad[3]))*0.5
            h_alt  = (np.linalg.norm(quad[3]-alt_tl)+np.linalg.norm(quad[2]-quad[1]))*0.5
            # only take the reconstructed TL if it brings the ratio closer
            if h_alt>0 and abs(w_alt/h_alt - BASE_RATIO) < abs(w_now/h_now - BASE_RATIO):
                quad[0] = alt_tl

        # gentle per-frame clamp
        if prev_draw is not None:
            for i in range(4):
                move = quad[i]-prev_draw[i]
                d = np.linalg.norm(move)
                if d > MAX_STEP_PX:
                    quad[i] = prev_draw[i] + move/d*MAX_STEP_PX
        prev_draw = quad

        # draw
        poly = quad.astype(np.int32)
        overlay = frame.copy()
        cv2.fillPoly(overlay, [poly], (0,255,0))
        frame = cv2.addWeighted(frame, 1-ALPHA, overlay, ALPHA, 0)
        cv2.polylines(frame, [poly], True, (0,255,0), 2)
        out.write(frame)
finally:
    cap.release(); out.release()

print("✅  Finished – written to", VIDEO_OUT)


✅  Finished – written to aruco_green_overlay.mp4
