In [123]:
from IPython.display import display
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt

def imshow(image):
    """Display an OpenCV image in a 10x10 inch matplotlib figure with axes hidden.

    Args:
        image: Image array in OpenCV channel order. A 2-D (single-channel)
            array, e.g. a binary mask, is expanded with ``cv2.COLOR_GRAY2RGB``;
            every other input is converted with ``cv2.COLOR_BGR2RGB``.
    """
    # COLOR_BGR2RGB rejects single-channel input, so masks need their own conversion code.
    is_single_channel = getattr(image, "ndim", 3) == 2
    conversion = cv2.COLOR_GRAY2RGB if is_single_channel else cv2.COLOR_BGR2RGB

    rgb = cv2.cvtColor(image, conversion)
    pil_img = Image.fromarray(rgb)
    plt.figure(figsize=(10, 10))
    plt.imshow(pil_img)
    plt.axis('off')
    plt.show()

def preprocess(image):
    """Return a Gaussian-smoothed copy of ``image`` (5x5 kernel, sigmaX passed as 0)."""
    kernel_size = (5, 5)
    return cv2.GaussianBlur(image, kernel_size, 0)

In [124]:
# clicked_points = []
# Most recently accepted homography (template/board -> camera frame), or None until
# pipeline() stores one through `global _last_H`.
_last_H = None

# def _on_mouse(event, x, y, flags, param):
#     if event == cv2.EVENT_LBUTTONDOWN:
#         clicked_points.append((x, y))
#         msg = f"click: ({x}, {y})"
#         # Also print board-space if homography is available
#         try:
#             if _last_H is not None:
#                 H_inv = np.linalg.inv(_last_H)  # camera -> board
#                 p = np.array([[[x, y]]], dtype=np.float32)
#                 q = cv2.perspectiveTransform(p, H_inv)[0, 0]
#                 msg += f" | board: ({int(q[0])}, {int(q[1])})"
#         except np.linalg.LinAlgError:
#             pass
#         print(msg)

def process_video(file, show=True):
    """Run every frame of ``data/{file}.mp4`` through ``pipeline`` and optionally preview it.

    NOTE(review): a later cell defines another ``process_video`` with an extra
    ``duration_sec`` parameter; whichever cell ran last is the one that is called.

    Args:
        file: Base name of the clip; the video is read from ``data/{file}.mp4``.
        show: When True, frames are displayed in a window named "video";
            space toggles pause and "q" stops playback. When False no HighGUI
            function is called at all, so the loop is not throttled and can run
            without a display.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    in_path = f"data/{file}.mp4"
    cap = cv2.VideoCapture(in_path)
    if not cap.isOpened():
        print(f"Error: Cannot open {in_path}")
        cap.release()
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    fps = fps if fps and fps > 0 else 25.0
    # waitKey(0) means "wait for a key press", so the playback delay must never truncate to 0.
    delay_ms = max(1, int(1000 / fps))

    paused = False
    frame_out = None

    try:
        if show:
            cv2.namedWindow("video")

        while True:
            # While paused, keep re-showing the last processed frame instead of reading a new one.
            if not paused or frame_out is None:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_out = pipeline(frame)

            if not show:
                # No window means no key events: skip the GUI calls entirely.
                continue

            cv2.imshow("video", frame_out)
            key = cv2.waitKey(0 if paused else delay_ms) & 0xFF
            if key == ord("q"):
                break
            elif key == ord(" "):  # spacebar
                paused = not paused
    finally:
        # Release the capture even if pipeline() raises mid-stream.
        cap.release()
        if show:
            cv2.destroyAllWindows()

In [125]:
import subprocess
import os

def process_video(file, show=True, duration_sec=30):
    """Process ``data/{file}.mp4`` with ``pipeline`` and save the result as H.264 MP4.

    Frames are first written to ``temp_{file}.avi`` with the MJPG codec and then
    transcoded by the external ``ffmpeg`` executable to
    ``data/{file}_output.mp4``. The temporary file is removed only after a
    successful conversion.

    Args:
        file: Base name of the clip; input is ``data/{file}.mp4``.
        show: When True, frames are previewed in a window named "video"; space
            toggles pause and "q" stops early. When False no HighGUI function
            is called, so processing is not throttled to the playback rate.
        duration_sec: Maximum amount of video to process, in seconds; at most
            ``int(fps * duration_sec)`` frames are written.

    Returns:
        None. Problems (unreadable input, writer failure, missing or failing
        ffmpeg) are reported with ``print`` and end the function early.
    """
    in_path = f"data/{file}.mp4"
    temp_path = f"temp_{file}.avi"
    out_path = f"data/{file}_output.mp4"

    cap = cv2.VideoCapture(in_path)
    if not cap.isOpened():
        print(f"Error: Cannot open {in_path}")
        cap.release()
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    fps = fps if fps and fps > 0 else 25.0
    # waitKey(0) means "wait for a key press", so the playback delay must never truncate to 0.
    delay_ms = max(1, int(1000 / fps))

    total_frames = int(fps * duration_sec)
    frame_count = 0

    # Use MJPEG codec (more reliable than mp4v)
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(temp_path, fourcc, fps, (frame_width, frame_height))

    if not out.isOpened():
        print("Error: Cannot open VideoWriter")
        cap.release()
        out.release()
        return

    paused = False
    frame_out = None

    print(f"Processing {total_frames} frames...")

    try:
        if show:
            cv2.namedWindow("video")

        while frame_count < total_frames:
            # While paused, keep re-showing the last frame; nothing new is read or written.
            if not paused or frame_out is None:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_out = pipeline(frame)
                out.write(frame_out)
                frame_count += 1

            if not show:
                # No window means no key events: skip the GUI calls entirely.
                continue

            cv2.imshow("video", frame_out)
            key = cv2.waitKey(0 if paused else delay_ms) & 0xFF
            if key == ord("q"):
                break
            elif key == ord(" "):
                paused = not paused
    finally:
        # Always finalise the container and free the capture, even if pipeline() raises.
        cap.release()
        out.release()
        if show:
            cv2.destroyAllWindows()

    print(f"✓ Temp video saved: {temp_path}")

    # Convert to MP4 with ffmpeg
    print(f"Converting to {out_path}...")
    cmd = [
        "ffmpeg", "-i", temp_path,
        "-c:v", "libx264", "-crf", "23",
        "-y", out_path
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except FileNotFoundError:
        # subprocess raises this when the ffmpeg executable itself is not on PATH.
        print(f"Error: ffmpeg executable not found - temp video kept at {temp_path}")
        return
    except subprocess.CalledProcessError as e:
        # stderr is raw bytes from ffmpeg; never let a decoding problem hide the real error.
        print(f"Error: ffmpeg failed - {e.stderr.decode(errors='replace')}")
        return

    print(f"✓ Video saved: {out_path}")
    os.remove(temp_path)

In [126]:
def get_mask_of_board(frame_bgr, lower_hsv=(20, 40, 40), upper_hsv=(140, 255, 255), min_area=500):
    """Build a filled convex-hull mask around the HSV-thresholded regions of an image.

    The image is thresholded in HSV, cleaned with a 7x7 close followed by a
    7x7 open, and the external contours whose area exceeds ``min_area`` are
    merged into one convex hull, which is drawn filled.

    Args:
        frame_bgr: BGR image as returned by ``cv2.imread``.
        lower_hsv: Inclusive lower HSV bound passed to ``cv2.inRange``.
        upper_hsv: Inclusive upper HSV bound passed to ``cv2.inRange``.
        min_area: Contours with an area not greater than this are ignored.

    Returns:
        Single-channel mask with the same shape and dtype as the threshold
        mask: 255 inside the hull, 0 elsewhere.

    Raises:
        ValueError: If no contour passes the area filter, i.e. no board-coloured
            region was found in the image.
    """
    hsv = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2HSV)

    mask = cv2.inRange(hsv, np.array(lower_hsv), np.array(upper_hsv))

    # Close first to bridge small gaps, then open to drop isolated specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    filtered = [c for c in contours if cv2.contourArea(c) > min_area]
    if not filtered:
        # np.vstack([]) would raise an opaque "need at least one array" ValueError here.
        raise ValueError(
            f"get_mask_of_board: no board-coloured region with area > {min_area} found"
        )

    all_points = np.vstack(filtered)
    hull = cv2.convexHull(all_points)

    board_mask = np.zeros_like(mask)
    cv2.drawContours(board_mask, [hull], -1, 255, -1)

    return board_mask

# Reference (template) image of the board and the mask restricting feature detection to it.
BOARD_IMG = cv2.imread("data/board.jpg")
if BOARD_IMG is None:
    # cv2.imread reports a missing/unreadable file by returning None rather than raising.
    raise FileNotFoundError("Cannot read board template image: data/board.jpg")
BOARD_IMG_MASK = get_mask_of_board(BOARD_IMG)

In [127]:
def preprocess_for_features(bgr):
    """Prepare a BGR image for feature detection.

    Steps: convert to grayscale, equalise contrast with CLAHE
    (clipLimit 2.0, 8x8 tiles), then apply a 3x3 Gaussian blur.

    Returns:
        The blurred single-channel image.
    """
    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(grayscale)
    return cv2.GaussianBlur(equalized, (3, 3), 0)

def find_H(template_bgr, frame_bgr, template_mask=None):
    """Estimate the homography that maps template coordinates onto the frame.

    ORB features are matched with a brute-force Hamming matcher, filtered by a
    0.75 ratio test, and passed to ``cv2.findHomography`` (USAC_MAGSAC when the
    OpenCV build exposes it, RANSAC otherwise) with a 3.0 reprojection threshold.

    Args:
        template_bgr: BGR reference image.
        frame_bgr: BGR image in which the template is searched.
        template_mask: Optional mask limiting where template keypoints are detected.

    Returns:
        ``(H, inliers)`` on success, or ``(None, 0)`` when either image yields
        fewer than 8 keypoints / no descriptors, fewer than 25 matches survive
        the ratio test, or the estimator returns nothing.
    """
    # ORB operates on single-channel images, so both inputs are preprocessed first.
    template_gray = preprocess_for_features(template_bgr)
    frame_gray = preprocess_for_features(frame_bgr)

    detector = cv2.ORB_create(
        nfeatures=6000,
        scaleFactor=1.2,
        nlevels=8,
        fastThreshold=10,
    )
    kp_template, desc_template = detector.detectAndCompute(template_gray, template_mask)
    kp_frame, desc_frame = detector.detectAndCompute(frame_gray, None)

    enough_features = (
        desc_template is not None
        and desc_frame is not None
        and len(kp_template) >= 8
        and len(kp_frame) >= 8
    )
    if not enough_features:
        return None, 0

    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
    candidate_pairs = matcher.knnMatch(desc_template, desc_frame, k=2)

    # Ratio test: keep a match only when it is clearly better than the runner-up.
    good = [
        pair[0]
        for pair in candidate_pairs
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]
    if len(good) < 25:
        return None, 0

    src = np.float32([kp_template[match.queryIdx].pt for match in good]).reshape(-1, 1, 2)
    dst = np.float32([kp_frame[match.trainIdx].pt for match in good]).reshape(-1, 1, 2)

    # Prefer USAC if available (OpenCV builds vary)
    method = getattr(cv2, "USAC_MAGSAC", cv2.RANSAC)

    H, inlier_mask = cv2.findHomography(src, dst, method, 3.0)
    if H is None or inlier_mask is None:
        return None, 0

    return H, int(inlier_mask.ravel().sum())


# Re-running this cell clears the cached homography: pipeline() returns frames
# unchanged until find_H yields a new estimate with at least 30 inliers.
_last_H = None

In [128]:
def draw_board(frame, H_use):
    """Draw the board outline on a copy of ``frame``.

    The four hard-coded corner coordinates (given in board-image pixels) are
    projected into the frame with ``H_use`` and joined by 5 px lines drawn with
    colour ``(255, 0, 0)``.

    Args:
        frame: Image to annotate; it is copied, not modified.
        H_use: 3x3 homography mapping board-image coordinates to frame coordinates.

    Returns:
        The annotated copy of ``frame``.
    """
    corners_board = np.float32(
        [(480, 590), (3000, 570), (2987, 3141), (445, 3088)]
    ).reshape(-1, 1, 2)

    # Project all corners in one call instead of one perspectiveTransform per point.
    corners_cam = cv2.perspectiveTransform(corners_board, H_use).reshape(-1, 2)
    points_cam = [tuple(map(int, corner)) for corner in corners_cam]

    output = frame.copy()
    # Pair each corner with its successor, wrapping around to close the quadrilateral.
    for start, end in zip(points_cam, points_cam[1:] + points_cam[:1]):
        cv2.line(output, start, end, (255, 0, 0), 5)
    return output

In [129]:
def _color_mask(rgb, lower, upper, kernel):
    """Threshold ``rgb`` to ``[lower, upper]`` and clean the mask with close, open, close."""
    mask = cv2.inRange(rgb, np.array(lower), np.array(upper))
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        mask = cv2.morphologyEx(mask, operation, kernel)
    return mask


def _disk_overlaps(mask, disk, x, y, r):
    """Return True if ``mask`` is non-zero anywhere under ``disk`` centred at ``(x, y)``."""
    height, width = mask.shape[:2]
    left, top = x - r, y - r
    x0, y0 = max(left, 0), max(top, 0)
    x1, y1 = min(x + r + 1, width), min(y + r + 1, height)
    if x0 >= x1 or y0 >= y1:
        # The whole disk lies outside the image.
        return False
    window = mask[y0:y1, x0:x1]
    # Crop the disk the same way the window was clipped at the image border.
    stamp = disk[y0 - top:y1 - top, x0 - left:x1 - left]
    return bool(window[stamp].any())


def mask_all_red_areas(frame, H_use):
    """Label board positions whose surroundings match a red, green or yellow colour range.

    Each hard-coded board position is projected into the frame with ``H_use``.
    If the red mask is non-zero within a radius-5 disk around the projected
    point the position is labelled "RED PIECE"; otherwise green is tried, then
    yellow. Labels and markers are drawn directly on ``frame``.

    NOTE(review): the thresholds are applied to an RGB image, and the "yellow"
    range (R 40-200, G 0-100, B 0-30) overlaps the "red" range
    (R 40-200, G 0-50, B 0-50); confirm these bounds are intended.

    Args:
        frame: BGR image; modified in place.
        H_use: 3x3 homography mapping board-image coordinates to frame coordinates.

    Returns:
        The same ``frame`` object, annotated.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))

    # (label, BGR drawing colour, mask) in priority order: the first hit wins.
    # All masks are built before anything is drawn, so labels never affect detection.
    candidates = [
        ("RED PIECE", (0, 0, 255), _color_mask(rgb, (40, 0, 0), (200, 50, 50), kernel)),
        ("GREEN PIECE", (0, 255, 0), _color_mask(rgb, (0, 40, 0), (150, 200, 150), kernel)),
        ("YELLOW PIECE", (0, 255, 255), _color_mask(rgb, (40, 0, 0), (200, 100, 30), kernel)),
    ]

    r = 5
    # Rasterise the probe disk once with cv2.circle so its footprint matches a
    # per-point cv2.circle mask, then test only the small window around each
    # point instead of AND-ing full-frame masks for every position.
    disk_img = np.zeros((2 * r + 1, 2 * r + 1), dtype=np.uint8)
    cv2.circle(disk_img, (r, r), r, 255, -1)
    disk = disk_img > 0

    # NOTE(review): (2907, 1689) appears twice in this list; confirm whether a
    # different position was intended.
    board_points = [(1529, 2852), (1524, 2691), (1529, 2517), (1529, 2348), (1538, 2188), (1364, 2179), (1364, 2005), (1194, 2005), (1030, 2009), (856, 2014), (700, 2009), (530, 2001), (535, 1831), (713, 1657), (865, 1662), (1038, 1666), (1199, 1671), (1359, 1662), (1359, 1497), (1542, 1506), (1542, 1332), (1551, 1163), (1542, 1002), (1542, 833), (1556, 664), (1730, 655), (1872, 1501), (1872, 1341), (1881, 1163), (1881, 993), (2037, 1506), (2051, 1671), (2211, 1684), (2394, 1693), (2563, 1693), (2737, 1689), (2907, 1689), (2907, 1689), (2911, 1858), (2911, 2032), (2711, 2032), (2550, 2027), (2394, 2023), (2211, 2023), (2033, 2023), (2037, 2188), (1868, 2183), (1863, 2353), (1868, 2535), (1859, 2682), (1859, 2870), (1863, 3034), (1694, 3052)]

    # Project every board position in a single call.
    points_board = np.float32(board_points).reshape(-1, 1, 2)
    points_cam = cv2.perspectiveTransform(points_board, H_use).reshape(-1, 2)

    for px, py in points_cam:
        x, y = int(px), int(py)
        for label, color, mask in candidates:
            if _disk_overlaps(mask, disk, x, y, r):
                cv2.putText(frame, label, (x - 40, y - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
                cv2.circle(frame, (x, y), 7, color, -1)
                break

    return frame

In [130]:
def pipeline(frame):
    """Locate the board in ``frame`` and return the frame with pieces and outline drawn.

    A fresh homography from ``find_H`` is accepted only with at least 30
    inliers; otherwise the last accepted one (module-level ``_last_H``) is
    reused. With no usable homography, or a singular one, the frame is returned
    untouched.

    Args:
        frame: BGR video frame.

    Returns:
        The annotated frame, or the input ``frame`` itself when it is skipped.
    """
    global _last_H
    H, inliers = find_H(BOARD_IMG, frame, template_mask=BOARD_IMG_MASK)

    if H is not None and inliers >= 30:
        _last_H = H
    elif _last_H is None:
        return frame

    H_use = _last_H

    # Only the invertibility check is needed; the rectified board image that used
    # to be warped here on every frame was never read, so that work is dropped.
    try:
        np.linalg.inv(H_use)
    except np.linalg.LinAlgError:
        print("Warning: Singular homography matrix, skipping frame")
        return frame

    frame = mask_all_red_areas(frame, H_use)
    frame = draw_board(frame, H_use)
    return frame
# Process data/easy1.mp4 with a live preview, limited to 30 seconds of video.
process_video("easy1", show=True, duration_sec=30)

Processing 900 frames...
✓ Temp video saved: temp_easy1.avi
Converting to data/easy1_output.mp4...
✓ Video saved: data/easy1_output.mp4
