# MMPose — Video Pose Demo (YOLO/OpenCV-style overlay)

This notebook installs **MMPose** and runs 2D pose estimation on an input video,
then draws a simple white **nodes + connections + labels** overlay on a dimmed, blurred background.

Works in **Google Colab** or local Jupyter. Set `input_path` and `output_path` in the **Paths** cell, then run all cells in order; the last cell runs inference and previews the result.

### Setup

In [None]:
%%bash
# Step 1: Install dependencies (Colab/Jupyter).
# `%%bash` is a cell magic, so it has to be the very first line of the cell;
# IPython does not recognise it when a comment precedes it.
pip -q install -U openmim
# -q is placed after the subcommand so that it is forwarded to pip.
# mmcv is capped below 2.2.0 because mmdet 3.3.x rejects newer mmcv at import
# time (NOTE: re-check this bound against the mmdet/mmpose versions you install).
mim install -q 'mmengine>=0.10.0' 'mmcv>=2.0.0,<2.2.0' 'mmdet>=3.3.0'
pip -q install -U mmpose opencv-python

In [None]:
# Step 2: Imports
import os, math
import numpy as np
import cv2
from IPython.display import display, Video
from mmpose.apis import MMPoseInferencer

### Paths

In [None]:
# Edit these paths before running the final cell; both are passed to
# infer_and_render_mmpose_video.
input_path  = '/content/input.mp4'  # source video to run pose estimation on
output_path = '/content/mmpose_out.mp4'  # rendered overlay video is written here

### Visualization settings (white nodes/links + labels on dimmed/blurred background)

In [None]:
# Config for the simple visualization.
# These module-level names are read directly by draw_pose_overlay.
conf_thresh = 0.1       # keypoints scoring below this are not drawn, nor are edges touching them
node_radius = 2         # circle radius for each node
line_thickness = 1      # thickness of the thin line drawn for each connection
font = cv2.FONT_HERSHEY_SIMPLEX  # font used for the keypoint name labels
font_scale = 0.35       # label text scale passed to cv2.putText
font_thickness = 1      # label stroke thickness passed to cv2.putText

# Background styling (handed to dim_and_blur by the rendering loop)
bg_blur_sigma   = 7     # Gaussian blur sigma for background; dim_and_blur skips the blur when <= 0
bg_dim_factor   = 0.35  # 0..1, multiply background brightness

# Double-stroke lines: a wide faint band is blended in under each thin line
wide_line_px = 10       # thickness of the wide band
wide_line_alpha = 0.3   # blend weight of the wide band against the existing overlay

# COCO keypoint names (17 points).
# draw_pose_overlay labels keypoint row i with KP_NAMES[i] and falls back to
# 'k<i>' for any row beyond the end of this list.
KP_NAMES = [
    'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist', 'left_hip', 'right_hip',
    'left_knee', 'right_knee', 'left_ankle', 'right_ankle'
]

# Skeleton edges (pairs of indices).
# Each pair indexes rows of the keypoint array, in the same order as KP_NAMES;
# draw_pose_overlay skips any pair whose index is outside the array.
SKELETON = [
    (5, 6),                 # shoulders
    (5, 7), (7, 9),         # left arm
    (6, 8), (8,10),         # right arm
    (11,12),                # hips
    (5,11), (6,12),         # torso
    (11,13), (13,15),       # left leg
    (12,14), (14,16),       # right leg
    (0,1), (0,2), (1,3), (2,4)  # head/face links
]

def dim_and_blur(bgr, sigma=7, dim=0.35):
    """Return a blurred, darkened uint8 copy of ``bgr``.

    The image is Gaussian-blurred with ``sigma`` (the blur is skipped when
    ``sigma`` is not positive), then every value is multiplied by ``dim`` and
    clipped to 0..255. The input array is not modified.
    """
    # A (0, 0) kernel size lets OpenCV derive the kernel from sigma.
    blurred = cv2.GaussianBlur(bgr, (0, 0), sigma) if sigma > 0 else bgr
    # Scale in float32 so the multiplication cannot wrap around in uint8.
    scaled = blurred.astype(np.float32) * float(dim)
    return np.clip(scaled, 0, 255).astype(np.uint8)

def draw_pose_overlay(base_bgr, kpts_xyc, overlay=None):
    """Render one pose (white edges, nodes and name labels) onto an image.

    base_bgr: frame that is copied to draw on when ``overlay`` is None.
    kpts_xyc: np.ndarray [num_points, 3] with one (x, y, conf) row per keypoint.
    overlay:  optional image to draw on in place; it is also the return value.

    Keypoints whose confidence is below ``conf_thresh`` are not drawn, and an
    edge is drawn only when both of its endpoints pass the threshold.
    """
    if overlay is None:
        overlay = base_bgr.copy()

    white = (255, 255, 255)
    num_points = kpts_xyc.shape[0]

    # Edges: a wide translucent band first, then a thin opaque line on top.
    for start_idx, end_idx in SKELETON:
        if max(start_idx, end_idx) >= num_points:
            continue
        sx, sy, s_conf = kpts_xyc[start_idx]
        ex, ey, e_conf = kpts_xyc[end_idx]
        if s_conf < conf_thresh or e_conf < conf_thresh:
            continue
        p_start = (int(sx), int(sy))
        p_end = (int(ex), int(ey))

        # Draw the band on a scratch copy and blend it back into the overlay
        # so it shows up at wide_line_alpha strength only.
        band_layer = overlay.copy()
        cv2.line(band_layer, p_start, p_end, white, wide_line_px, lineType=cv2.LINE_AA)
        cv2.addWeighted(band_layer, wide_line_alpha, overlay, 1.0 - wide_line_alpha, 0, overlay)

        cv2.line(overlay, p_start, p_end, white, line_thickness, lineType=cv2.LINE_AA)

    # Nodes and their text labels.
    for idx, row in enumerate(kpts_xyc):
        x, y, conf = row
        if conf < conf_thresh:
            continue
        px, py = int(x), int(y)
        cv2.circle(overlay, (px, py), node_radius, white, -1, lineType=cv2.LINE_AA)

        if idx < len(KP_NAMES):
            label = KP_NAMES[idx]
        else:
            label = f'k{idx}'
        # Offset the label right/up of the node; the y floor of 12 keeps text
        # for nodes near the top edge inside the image.
        label_pos = (px + 6, max(12, py - 6))
        cv2.putText(overlay, label, label_pos, font, font_scale, white, font_thickness, lineType=cv2.LINE_AA)

    return overlay

### Inference + Rendering

In [None]:
def _iter_pose_instances(predictions):
    """Yield per-person prediction dicts from an inferencer 'predictions' entry.

    Accepts a flat list of instance dicts or a list of per-image lists of
    instance dicts; ``None`` and any other element types are ignored.
    """
    for item in predictions or []:
        if isinstance(item, dict):
            yield item
        elif isinstance(item, (list, tuple)):
            yield from _iter_pose_instances(item)


def _instance_to_xyc(det):
    """Convert one prediction dict to a float32 [K, 3] array of (x, y, conf).

    Returns ``None`` when the dict has no usable 'keypoints' entry or when the
    number of scores does not match the number of keypoints. Missing scores
    are treated as 1.0.
    """
    kxy = det.get('keypoints', None)
    if kxy is None:
        return None
    kxy = np.asarray(kxy, dtype=np.float32)
    if kxy.ndim != 2 or kxy.shape[1] < 2:
        return None
    kxy = kxy[:, :2]  # keep only x, y so the result always has 3 columns

    ksc = det.get('keypoint_scores', None)
    if ksc is None:
        ksc = np.ones((kxy.shape[0],), dtype=np.float32)
    else:
        ksc = np.asarray(ksc, dtype=np.float32)
    ksc = ksc.reshape(-1, 1)
    if ksc.shape[0] != kxy.shape[0]:
        return None
    return np.concatenate([kxy, ksc], axis=1).astype(np.float32)


def infer_and_render_mmpose_video(input_video_path, output_path,
                                  pose_alias='rtmpose-s', det_alias='rtmdet-tiny'):
    """
    Runs MMPose (with internal detector) on a video and writes a rendered MP4.
    - input_video_path: path of the video to read.
    - output_path: path of the MP4 to write (encoded with the 'mp4v' fourcc).
    - pose_alias: MMPose model alias (e.g., 'rtmpose-s', 'rtmpose-m').
    - det_alias: detector alias (e.g., 'rtmdet-tiny', 'rtmdet-s').

    Frames are decoded here with OpenCV and passed to the inferencer one at a
    time, so the original frame is always available for the dimmed/blurred
    background and every decoded frame produces exactly one output frame.

    Raises RuntimeError if the input video or the output writer cannot be
    opened.
    """
    # Initialize MMPose high-level inferencer. It will auto-download weights.
    inferencer = MMPoseInferencer(pose2d=pose_alias, det_model=det_alias)

    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        raise RuntimeError(f'Cannot open video: {input_video_path}')

    writer = None
    try:
        # Probe video size & fps (fall back to 30 when the container reports 0)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS) or 30

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Explicit raise instead of assert: asserts are stripped under `python -O`.
        if not writer.isOpened():
            raise RuntimeError(f'Could not open VideoWriter for {output_path}')

        while True:
            ok, frame_bgr = cap.read()
            if not ok:
                break

            # The inferencer call returns a generator; a single image input
            # yields a single result dict.
            res = next(inferencer(frame_bgr), None)
            preds = res.get('predictions', []) if res is not None else []

            # Prepare the background (dim + blur)
            bg = dim_and_blur(frame_bgr, sigma=bg_blur_sigma, dim=bg_dim_factor)
            overlay = bg.copy()

            # Draw all detected people in the frame
            for det in _iter_pose_instances(preds):
                kpts_xyc = _instance_to_xyc(det)
                if kpts_xyc is None:
                    continue
                overlay = draw_pose_overlay(bg, kpts_xyc, overlay=overlay)

            writer.write(overlay)
    finally:
        # Release both handles even if inference or drawing raised, so the
        # output file is finalized and the input is not left open.
        cap.release()
        if writer is not None:
            writer.release()

    print('[OK] Wrote:', output_path)

In [None]:
# Run the pipeline on the configured paths
infer_and_render_mmpose_video(
    input_path,
    output_path,
    pose_alias='rtmpose-s',
    det_alias='rtmdet-tiny',
)

# Inline preview (autoplay + loop + controls), shown only if the file exists
if os.path.exists(output_path):
    display(
        Video(
            output_path,
            embed=True,
            html_attributes='autoplay loop controls',
        )
    )