# Final version that works on all operating systems with images and videos

In [None]:
# -------------------------------------------------------
# MediaPipe PoseLandmarker
# Works with images OR videos
# Cross-platform + notebook-safe
# -------------------------------------------------------

import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import csv
import sys
import subprocess
import platform
import os
from pathlib import Path

# ----------------------------
# BASE DIRECTORY (safe for script & notebook)
# ----------------------------
# A script defines __file__; a notebook cell does not, so looking the name
# up raises NameError there and the current working directory is used.
try:
    BASE_DIR = Path(__file__).resolve().parent
except NameError:
    BASE_DIR = Path.cwd()

# ----------------------------
# CONFIG  (CHANGE THESE)
# ----------------------------
# All locations are given relative to BASE_DIR.

# Input media: an image OR a video -- change the file name here.
INPUT_PATH = BASE_DIR.joinpath("../data/videos/video.mp4")

# PoseLandmarker model bundle -- change the model here.
MODEL_PATH = BASE_DIR.joinpath("../models/pose_landmarker_heavy.task")

# Output directory; created (with missing parents) if it does not exist yet.
OUT_DIR = BASE_DIR.joinpath("../data/output")
OUT_DIR.mkdir(exist_ok=True, parents=True)

# ----------------------------
# FILE TYPE DETECTION
# ----------------------------
# Normalise the path, then choose the processing mode from the (lower-cased)
# file extension.
INPUT_PATH = INPUT_PATH.resolve()
ext = INPUT_PATH.suffix.lower()

IMAGE_EXTS = {".jpg", ".jpeg", ".png"}
VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv"}

if ext in IMAGE_EXTS:
    MODE = "image"
elif ext in VIDEO_EXTS:
    MODE = "video"
else:
    raise ValueError(f"Unsupported file type: {ext}")

# Fail early when the input is missing: otherwise the problem only surfaces
# after the model has been loaded and the output CSV has already been
# re-created (i.e. a previous result would be overwritten with an empty file).
if not INPUT_PATH.is_file():
    raise FileNotFoundError(f"Input not found: {INPUT_PATH}")

# ----------------------------
# OUTPUT PATHS
# ----------------------------
OUT_IMAGE = OUT_DIR.joinpath("annotated_image.png")  # <---- name of annotated output image
OUT_VIDEO = OUT_DIR.joinpath("annotated_video.mp4")  # <---- name of annotated output video
OUT_CSV = OUT_DIR.joinpath("pose_landmarks.csv")     # <---- name of output CSV with landmark data

# ----------------------------
# OS DETECTION + CODEC
# ----------------------------
# Name of the running operating system as reported by platform.system().
system = platform.system()

# Four-character code of the video codec ("mp4v"), spelled out per character.
# IMPORTANT: mp4v works on Windows/macOS/Linux
fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")

# ----------------------------
# STEP 1: PoseLandmarker
# ----------------------------
# Refuse to continue without the model bundle on disk.
if not MODEL_PATH.exists():
    raise FileNotFoundError(f"Model not found: {MODEL_PATH}")

base_options = python.BaseOptions(model_asset_path=str(MODEL_PATH))

# The image and video configurations differ only in the running mode, so
# select the mode first and build the options once.
running_mode = (
    vision.RunningMode.IMAGE if MODE == "image" else vision.RunningMode.VIDEO
)

options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    running_mode=running_mode,
    output_segmentation_masks=False,
)

detector = vision.PoseLandmarker.create_from_options(options)

# ----------------------------
# STEP 2: CSV writer
# ----------------------------
# Column names of the landmark table, in the order the rows are written.
CSV_HEADER = (
    "frame",
    "timestamp_ms",
    "landmark_id",
    "x_norm",
    "y_norm",
    "z_norm",
    "visibility",
    "x_px",
    "y_px",
)

# The file is left open here on purpose; it is closed in the CLEANUP section
# once all landmark rows have been written.
csv_file = OUT_CSV.open("w", newline="")
csv_writer = csv.writer(csv_file)
csv_writer.writerow(CSV_HEADER)

# ----------------------------
# COLORS (RGB)
# ----------------------------
# Landmark points / skeleton connections. The tuples are RGB because the
# overlay is drawn on the RGB copy of the frame before it is converted to BGR.
LANDMARK_COLOR, CONNECTION_COLOR = (0, 255, 0), (255, 0, 0)

# ======================================================
# SHARED HELPER (used by image mode and video mode)
# ======================================================
def _export_pose(result, canvas_rgb, frame_no, time_ms, frame_w, frame_h):
    """Write the first detected pose to the CSV and draw it on ``canvas_rgb``.

    Does nothing when ``result`` contains no pose.

    Args:
        result: Detection result returned by the PoseLandmarker.
        canvas_rgb: RGB image that is annotated in place.
        frame_no: Value written to the ``frame`` CSV column.
        time_ms: Value written to the ``timestamp_ms`` CSV column.
        frame_w: Frame width in pixels, used to scale normalized x values.
        frame_h: Frame height in pixels, used to scale normalized y values.
    """
    if not result.pose_landmarks:
        return

    # Only the first pose in the result is exported.
    pose_landmarks = result.pose_landmarks[0]

    # One CSV row per landmark: normalized values plus pixel coordinates.
    csv_writer.writerows(
        [
            frame_no, time_ms, lm_id,
            lm.x, lm.y, lm.z, lm.visibility,
            int(lm.x * frame_w), int(lm.y * frame_h),
        ]
        for lm_id, lm in enumerate(pose_landmarks)
    )

    # draw_landmarks is fed a NormalizedLandmarkList proto, so the landmarks
    # are copied (x, y, z only) into one.
    pose_proto = landmark_pb2.NormalizedLandmarkList()
    pose_proto.landmark.extend([
        landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
        for lm in pose_landmarks
    ])

    drawing = mp.solutions.drawing_utils
    drawing.draw_landmarks(
        canvas_rgb,
        pose_proto,
        mp.solutions.pose.POSE_CONNECTIONS,
        landmark_drawing_spec=drawing.DrawingSpec(
            color=LANDMARK_COLOR, thickness=2, circle_radius=2
        ),
        connection_drawing_spec=drawing.DrawingSpec(
            color=CONNECTION_COLOR, thickness=2
        ),
    )


# The try/finally guarantees that the CSV file is closed (and therefore
# flushed to disk) even when processing stops with an error.
try:
    # ======================================================
    # IMAGE MODE
    # ======================================================
    if MODE == "image":
        print("üñº Processing image...")

        frame = cv2.imread(str(INPUT_PATH))
        if frame is None:
            raise RuntimeError("Could not read image")

        height, width = frame.shape[:2]
        # OpenCV delivers BGR; the detector input is declared as SRGB.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        mp_image = mp.Image(
            image_format=mp.ImageFormat.SRGB,
            data=frame_rgb
        )

        result = detector.detect(mp_image)
        annotated_rgb = frame_rgb.copy()

        # A single image is recorded as frame 0 at timestamp 0.
        _export_pose(result, annotated_rgb, 0, 0, width, height)

        annotated_bgr = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)
        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(str(OUT_IMAGE), annotated_bgr):
            raise RuntimeError(f"Could not write image: {OUT_IMAGE}")

        output_path = OUT_IMAGE
        print("‚úÖ Image written:", OUT_IMAGE)

    # ======================================================
    # VIDEO MODE
    # ======================================================
    else:
        print("üé¨ Processing video...")

        cap = cv2.VideoCapture(str(INPUT_PATH))
        writer = None
        try:
            if not cap.isOpened():
                raise RuntimeError("Could not open video")

            width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps    = cap.get(cv2.CAP_PROP_FPS)
            if fps <= 0:
                # No usable frame rate reported: fall back to 30 fps.
                fps = 30.0

            writer = cv2.VideoWriter(
                str(OUT_VIDEO),
                fourcc,
                fps,
                (width, height)
            )
            # Without this check a writer that failed to open would silently
            # drop every frame.
            if not writer.isOpened():
                raise RuntimeError(f"Could not open video writer: {OUT_VIDEO}")

            frame_idx = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(
                    image_format=mp.ImageFormat.SRGB,
                    data=frame_rgb
                )

                # Timestamp derived from the frame index and the frame rate.
                timestamp_ms = int((frame_idx / fps) * 1000)
                result = detector.detect_for_video(mp_image, timestamp_ms)
                annotated_rgb = frame_rgb.copy()

                _export_pose(
                    result, annotated_rgb, frame_idx, timestamp_ms, width, height
                )

                annotated_bgr = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)
                writer.write(annotated_bgr)

                # Progress indicator, refreshed in place every 30 frames.
                if frame_idx % 30 == 0:
                    print(f"\r‚è≥ Frame {frame_idx}", end="")

                frame_idx += 1
        finally:
            # Release the capture and finalize the output file in every case.
            cap.release()
            if writer is not None:
                writer.release()

        output_path = OUT_VIDEO
        print("\n‚úÖ Video written:", OUT_VIDEO)

# ----------------------------
# CLEANUP
# ----------------------------
finally:
    csv_file.close()

# ----------------------------
# AUTO OPEN OUTPUT (Windows / macOS / Linux) - Disable if no output is desired
# ----------------------------
# Opens the result with the platform's default application. This is a pure
# convenience step: any failure is reported and never aborts the run, which
# is why the broad `except Exception` is kept here.
if output_path.exists():
    try:
        if system == "Windows":
            os.startfile(output_path)
        elif system == "Darwin":
            # check=True turns a non-zero exit status into an exception so
            # that a failed launch is reported instead of silently ignored.
            subprocess.run(["open", str(output_path)], check=True)
        elif system == "Linux":
            subprocess.run(["xdg-open", str(output_path)], check=True)
        else:
            print("‚ö†Ô∏è Could not auto-open file:", f"unsupported platform {system!r}")
    except Exception as e:
        print("‚ö†Ô∏è Could not auto-open file:", e)
else:
    print("‚ö†Ô∏è Output file not found:", output_path)


I0000 00:00:1766169652.288339  111825 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M1 Max
W0000 00:00:1766169652.353844  352481 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1766169652.397496  352481 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


üé¨ Processing video...
‚è≥ Frame 180
‚úÖ Video written: /Users/marcbp/spiced_bootcamp/Capstone Project/GAITy-Capstone-Modeling/notebooks/../data/output/annotated_video.mp4
