In [1]:
import os
import cv2
import json
from tqdm import tqdm
import mediapipe as mp

2025-01-30 07:52:12.982796: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-30 07:52:13.051230: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738223533.082522   30905 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738223533.093458   30905 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-30 07:52:13.153765: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
# MediaPipe detectors used by the keypoint-extraction functions in this notebook:
# one full-body pose estimator and one hand-landmark detector.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    static_image_mode=True,  # inputs are individual still images, not video frames
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,  # report at most two hands per image
    min_detection_confidence=0.5,
)




















INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:
# Input and output locations (relative to the notebook's working directory)
IMAGE_PATH = '../dataset/augmented_images'  # Input root; expected to hold one sub-folder of images per label
OUTPUT_PATH = '../dataset/keypoints'  # Output folder; one <label>.json file is written here per label

In [4]:
def create_dir(directory_path):
    """Make sure `directory_path` exists, creating any missing parent folders.

    Calling this for a directory that is already present is a no-op.
    """
    os.makedirs(name=directory_path, exist_ok=True)

In [5]:
def validate_json(output_path, label):
    """
    Reports how many image records were saved in a label's JSON file.

    Prints the number of top-level entries found in `<label>.json`, or a
    "not found" message when that file does not exist. Returns nothing.

    Args:
        output_path (str): Folder that holds the per-label JSON files.
        label (str): Label whose JSON file is checked (e.g., 'A', '1').
    """
    json_path = os.path.join(output_path, f"{label}.json")

    # Guard clause: there is nothing to count if the file was never written
    if not os.path.exists(json_path):
        print(f"JSON file not found for label {label}: {json_path}")
        return

    with open(json_path, 'r') as handle:
        records = json.load(handle)

    print(f"Total images written for label {label}: {len(records)}")

In [6]:
def extract_body_keypoints(image_path):
    """
    Extracts keypoints for the body from the image.

    Args:
        image_path (str): Path to the image.

    Returns:
        list: Exactly 33 dicts with keys "x", "y", "z" (normalized landmark
        coordinates from the module-level `pose` detector). Every entry is
        zero-filled when the image cannot be read or no pose is detected.
    """
    expected_count = 33  # landmarks produced by the full-body pose model

    keypoints_data = []
    image = cv2.imread(image_path)

    if image is None:
        # cv2.imread returns None (it does not raise) for a missing, corrupt or
        # unsupported file. Handing None to cvtColor would raise and abort the
        # whole batch, so treat the image like one with no detection instead.
        print(f"Could not read image, using zero keypoints: {image_path}")
    else:
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = pose.process(image_rgb)

        if results.pose_landmarks:
            keypoints_data = [
                {"x": landmark.x, "y": landmark.y, "z": landmark.z}
                for landmark in results.pose_landmarks.landmark
            ]

    # Pad up to the fixed length. A fresh dict is built for every slot so the
    # padded entries do not all alias one shared object.
    missing = expected_count - len(keypoints_data)
    keypoints_data.extend({"x": 0.0, "y": 0.0, "z": 0.0} for _ in range(missing))

    return keypoints_data

W0000 00:00:1738223537.591102   31007 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [7]:
def extract_hand_keypoints(image_path):
    """
    Extracts keypoints for the hands from the image.

    Args:
        image_path (str): Path to the image.

    Returns:
        list: At least 42 dicts with keys "x", "y", "z" (21 landmarks per hand,
        two hands with the module-level `hands` detector). Detected hands are
        stored in the order the detector reports them, so a single detected
        hand always fills the first 21 slots and the rest are zeros. Every
        entry is zero-filled when the image cannot be read or no hand is
        detected.
    """
    expected_count = 42  # 21 landmarks per hand x 2 hands

    keypoints_data = []
    image = cv2.imread(image_path)

    if image is None:
        # cv2.imread returns None (it does not raise) for a missing, corrupt or
        # unsupported file. Handing None to cvtColor would raise and abort the
        # whole batch, so treat the image like one with no detection instead.
        print(f"Could not read image, using zero keypoints: {image_path}")
    else:
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            # Flatten all detected hands into one list of landmark dicts
            keypoints_data = [
                {"x": lm.x, "y": lm.y, "z": lm.z}
                for hand_landmarks in results.multi_hand_landmarks
                for lm in hand_landmarks.landmark
            ]

    # Pad up to the fixed length. A fresh dict is built for every slot so the
    # padded entries do not all alias one shared object.
    missing = expected_count - len(keypoints_data)
    keypoints_data.extend({"x": 0.0, "y": 0.0, "z": 0.0} for _ in range(missing))

    return keypoints_data

In [8]:
def process_images_with_validation(base_path, output_path, labels=None):
    """
    Processes images to extract body and hand keypoints, then saves them as JSON files.

    For every label, the images in `<base_path>/<label>` are processed in
    sorted filename order and the results are written to
    `<output_path>/<label>.json` as a list of
    {"image_name": ..., "keypoints": {"body": [...], "hands": [...]}} records.
    After each file is written, `validate_json` prints the number of records.
    Labels whose folder is missing are reported and skipped (no JSON is written).

    Args:
        base_path (str): Path to the base folder containing images.
        output_path (str): Path to the folder to save JSON files.
        labels (iterable of str, optional): Sub-folder names to process.
            Defaults to the digits 1-9 followed by the letters A-Z.
    """
    create_dir(output_path)

    # Label map for directories (override via the `labels` argument if needed)
    if labels is None:
        labels = list("123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")

    image_extensions = ('.jpg', '.jpeg', '.png')

    for label in tqdm(labels, desc="Processing labels"):
        input_folder = os.path.join(base_path, label)

        # isdir (not exists) so a stray file named like a label is skipped
        # instead of making os.listdir raise.
        if not os.path.isdir(input_folder):
            print(f"Folder not found: {input_folder}")
            continue

        # Records are kept only for the current label; holding every label's
        # data for the whole run would grow memory for no benefit.
        records = []

        # os.listdir returns entries in arbitrary order; sorting makes the
        # JSON output reproducible between runs and machines.
        for filename in sorted(os.listdir(input_folder)):
            if not filename.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(input_folder, filename)

            # Extract keypoints for both body and hands
            body_keypoints = extract_body_keypoints(image_path)
            hand_keypoints = extract_hand_keypoints(image_path)

            records.append({
                "image_name": filename,
                "keypoints": {
                    "body": body_keypoints,  # Body keypoints (zero-padded if no pose detected)
                    "hands": hand_keypoints  # Hand keypoints (zero-padded if no hands detected)
                }
            })

        # Save the accumulated data for the label to a JSON file
        json_path = os.path.join(output_path, f"{label}.json")
        with open(json_path, 'w') as json_file:
            json.dump(records, json_file, indent=4)

        # Validate and print total images written
        validate_json(output_path, label)

W0000 00:00:1738223537.619681   31007 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [9]:
# Entry point: run the full extraction over the configured dataset folders.
# Writes one JSON file per label under OUTPUT_PATH and prints a per-label count.
if __name__ == "__main__":
    process_images_with_validation(IMAGE_PATH, OUTPUT_PATH)
    print("Keypoint extraction completed.")

Processing labels:   0%|          | 0/35 [00:00<?, ?it/s]W0000 00:00:1738223537.639028   30991 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738223537.678774   30998 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738223537.725648   31001 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
Processing labels:   3%|▎         | 1/35 [05:00<2:50:10, 300.31s/it]

Total images written for label 1: 5185


Processing labels:   6%|▌         | 2/35 [10:00<2:45:04, 300.13s/it]

Total images written for label 2: 5000


Processing labels:   9%|▊         | 3/35 [14:51<2:37:56, 296.14s/it]

Total images written for label 3: 5000


Processing labels:  11%|█▏        | 4/35 [19:49<2:33:22, 296.86s/it]

Total images written for label 4: 5000


Processing labels:  14%|█▍        | 5/35 [24:41<2:27:35, 295.19s/it]

Total images written for label 5: 5000


Processing labels:  17%|█▋        | 6/35 [29:29<2:21:25, 292.59s/it]

Total images written for label 6: 5000


Processing labels:  20%|██        | 7/35 [34:14<2:15:27, 290.27s/it]

Total images written for label 7: 5000


Processing labels:  23%|██▎       | 8/35 [39:04<2:10:34, 290.18s/it]

Total images written for label 8: 5000


Processing labels:  26%|██▌       | 9/35 [43:51<2:05:16, 289.11s/it]

Total images written for label 9: 5000


Processing labels:  29%|██▊       | 10/35 [49:26<2:06:21, 303.25s/it]

Total images written for label A: 5000


Processing labels:  31%|███▏      | 11/35 [55:00<2:05:05, 312.71s/it]

Total images written for label B: 5000


Processing labels:  34%|███▍      | 12/35 [59:49<1:57:01, 305.30s/it]

Total images written for label C: 5000


Processing labels:  37%|███▋      | 13/35 [1:05:13<1:54:00, 310.94s/it]

Total images written for label D: 5000


Processing labels:  40%|████      | 14/35 [1:10:39<1:50:30, 315.74s/it]

Total images written for label E: 5000


Processing labels:  43%|████▎     | 15/35 [1:15:46<1:44:17, 312.88s/it]

Total images written for label F: 5000


Processing labels:  46%|████▌     | 16/35 [1:20:58<1:39:00, 312.68s/it]

Total images written for label G: 5000


Processing labels:  49%|████▊     | 17/35 [1:26:17<1:34:25, 314.76s/it]

Total images written for label H: 5000


Processing labels:  51%|█████▏    | 18/35 [1:31:05<1:26:50, 306.52s/it]

Total images written for label I: 5000


Processing labels:  54%|█████▍    | 19/35 [1:36:38<1:23:52, 314.51s/it]

Total images written for label J: 5000


Processing labels:  57%|█████▋    | 20/35 [1:41:59<1:19:08, 316.54s/it]

Total images written for label K: 5000


Processing labels:  60%|██████    | 21/35 [1:46:54<1:12:20, 310.04s/it]

Total images written for label L: 5000


Processing labels:  63%|██████▎   | 22/35 [1:52:13<1:07:45, 312.77s/it]

Total images written for label M: 5000


Processing labels:  66%|██████▌   | 23/35 [1:57:35<1:03:05, 315.48s/it]

Total images written for label N: 5000


Processing labels:  69%|██████▊   | 24/35 [2:02:26<56:29, 308.11s/it]  

Total images written for label O: 5000


Processing labels:  71%|███████▏  | 25/35 [2:07:48<52:03, 312.31s/it]

Total images written for label P: 5000


Processing labels:  74%|███████▍  | 26/35 [2:13:07<47:09, 314.36s/it]

Total images written for label Q: 5000


Processing labels:  77%|███████▋  | 27/35 [2:18:22<41:56, 314.58s/it]

Total images written for label R: 5000


Processing labels:  80%|████████  | 28/35 [2:23:29<36:25, 312.19s/it]

Total images written for label S: 5000


Processing labels:  83%|████████▎ | 29/35 [2:28:47<31:24, 314.03s/it]

Total images written for label T: 5000


Processing labels:  86%|████████▌ | 30/35 [2:33:36<25:32, 306.44s/it]

Total images written for label U: 5000


Processing labels:  89%|████████▊ | 31/35 [2:38:24<20:03, 300.87s/it]

Total images written for label V: 5000


Processing labels:  91%|█████████▏| 32/35 [2:43:33<15:10, 303.41s/it]

Total images written for label W: 5000


Processing labels:  94%|█████████▍| 33/35 [2:48:52<10:16, 308.02s/it]

Total images written for label X: 5000


Processing labels:  97%|█████████▋| 34/35 [2:54:11<05:11, 311.27s/it]

Total images written for label Y: 5000


Processing labels: 100%|██████████| 35/35 [2:59:20<00:00, 307.43s/it]

Total images written for label Z: 5000





Keypoint extraction completed.
