In [None]:
import os
import urllib.request

import numpy as np
import torch

# Define paths
# Directory scanned for per-sample keypoint arrays (*.npy).
DATA_PATH = "../data/keypoints/"
# Remote location of the OpenHands AUTSL decoupled-GCN config.
CONFIG_URL = "https://raw.githubusercontent.com/AI4Bharat/OpenHands/main/examples/configs/autsl/decoupled_gcn.yaml"
# Local copy of the config; downloaded from CONFIG_URL when it is missing.
CONFIG_PATH = "../config/autsl_decoupled_gcn.yaml"
# Pretrained checkpoint file; the example cell only runs when this path exists.
CHECKPOINT_PATH = "../checkpoints/autsl/sl_gcn/epoch=72-step=64239.ckpt"


  from pkg_resources import DistributionNotFound, get_distribution

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/teamspace/studios/this_studio/fsl-bisindo/.venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/teamspace/studios/this_studio/fsl-bisindo/.venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/teamspace/studios/th

In [None]:
# Download config if not exists
if not os.path.exists(CONFIG_PATH):
    # Make sure the target directory is there before writing into it.
    os.makedirs(os.path.dirname(CONFIG_PATH) or ".", exist_ok=True)

    # Download to a temporary name and rename only on success. A failed or
    # interrupted transfer must not leave an empty/partial file at CONFIG_PATH,
    # otherwise the existence check above would skip the download forever.
    partial_path = CONFIG_PATH + ".part"
    try:
        urllib.request.urlretrieve(CONFIG_URL, partial_path)
        os.replace(partial_path, CONFIG_PATH)
    finally:
        # After a successful rename the temporary file no longer exists.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    # Only reached when the download succeeded (errors propagate above).
    print(f"Downloaded config to {CONFIG_PATH}")

In [None]:
!mkdir -p ../checkpoints

!wget -P ../checkpoints https://github.com/AI4Bharat/OpenHands/releases/download/checkpoints_vl/autsl_slgcn.zip
!unzip ../checkpoints/autsl_slgcn.zip -d ../checkpoints


--2025-12-14 17:54:53--  https://github.com/AI4Bharat/OpenHands/releases/download/checkpoints_vl/autsl_slgcn.zip
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-12-14 17:54:54 ERROR 404: Not Found.

unzip:  cannot find or open ../checkpoints/autsl_slgen.zip, ../checkpoints/autsl_slgen.zip.zip or ../checkpoints/autsl_slgen.zip.ZIP.


In [21]:
# Download the AUTSL metadata archive from the OpenHands release assets into
# ../data/AUTSL and extract it there.
!mkdir -p ../data/AUTSL
!wget -P ../data/AUTSL https://github.com/AI4Bharat/OpenHands/releases/download/checkpoints_v1/autsl_metadata.zip
!unzip ../data/AUTSL/autsl_metadata.zip -d ../data/AUTSL
# Rename the extracted folder to "metadata".
# NOTE(review): assumes the archive unpacks to an AUTSL/ directory and that
# ../data/AUTSL/metadata does not exist yet — confirm before re-running.
!mv ../data/AUTSL/AUTSL ../data/AUTSL/metadata

--2025-12-14 18:31:35--  https://github.com/AI4Bharat/OpenHands/releases/download/checkpoints_v1/autsl_metadata.zip
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/369090740/252abc9b-bb2d-43f8-8520-27a75c9f7c26?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-12-14T19%3A09%3A00Z&rscd=attachment%3B+filename%3Dautsl_metadata.zip&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-12-14T18%3A08%3A50Z&ske=2025-12-14T19%3A09%3A00Z&sks=b&skv=2018-11-09&sig=yKcquTuIYQjtR1x37I%2Bem6Giq5FXRF%2FOwrp8TMAoXo4%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc2NTczNzMzMywibmJmIjoxNzY1NzM3MDMzLCJwYXRoIjoicmVsZWFzZWFzc2V0

In [None]:
# Download AUTSL.zip from Zenodo record 6674324 (-L follows redirects) and
# extract it into ../data/AUTSL.
!curl -L -o ../data/AUTSL/AUTSL.zip "https://zenodo.org/records/6674324/files/AUTSL.zip?download=1"
!unzip ../data/AUTSL/AUTSL.zip -d ../data/AUTSL -q

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1411M  100 1411M    0     0  20.1M      0  0:01:10  0:01:10 --:--:-- 20.7M


In [22]:
# Delete the .zip archives in ../data/AUTSL that match the glob below.
# NOTE(review): without a recursive-glob shell option, `**.zip` presumably
# matches the same files as `*.zip` — confirm.
!rm ../data/AUTSL/**.zip

## Data Preprocessing

The pretrained SL-GCN model on AUTSL uses a specific set of 27 keypoints (Body + Hands).
However, the `mediapipe_extract.py` script typically extracts keypoints in the order: `Body (33) + Face (468) + Left Hand (21) + Right Hand (21)`.

We need to:
1.  Drop the face landmarks so the data is ordered `Body + Left Hand + Right Hand` (33 + 21 + 21 = 75 keypoints).
2.  Select the specific 27 keypoints used by the model.
3.  Normalize the keypoints.

In [None]:
def reorder_and_select_keypoints(data):
    """
    Drop the face landmarks from a holistic keypoint array and keep 27 keypoints.

    The input is indexed as (T, V, C) with the V axis laid out as
    [Body 0-32, Face 33-500, Left Hand 501-521, Right Hand 522-542].
    Body and hand slices are stitched together into a [Body, LH, RH] array
    (75 keypoints for a 543-keypoint input), from which the 27 positions of
    the 'mediapipe_holistic_minimal_27' layout are picked. Shape diagnostics
    are printed along the way.

    Returns the selected array of shape (T, 27, C).
    """
    # Positions inside the 75-keypoint [Body(33), LH(21), RH(21)] layout.
    keep_idx = [
        0, 2, 5, 11, 12, 13, 14,                  # body (0-32)
        33, 37, 38, 41, 42, 45, 46, 49, 50, 53,   # left hand (33-53)
        54, 58, 59, 62, 63, 66, 67, 70, 71, 74,   # right hand (54-74)
    ]

    # Cut the three wanted groups out of the keypoint axis; the face block
    # (33-500) is skipped.
    body, lh, rh = (
        data[:, start:stop, :]
        for start, stop in ((0, 33), (501, 522), (522, 543))
    )

    # Stitch the groups back together as [Body, LH, RH].
    combined = np.concatenate((body, lh, rh), axis=1)

    # Diagnostics
    print(f"Body shape: {body.shape}")
    print(f"LH shape: {lh.shape}")
    print(f"RH shape: {rh.shape}")
    print(f"Combined shape: {combined.shape}")
    print(f"Number of indices to select: {len(keep_idx)}")
    print(f"Max index: {max(keep_idx)}, Combined V dim: {combined.shape[1]}")

    # Pick the 27 keypoints along the keypoint axis.
    selected = combined[:, keep_idx, :]
    print(f"Selected shape: {selected.shape}")

    return selected


from openhands.datasets.pose_transforms import PoseSelect, CenterAndScaleNormalize
import omegaconf
from openhands.apis.inference import InferenceModel

def preprocess_keypoints(file_path):
    """Load one holistic keypoint file and prepare it for the SL-GCN encoder.

    Steps:
      1. Drop the face landmarks: 543 keypoints -> 75 (Body + LH + RH).
      2. Keep only the X/Y channels and convert to a (C, T, V) float tensor.
      3. Select the 27-keypoint preset and normalize with the shoulder preset
         using the OpenHands pose transforms.

    Args:
        file_path: Path to a ``.npy`` array of shape (T, 543, C) with C >= 2,
            laid out as Body(33) + Face(468) + Left Hand(21) + Right Hand(21).

    Returns:
        The ``"frames"`` entry produced by the transforms, laid out as
        (C=2, T, V=27).

    Raises:
        ValueError: If the loaded array is not 3-D, does not have exactly 543
            keypoints, or has fewer than 2 coordinate channels.
    """
    # Layout of the keypoint axis in the extracted files.
    n_body, n_face, n_hand = 33, 468, 21
    lh_start = n_body + n_face      # 501
    rh_start = lh_start + n_hand    # 522
    n_total = rh_start + n_hand     # 543

    # Load data
    data = np.load(file_path)

    # The slices below silently pick the wrong keypoints for any other layout,
    # so fail early with a clear message instead.
    if data.ndim != 3 or data.shape[1] != n_total or data.shape[2] < 2:
        raise ValueError(
            f"Expected keypoints of shape (T, {n_total}, C>=2), got {data.shape} "
            f"from {file_path}"
        )

    # Only X and Y are fed to the model (input tensor is (2, T, 27)); any extra
    # channel such as Z is dropped here, so no padding is ever needed.
    xy = data[..., :2]

    # 1. Convert 543 (Body+Face+Hands) -> 75 (Body+Hands)
    # The preset 'mediapipe_holistic_minimal_27' assumes indices based on this
    # 75-point layout: 0-32 Body, 33-53 Left Hand, 54-74 Right Hand.
    data_75 = np.concatenate(
        [xy[:, :n_body, :], xy[:, lh_start:rh_start, :], xy[:, rh_start:n_total, :]],
        axis=1,
    )  # (T, 75, 2)

    # 2. Convert to Tensor (C, T, V) — the layout passed to the transforms.
    tensor_data = torch.tensor(data_75, dtype=torch.float32).permute(2, 0, 1)
    sample = {"frames": tensor_data}

    # 3. Apply OpenHands transforms using presets.
    # Select 27 keypoints using the preset.
    pose_select = PoseSelect(preset="mediapipe_holistic_minimal_27")
    sample = pose_select(sample)

    # Normalize using the shoulder preset.
    normalizer = CenterAndScaleNormalize(
        reference_points_preset="shoulder_mediapipe_holistic_minimal_27"
    )
    sample = normalizer(sample)

    return sample["frames"]  # (C, T, V)

## Load Model and Extract Features

We will load the model using the config and checkpoint, then run the encoder part to get the features.

In [3]:
def load_model(config_path, checkpoint_path):
    """Build the OpenHands model described by a config and return its encoder.

    Args:
        config_path: Path to the YAML config read with OmegaConf.
        checkpoint_path: Checkpoint location, stored on the config as
            ``pretrained`` before the model is constructed.

    Returns:
        The ``encoder`` attribute of the wrapped model.
    """
    config = omegaconf.OmegaConf.load(config_path)

    # Point the config at the checkpoint before the wrapper is created.
    config.pretrained = checkpoint_path

    # InferenceModel builds the network from the config; the follow-up call
    # asks it to initialise from the checkpoint if one is available.
    wrapper = InferenceModel(cfg=config)
    wrapper.init_from_checkpoint_if_available()

    # Only the encoder is handed back to the caller.
    return wrapper.model.encoder


In [8]:
# Example usage: load the encoder and extract features for one keypoint file.
# Note: Ensure you have the checkpoint file at CHECKPOINT_PATH
if os.path.exists(CHECKPOINT_PATH):
    encoder = load_model(CONFIG_PATH, CHECKPOINT_PATH)
    encoder.eval()
    print("Model loaded successfully.")

    # Pick a sample file. os.listdir() returns names in arbitrary order, so
    # sort them to select the same file on every run. A missing data directory
    # is treated like an empty one instead of raising FileNotFoundError.
    dir_entries = os.listdir(DATA_PATH) if os.path.isdir(DATA_PATH) else []
    npy_names = sorted(name for name in dir_entries if name.endswith(".npy"))
    sample_file = os.path.join(DATA_PATH, npy_names[0]) if npy_names else None

    if sample_file:
        print(f"Processing {sample_file}...")
        processed_data = preprocess_keypoints(sample_file) # (C, T, V)

        # Add batch dimension: (1, C, T, V)
        input_tensor = processed_data.unsqueeze(0)

        # check shape
        print(f"Input tensor shape: {input_tensor.shape}")

        # Run inference without tracking gradients
        with torch.no_grad():
            features = encoder(input_tensor)

        print(f"Features shape: {features.shape}")
        # Expected shape: (1, 256) or similar depending on n_out_features
    else:
        print("No .npy files found in data directory.")
else:
    print(f"Checkpoint not found at {CHECKPOINT_PATH}. Please download the pretrained model.")

Found 226 classes in ['train'] splits
Loading checkpoint from: ../checkpoints/autsl/sl_gcn/epoch=72-step=64239.ckpt
Model loaded successfully.
Processing ../data/keypoints/signer4_label23_sample9.npy...
Input tensor shape: torch.Size([1, 2, 50, 27])
Features shape: torch.Size([1, 256])


# Batch Feature Extraction