# Align Gloss Annotations with Video Frames

In [None]:

This tutorial shows how to align time-stamped gloss annotations with extracted video frames.
It is a critical step for supervised training, where each frame needs to be labelled with the corresponding gloss.

### Objectives:
- Load gloss annotations with timestamps
- Map each gloss annotation to its nearest frame by timestamp, then carry glosses forward to the frames that follow
- Save an aligned metadata file


In [None]:
# Step 1: Import libraries
import pandas as pd
from pathlib import Path
import math


In [None]:
# Step 2: Load annotation and frame information

# The annotation CSV is expected to provide two columns:
# 'timestamp' (in seconds) and 'gloss'.
annotations_path = Path('../data/annotations/sample_video_annotations.csv')
annotations = pd.read_csv(annotations_path)

# Collect the extracted frames. Sorting the paths defines the frame order;
# NOTE(review): this is a lexicographic sort, so it presumably relies on
# zero-padded frame numbers in the file names - confirm against the extractor.
frame_dir = Path('../data/frames/sample_video')
frame_files = sorted(frame_dir.glob('*.jpg'))

# Frames are assumed to be evenly spaced: frame k sits at k / frame_rate seconds.
frame_rate = 5  # frames per second (same used during extraction)
frame_timestamps = [
    frame_number / frame_rate
    for frame_number, _ in enumerate(frame_files)
]

# One row per frame; the 'gloss' column starts empty and is filled in later.
aligned = pd.DataFrame(
    {
        'frame_file': [frame_path.name for frame_path in frame_files],
        'timestamp': frame_timestamps,
    }
)
aligned['gloss'] = None


In [None]:
# Step 3: Align each annotation with its nearest frame (based on timestamp)
#
# Every annotation row labels the single frame whose timestamp is closest to
# the annotation's timestamp. If several annotations land on the same frame,
# the one that appears last in the CSV wins. The remaining frames are then
# filled with the most recent gloss seen before them.

if not frame_timestamps and not annotations.empty:
    # Without this guard the nearest-frame search below fails with an opaque
    # "min() arg is an empty sequence" error.
    raise ValueError("No frames available to align annotations with; check the frame directory.")

for gloss_time, gloss_label in zip(annotations['timestamp'], annotations['gloss']):
    # On an exact tie between two neighbouring frames, min() keeps the earlier one.
    nearest_frame_idx = min(
        range(len(frame_timestamps)),
        key=lambda i: abs(frame_timestamps[i] - gloss_time),
    )
    aligned.at[nearest_frame_idx, 'gloss'] = gloss_label

# Forward fill glosses (optional for continuous gloss application).
# The result is assigned back to the column rather than calling
# fillna(method='ffill', inplace=True) on `aligned['gloss']`: the `method=`
# argument is deprecated in recent pandas, and an in-place call on a column
# selection does not write back to `aligned` under pandas Copy-on-Write.
# Frames that precede the first labelled frame remain unlabelled.
aligned['gloss'] = aligned['gloss'].ffill()

aligned.head(10)


In [None]:
# Step 4: Save the aligned metadata

# Destination of the per-frame alignment table.
output_csv = Path('../data/processed_alignments/sample_video_gloss_aligned.csv')

# Create the target directory (and any missing parents) before writing;
# an already existing directory is not an error.
output_csv.parent.mkdir(exist_ok=True, parents=True)

# Write one row per frame, without the DataFrame index column.
aligned.to_csv(path_or_buf=output_csv, index=False)

print(f"Aligned annotation file saved to: {output_csv}")
