In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
from pathlib import Path

In [19]:
# Load the recorded landmark frames (one CSV row per captured frame).
csv_path = Path('./data/dataset/ngt_dynamic_landmarks_18893.csv')
if not csv_path.exists():
    # Raise instead of calling exit(): inside a notebook exit() shuts the
    # kernel down rather than reporting a readable error in the cell output.
    raise FileNotFoundError(
        f"Dataset not found at {csv_path}. "
        "Please run dynamic data collection script first!"
    )

df = pd.read_csv(csv_path)

# The cells below group by letter/sample_id and sort by frame, so fail early
# with a clear message if any of those bookkeeping columns is missing.
missing_cols = {'letter', 'sample_id', 'frame'} - set(df.columns)
if missing_cols:
    raise ValueError(f"Dataset at {csv_path} is missing columns: {sorted(missing_cols)}")

print(f"\n✓ Loaded {len(df)} frames")


✓ Loaded 18893 frames


In [22]:
# Alphabetically ordered list of the gesture labels present in the dataset.
letter_labels = df['letter'].unique()
letters = sorted(letter_labels)

# Number of distinct recordings (sample_id values) available for each letter.
per_letter = df.groupby('letter')
samples_per_letter = per_letter['sample_id'].nunique()

print(f"✓ Letters: {letters}")
print(f"\nSamples per letter:")
print(samples_per_letter)

✓ Letters: ['H', 'J', 'U', 'X', 'Z']

Samples per letter:
letter
H    30
J    60
U    60
X    60
Z    90
Name: sample_id, dtype: int64


In [20]:
# Strategy: pad or truncate every gesture sequence to the same number of
# frames, so that each sequence flattens to a feature vector of identical
# length (process_sequence reads this constant).
TARGET_FRAMES = 40  # Standard sequence length, in frames

def normalize_landmarks(coords):
    """
    Normalize a single frame of landmarks (21 x 3)
    Same normalization as static model

    Parameters
    ----------
    coords : array-like
        63 values (or anything reshapeable to 21 x 3) holding the x, y, z
        coordinates of the 21 hand landmarks. Object-dtype input, such as a
        row pulled out of a mixed-type DataFrame, is accepted.

    Returns
    -------
    numpy.ndarray
        A new float64 array of shape (21, 3), translated so landmark 0 (the
        wrist) is at the origin and divided by the wrist-to-landmark-12
        distance. When that distance is 0 the centred coordinates are
        returned unscaled. The input is never modified.

    Raises
    ------
    ValueError
        If the input cannot be converted to floats or does not contain
        exactly 63 values.
    """
    # Force a numeric dtype: rows extracted from a DataFrame that also holds
    # string columns arrive as dtype=object, and without this cast the object
    # dtype would propagate into the final feature matrix.
    coords = np.asarray(coords, dtype=np.float64).reshape(21, 3)

    # Center on wrist (subtraction allocates a new array, input is untouched)
    coords = coords - coords[0]

    # Scale by hand size. After centring, the wrist is the origin, so the
    # wrist-to-landmark-12 distance is simply the norm of landmark 12.
    # NOTE(review): landmark 12 is presumably the middle fingertip - confirm
    # against the landmark model used during data collection.
    hand_size = np.linalg.norm(coords[12])
    if hand_size > 0:
        coords = coords / hand_size

    return coords

def process_sequence(group, target_frames=None):
    """
    Process one gesture sequence:
    1. Normalize each frame
    2. Pad or truncate to the target number of frames
    3. Return flattened sequence

    Parameters
    ----------
    group : pandas.DataFrame
        All rows belonging to one recorded gesture. Must contain a 'frame'
        column used for ordering; every column other than 'letter',
        'sample_id' and 'frame' is treated as a landmark coordinate.
    target_frames : int, optional
        Number of frames in the output sequence. Defaults to the
        module-level TARGET_FRAMES.

    Returns
    -------
    numpy.ndarray
        1-D float array of length target_frames * 63. Short sequences are
        padded by repeating their last frame; long ones keep only their
        first target_frames frames.

    Raises
    ------
    ValueError
        If the group has no rows, target_frames is smaller than 1, or the
        landmark columns cannot be converted to floats.
    """
    if target_frames is None:
        target_frames = TARGET_FRAMES
    if target_frames < 1:
        raise ValueError(f"target_frames must be >= 1, got {target_frames}")

    # An empty sequence has no last frame to pad with; previously this fell
    # through and silently produced a zero-length feature vector.
    if len(group) == 0:
        raise ValueError("process_sequence received a sequence with no frames")

    # Sort frames in order
    frames = group.sort_values('frame')

    # Get landmark columns (exclude letter, sample_id, frame)
    landmark_cols = [col for col in frames.columns
                     if col not in ('letter', 'sample_id', 'frame')]

    # Pull the landmark block out once as a float matrix. Iterating with
    # iterrows() would build an object-dtype Series per row, which is slow
    # and leaks the object dtype into the features.
    raw_frames = frames[landmark_cols].to_numpy(dtype=float)

    # Normalize every frame and flatten it back to 63 values
    normalized_frames = np.stack(
        [normalize_landmarks(frame).ravel() for frame in raw_frames]
    )

    # Pad or truncate to target_frames
    num_frames = len(normalized_frames)
    if num_frames < target_frames:
        # PAD: Repeat last frame
        padding = np.repeat(normalized_frames[-1:],
                            target_frames - num_frames,
                            axis=0)
        normalized_frames = np.vstack([normalized_frames, padding])
    else:
        # TRUNCATE: Take first target_frames (no-op when already exact)
        normalized_frames = normalized_frames[:target_frames]

    # Flatten to single vector: (frames x 63 coords)
    return normalized_frames.flatten()

In [23]:
# Turn every recorded gesture (one letter/sample_id pair) into one fixed-length
# feature vector, keeping labels and provenance in parallel lists.
X_list = []
y_list = []
sample_info = []  # Track (letter, sample_id) for each processed sequence

for (letter, sample_id), group in df.groupby(['letter', 'sample_id']):
    sequence = process_sequence(group)
    X_list.append(sequence)
    y_list.append(letter)
    sample_info.append((letter, sample_id))

# Stack with an explicit float dtype: the per-sequence vectors can arrive as
# object arrays, and a plain np.array() would then yield an object matrix.
# It also raises immediately if any sequence has a different length.
X = np.asarray(X_list, dtype=np.float64)
y = np.array(y_list)

# Rows of X, labels in y and entries of sample_info must stay aligned because
# the train/test split indexes all three by position.
assert X.ndim == 2 and len(X) == len(y) == len(sample_info), \
    "feature matrix, labels and sample_info are out of sync"

print(f"✓ Processed {len(X)} sequences")
print(f"✓ Feature shape: {X.shape}")

✓ Processed 300 sequences
✓ Feature shape: (300, 2520)


In [24]:
# Hand-rolled stratified hold-out: every letter contributes ~20% of its
# gestures (at least one) to the test set, so whole gestures - never single
# frames - land on one side of the split.
train_indices, test_indices = [], []

for letter in letters:
    # Positions in X / y / sample_info that belong to this letter
    letter_indices = [pos for pos, info in enumerate(sample_info)
                      if info[0] == letter]

    # The global RNG is re-seeded for every letter, so each letter's shuffle
    # starts from the same random state.
    np.random.seed(42)
    shuffled = np.random.permutation(letter_indices)

    # 20% of this letter's gestures, but never fewer than one
    n_test = max(1, int(len(letter_indices) * 0.2))

    test_indices += list(shuffled[:n_test])
    train_indices += list(shuffled[n_test:])

# Materialise the two partitions
X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y[train_indices], y[test_indices]

print(f"✓ Training gestures: {len(X_train)}")
print(f"✓ Testing gestures: {len(X_test)}")

# Sanity check: no (letter, sample_id) pair may sit on both sides
train_sample_ids = {sample_info[i] for i in train_indices}
test_sample_ids = {sample_info[i] for i in test_indices}
overlap = train_sample_ids.intersection(test_sample_ids)

if not overlap:
    print(f"\n✅ Verified: No gesture appears in both train and test")
    print(f"   Train has {len(train_sample_ids)} unique gestures")
    print(f"   Test has {len(test_sample_ids)} unique gestures")
else:
    print(f"\n❌ ERROR: {len(overlap)} samples in both train and test!")
    print("This should never happen!")

✓ Training gestures: 240
✓ Testing gestures: 60

✅ Verified: No gesture appears in both train and test
   Train has 240 unique gestures
   Test has 60 unique gestures


In [25]:
# Hyper-parameters gathered in one place so they are easy to scan and tweak.
rf_params = {
    'n_estimators': 50,          # Number of trees
    'max_depth': 15,             # Max depth (slightly deeper for sequences)
    'min_samples_split': 8,      # Min samples to split
    'min_samples_leaf': 3,       # Min samples in leaf
    'max_features': 'sqrt',      # Random feature subset
    'class_weight': 'balanced',  # Handle class imbalance
    'random_state': 42,
    'n_jobs': -1,
}

# Build the forest and fit it on the training gestures.
clf = RandomForestClassifier(**rf_params)
clf.fit(X_train, y_train)
print("Training complete!")

Training complete!


In [26]:
# Plain accuracy on both partitions: predict first, then compare with labels.
train_predictions = clf.predict(X_train)
test_predictions = clf.predict(X_test)

train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)

print(f"Training: {train_accuracy*100:.2f}%")
print(f"Testing:  {test_accuracy*100:.2f}%")

Training: 100.00%
Testing:  100.00%


In [27]:
# Persist the fitted classifier, creating the target directory on first use.
model_path = Path('./models/dynamic_model_clf.pkl')
models_dir = model_path.parent
models_dir.mkdir(parents=True, exist_ok=True)
joblib.dump(clf, model_path)

['models\\dynamic_model_clf.pkl']

In [28]:
# Describe the saved model for whatever loads it later. Every value is taken
# from the objects produced by this run; the earlier hard-coded numbers
# (120 / 30 samples, 0.95 accuracy) did not match the actual split or score.
metadata_path = './models/dynamic_metadata.pkl'

# Make the cell self-sufficient even if the model-saving cell was skipped.
Path(metadata_path).parent.mkdir(parents=True, exist_ok=True)

metadata = {
    'target_frames': TARGET_FRAMES,
    'letters': list(letters),
    'normalization': 'wrist_centered_scaled',
    'feature_size': int(X_train.shape[1]),
    'train_samples': int(len(X_train)),
    'test_samples': int(len(X_test)),
    'test_accuracy': float(test_accuracy),  # plain float keeps the pickle numpy-free
}
joblib.dump(metadata, metadata_path)

['./models/dynamic_metadata.pkl']