In [1]:
import cv2
import mediapipe as mp
import csv
import os
from datetime import datetime
from pathlib import Path
import cv2
import mediapipe as mp
import csv
import os
import numpy as np


In [None]:
# Configuration for the static-letter collection run.
# The 26 letters are split into two disjoint groups; only STATIC_LETTERS is
# collected by the cell that follows (via LETTERS_TO_COLLECT).
STATIC_LETTERS = list('ABCDEFGIKLMNOPQRSTVWY')
DYNAMIC_LETTERS = list('HJUXZ')
# Not referenced by any visible cell - presumably reserved for extra sampling
# of easily confused signs; TODO confirm intended use.
CONFUSED_LETTERS = list('MN')
# Number of saved poses after which the collection loop moves to the next letter.
SAMPLES_PER_LETTER = 50

# Start with static letters (alias of the list above, not a copy)
LETTERS_TO_COLLECT = STATIC_LETTERS
# Output file; the parent directory is created up front so later appends cannot
# fail on a missing folder.
csv_path = Path('./data/dataset/ngt_static_landmarks.csv')
csv_path.parent.mkdir(exist_ok=True, parents=True)



In [None]:
# Static-letter data collection.
# Streams frames from the default webcam, overlays the MediaPipe hand
# landmarks, and appends one CSV row (letter label + 63 landmark values) each
# time SPACE is pressed while a hand is detected. Expects LETTERS_TO_COLLECT,
# SAMPLES_PER_LETTER and csv_path to be defined by the configuration cell.

# Initialize MediaPipe
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Create CSV with header if it doesn't exist; an existing file is appended to.
if not os.path.exists(csv_path):
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        # Header: letter, then 63 landmark coordinates (21 points x 3 coords)
        header = ['letter'] + [f'{axis}{i}' for i in range(21) for axis in ['x', 'y', 'z']]
        writer.writerow(header)

# State
current_letter_idx = 0
current_letter = LETTERS_TO_COLLECT[current_letter_idx]
samples_this_letter = 0  # counts samples saved in THIS run only, not rows already in the CSV

# Key codes as returned by cv2.waitKey(...) & 0xFF. Upper-case variants are
# accepted too because the on-screen help shows the keys as "N" and "Q".
KEY_SPACE = ord(' ')
KEYS_NEXT = (ord('n'), ord('N'))
KEYS_QUIT = (ord('q'), ord('Q'))

print(f"Starting data collection for {len(LETTERS_TO_COLLECT)} letters")
print(f"Target: {SAMPLES_PER_LETTER} samples per letter")
print("\nControls:")
print("  SPACE - Save current hand pose")
# f-string so the actual target is shown instead of the literal placeholder
print(f"  N - Next letter (or auto-advances at {SAMPLES_PER_LETTER} samples)")
print("  Q - Quit\n")

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this the loop below is skipped silently and the cell looks like it succeeded.
    print("Could not open camera 0 - no samples will be collected")

try:
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=1,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7
    ) as hands:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Flip for mirror view. Detection runs on the flipped image, so the
            # stored landmark coordinates are in mirrored image space.
            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect hands
            results = hands.process(rgb_frame)

            # Draw landmarks if detected
            hand_detected = bool(results.multi_hand_landmarks)
            if hand_detected:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS
                    )

            # Display info
            progress = f"{samples_this_letter}/{SAMPLES_PER_LETTER}"
            letter_progress = f"{current_letter_idx + 1}/{len(LETTERS_TO_COLLECT)}"

            cv2.putText(frame, f"Letter: {current_letter}",
                       (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 3)
            cv2.putText(frame, f"Samples: {progress}",
                       (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Progress: {letter_progress}",
                       (10, 130), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Status indicator (green when a pose can be saved, red otherwise)
            if hand_detected:
                cv2.putText(frame, "Hand detected - Press SPACE",
                           (10, frame.shape[0] - 20),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            else:
                cv2.putText(frame, "No hand detected",
                           (10, frame.shape[0] - 20),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            cv2.imshow('NGT Data Collection', frame)

            key = cv2.waitKey(1) & 0xFF

            # Set to "Moving"/"Skipping" when this key press finishes the current letter.
            advance_verb = None

            # Save sample on SPACE (ignored when no hand is visible)
            if key == KEY_SPACE and hand_detected:
                landmarks = results.multi_hand_landmarks[0].landmark

                # Extract coordinates as a flat x, y, z sequence per landmark
                landmark_coords = []
                for lm in landmarks:
                    landmark_coords.extend([lm.x, lm.y, lm.z])

                # Save to CSV; the file is reopened per sample so every saved
                # row is on disk even if the cell is interrupted later.
                with open(csv_path, 'a', newline='') as f:
                    writer = csv.writer(f)
                    writer.writerow([current_letter] + landmark_coords)

                samples_this_letter += 1
                print(f"✓ Saved sample {samples_this_letter}/{SAMPLES_PER_LETTER} for letter '{current_letter}'")

                # Auto-advance when target reached
                if samples_this_letter >= SAMPLES_PER_LETTER:
                    advance_verb = "Moving"

            # Manual next letter on N
            elif key in KEYS_NEXT:
                if samples_this_letter < SAMPLES_PER_LETTER:
                    print(f"⚠ Only {samples_this_letter}/{SAMPLES_PER_LETTER} samples collected for '{current_letter}'")
                advance_verb = "Skipping"

            # Quit on Q
            elif key in KEYS_QUIT:
                print("\n⏹ Stopped by user")
                break

            if advance_verb is not None:
                current_letter_idx += 1
                if current_letter_idx >= len(LETTERS_TO_COLLECT):
                    print("\n🎉 All letters collected!")
                    break
                current_letter = LETTERS_TO_COLLECT[current_letter_idx]
                samples_this_letter = 0
                print(f"\n→ {advance_verb} to letter '{current_letter}'")
finally:
    # Always free the camera and close the preview window, including when the
    # kernel is interrupted or an exception escapes the loop.
    cap.release()
    cv2.destroyAllWindows()

print(f"\nData saved to: {csv_path}")
# Letters that were skipped with N are included in this count.
print(f"Collected {current_letter_idx}/{len(LETTERS_TO_COLLECT)} letters")

# Show summary (rows per letter across everything stored in the CSV, not just this run)
import pandas as pd
df = pd.read_csv(csv_path)
print("\nDataset summary:")
print(df['letter'].value_counts().sort_index())

In [None]:


# Configuration for the dynamic-letter collection run.
# NOTE: this rebinds SAMPLES_PER_LETTER and csv_path, which the static
# collection cell also reads - re-run the static configuration cell before
# re-running static collection.
DYNAMIC_LETTERS = list('HJUXZ')
SAMPLES_PER_LETTER = 30
FRAMES_PER_GESTURE = 30  # Record 30 frames per gesture (~1 second at 30fps)

# Output file; make sure its folder exists before the collection cell appends to it.
csv_path = Path('./data/dataset/ngt_dynamic_landmarks.csv')
csv_path.parent.mkdir(parents=True, exist_ok=True)



In [None]:
# Dynamic-letter data collection.
# SPACE starts a recording; landmark rows are buffered for every frame in which
# a hand is detected, and once FRAMES_PER_GESTURE rows are buffered the whole
# sequence is appended to the CSV under one sample_id. Expects DYNAMIC_LETTERS,
# SAMPLES_PER_LETTER, FRAMES_PER_GESTURE and csv_path from the configuration cell.

# Initialize MediaPipe
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Create CSV with header if it doesn't exist; an existing file is appended to.
if not os.path.exists(csv_path):
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        # Header: letter, sample_id, frame_num, then 63 landmark coordinates
        header = ['letter', 'sample_id', 'frame'] + [f'{axis}{i}' for i in range(21) for axis in ['x', 'y', 'z']]
        writer.writerow(header)


def _next_sample_id(path):
    """Return one more than the largest ``sample_id`` stored in ``path``.

    Returns 0 when the file holds no data rows (or has no ``sample_id``
    column). Rows whose ``sample_id`` is not a plain non-negative integer
    are ignored.
    """
    highest = -1
    with open(path, newline='') as f:
        for row in csv.DictReader(f):
            raw = (row.get('sample_id') or '').strip()
            if raw.isdigit():
                highest = max(highest, int(raw))
    return highest + 1


# State
current_letter_idx = 0
current_letter = DYNAMIC_LETTERS[current_letter_idx]
samples_this_letter = 0  # counts samples saved in THIS run only
recording = False
frame_buffer = []
# Continue numbering after what is already in the CSV. Restarting at 0 on every
# run would give new sequences the same sample_id as old ones in the
# append-mode file, merging unrelated recordings.
sample_counter = _next_sample_id(csv_path)

# Key codes as returned by cv2.waitKey(...) & 0xFF. Upper-case variants are
# accepted too because the on-screen help shows the keys as "N" and "Q".
KEY_SPACE = ord(' ')
KEYS_NEXT = (ord('n'), ord('N'))
KEYS_QUIT = (ord('q'), ord('Q'))

print(f"Starting DYNAMIC data collection for {len(DYNAMIC_LETTERS)} letters")
print(f"Target: {SAMPLES_PER_LETTER} samples per letter")
print(f"Each sample captures {FRAMES_PER_GESTURE} frames\n")
print("Controls:")
print("  SPACE - Start/stop recording gesture")
print("  N - Next letter")
print("  Q - Quit\n")

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this the loop below is skipped silently and the cell looks like it succeeded.
    print("Could not open camera 0 - no samples will be collected")

try:
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=1,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7
    ) as hands:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Mirror view; detection runs on the flipped image, so stored
            # coordinates are in mirrored image space.
            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            results = hands.process(rgb_frame)

            # Draw landmarks
            hand_detected = bool(results.multi_hand_landmarks)
            if hand_detected:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS
                    )

                    # If recording, save frame to buffer.
                    # NOTE: frames without a detected hand add nothing, so a
                    # sequence can span more camera frames than it has rows.
                    if recording:
                        landmark_coords = []
                        for lm in hand_landmarks.landmark:
                            landmark_coords.extend([lm.x, lm.y, lm.z])
                        frame_buffer.append(landmark_coords)

            # Display info
            progress = f"{samples_this_letter}/{SAMPLES_PER_LETTER}"
            letter_progress = f"{current_letter_idx + 1}/{len(DYNAMIC_LETTERS)}"

            # Recording indicator: red border and frame counter replace the letter line
            if recording:
                cv2.rectangle(frame, (0, 0), (frame.shape[1], frame.shape[0]), (0, 0, 255), 10)
                cv2.putText(frame, f"RECORDING: {len(frame_buffer)}/{FRAMES_PER_GESTURE}",
                           (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 3)
            else:
                cv2.putText(frame, f"Letter: {current_letter} (DYNAMIC)",
                           (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 3)

            cv2.putText(frame, f"Samples: {progress}",
                       (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Progress: {letter_progress}",
                       (10, 130), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            if not recording:
                if hand_detected:
                    cv2.putText(frame, "Press SPACE to start recording",
                               (10, frame.shape[0] - 20),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                else:
                    cv2.putText(frame, "No hand detected",
                               (10, frame.shape[0] - 20),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            cv2.imshow('NGT Dynamic Data Collection', frame)

            # Check if we've recorded enough frames
            if recording and len(frame_buffer) >= FRAMES_PER_GESTURE:
                # Save the sequence: one row per buffered frame, all sharing sample_counter
                with open(csv_path, 'a', newline='') as f:
                    writer = csv.writer(f)
                    for frame_num, coords in enumerate(frame_buffer):
                        writer.writerow([current_letter, sample_counter, frame_num] + coords)

                samples_this_letter += 1
                sample_counter += 1
                print(f"✓ Saved sample {samples_this_letter}/{SAMPLES_PER_LETTER} for letter '{current_letter}' ({FRAMES_PER_GESTURE} frames)")

                # Reset
                recording = False
                frame_buffer = []

                # Auto-advance when target reached
                if samples_this_letter >= SAMPLES_PER_LETTER:
                    current_letter_idx += 1
                    if current_letter_idx >= len(DYNAMIC_LETTERS):
                        print("\n🎉 All dynamic letters collected!")
                        break
                    current_letter = DYNAMIC_LETTERS[current_letter_idx]
                    samples_this_letter = 0
                    print(f"\n→ Moving to letter '{current_letter}'")

            key = cv2.waitKey(1) & 0xFF

            # Toggle recording on SPACE
            if key == KEY_SPACE:
                if not recording:
                    recording = True
                    frame_buffer = []
                    print(f"🔴 Recording '{current_letter}'...")
                else:
                    # Manual stop discards the partial sequence; nothing is written.
                    recording = False
                    frame_buffer = []
                    print("⏹ Recording cancelled")

            # Next letter on N (also abandons any recording in progress)
            elif key in KEYS_NEXT:
                if samples_this_letter < SAMPLES_PER_LETTER:
                    print(f"⚠ Only {samples_this_letter}/{SAMPLES_PER_LETTER} samples for '{current_letter}'")
                current_letter_idx += 1
                if current_letter_idx >= len(DYNAMIC_LETTERS):
                    print("\nAll dynamic letters collected!")
                    break
                current_letter = DYNAMIC_LETTERS[current_letter_idx]
                samples_this_letter = 0
                recording = False
                frame_buffer = []
                print(f"\n→ Skipping to letter '{current_letter}'")

            # Quit on Q
            elif key in KEYS_QUIT:
                print("\nStopped by user")
                break
finally:
    # Always free the camera and close the preview window, including when the
    # kernel is interrupted or an exception escapes the loop.
    cap.release()
    cv2.destroyAllWindows()

print(f"\nData saved to: {csv_path}")
# Letters that were skipped with N are included in this count.
print(f"Collected {current_letter_idx}/{len(DYNAMIC_LETTERS)} letters")

# Show summary (distinct recorded sequences per letter across the whole CSV)
import pandas as pd
df = pd.read_csv(csv_path)
print("\nDataset summary:")
print(df.groupby('letter')['sample_id'].nunique())

Starting DYNAMIC data collection for 5 letters
Target: 30 samples per letter
Each sample captures 30 frames

Controls:
  SPACE - Start/stop recording gesture
  N - Next letter
  Q - Quit



I0000 00:00:1768890707.311040  742127 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1768890707.337811  742703 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1768890707.347186  742703 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1768890707.943388  742709 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


🔴 Recording 'H'...
✓ Saved sample 1/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 2/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 3/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 4/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 5/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 6/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 7/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 8/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 9/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 10/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 11/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 12/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 13/30 for letter 'H' (30 frames)
🔴 Recording 'H'...
✓ Saved sample 14/30 for letter 'H' (30 frames)
