In [10]:
import copy
import csv
import itertools
from collections import Counter
from pathlib import Path

import cv2 as cv
import mediapipe as mp

In [2]:
def calc_landmark_list(image, landmarks):
    """Convert normalized landmarks into integer pixel coordinates.

    Each entry of ``landmarks.landmark`` exposes ``x`` and ``y``; they are
    scaled by the image width/height, truncated to int, and capped at the
    last valid column/row index. No lower bound is applied.

    Returns:
        A list of ``[x, y]`` pairs, one per landmark, in input order.
    """
    height, width = image.shape[0], image.shape[1]
    last_col, last_row = width - 1, height - 1
    return [
        [min(int(point.x * width), last_col), min(int(point.y * height), last_row)]
        for point in landmarks.landmark
    ]

def pre_process_landmark(landmark_list):
    """Normalize landmarks to wrist-relative coordinates scaled by the largest magnitude.

    The first point is used as the origin: its x/y are subtracted from the
    x/y of every point. The points are then flattened into one list
    (``[x0, y0, x1, y1, ...]``) and every value is divided by the largest
    absolute value, so the result lies in ``[-1, 1]``.

    Args:
        landmark_list: Sequence of ``[x, y]`` points. It is not modified.

    Returns:
        A flat list of floats. An empty input yields ``[]``. If every point
        coincides with the first one (largest magnitude is 0), a list of
        ``0.0`` is returned instead of dividing by zero.
    """
    if len(landmark_list) == 0:
        return []

    base_x, base_y = landmark_list[0][0], landmark_list[0][1]

    # Shift to the base point and flatten in a single pass; a fresh list is
    # built, so the caller's points are never mutated.
    flattened = []
    for point in landmark_list:
        flattened.append(point[0] - base_x)
        flattened.append(point[1] - base_y)
        # Any components beyond x/y are carried through unshifted.
        flattened.extend(point[2:])

    max_value = max(map(abs, flattened))
    if max_value == 0:
        # Degenerate hand: all points identical, nothing to scale.
        return [0.0] * len(flattened)

    return [value / max_value for value in flattened]



In [3]:
def draw_remaining_letters(image, collection_counts, target):
    """Render a bottom banner listing every letter still below ``target``.

    A filled black rectangle is drawn across the bottom 100 px of ``image``
    (10 px margin), followed by one line of text with each unfinished letter
    from STATIC_LETTERS and its current count. ``image`` is drawn on in place
    and returned.
    """
    frame_height, frame_width = image.shape[0], image.shape[1]
    banner_top = frame_height - 100  # Bottom of screen

    cv.rectangle(
        image,
        (10, banner_top),
        (frame_width - 10, frame_height - 10),
        (0, 0, 0),
        -1,
    )

    pending = [
        f"{letter}({collection_counts[letter]}) "
        for letter in STATIC_LETTERS
        if collection_counts[letter] < target
    ]
    caption = "Remaining: " + "".join(pending)

    cv.putText(
        image,
        caption,
        (20, banner_top + 30),
        cv.FONT_HERSHEY_SIMPLEX,
        0.6,
        (255, 255, 255),
        1,
    )
    return image

In [4]:
def draw_landmarks(image, landmark_point):
    """Draw the hand skeleton onto ``image`` and return it.

    ``landmark_point`` is a list of ``[x, y]`` pixel points indexed 0-20.
    Each bone is drawn twice: a thick black line (width 6) and then a thin
    white line (width 2) on top of it. An empty list leaves the image untouched.
    """
    if len(landmark_point) == 0:
        return image

    # (start, end) landmark indices, in drawing order.
    bones = (
        # Thumb
        (2, 3), (3, 4),
        # Index finger
        (5, 6), (6, 7), (7, 8),
        # Middle finger
        (9, 10), (10, 11), (11, 12),
        # Ring finger
        (13, 14), (14, 15), (15, 16),
        # Pinky
        (17, 18), (18, 19), (19, 20),
        # Palm
        (0, 1), (1, 2), (2, 5), (5, 9), (9, 13), (13, 17), (17, 0),
    )
    outline = ((0, 0, 0), 6)
    core = ((255, 255, 255), 2)

    for start, end in bones:
        p_start = tuple(landmark_point[start])
        p_end = tuple(landmark_point[end])
        for color, thickness in (outline, core):
            cv.line(image, p_start, p_end, color, thickness)

    return image

In [5]:
def draw_collection_info(image, letter, collected, target, is_paused):
    """Paint the status panel in the top-left corner of ``image``.

    With a letter selected, the panel shows the letter, ``collected/target``,
    the completion percentage (green once it reaches 100) and the pause hint.
    Without a letter it shows the start-up instructions. ``image`` is drawn on
    in place and returned.
    """
    white = (255, 255, 255)
    light_grey = (200, 200, 200)

    cv.rectangle(image, (10, 10), (400, 150), (0, 0, 0), -1)

    if not letter:
        rows = [
            ("Press letter key", (20, 50), 0.8, white, 2),
            ("to start collecting", (20, 85), 0.8, white, 2),
            ("ESC to quit", (20, 120), 0.7, light_grey, 1),
        ]
    else:
        # Guard against a zero target so the percentage never divides by zero.
        pct = (collected / target * 100) if target > 0 else 0
        if pct >= 100:
            pct_color = (0, 255, 0)
        else:
            pct_color = white

        # Pause status
        if is_paused:
            pause_text, pause_color = "PAUSED - Press SPACE", (0, 165, 255)
        else:
            pause_text, pause_color = "Press SPACE to pause", light_grey

        rows = [
            (f"Letter: {letter}", (20, 40), 1.0, white, 2),
            (f"Captured: {collected}/{target}", (20, 75), 0.7, white, 2),
            (f"{pct:.1f}%", (20, 105), 0.7, pct_color, 2),
            (pause_text, (20, 135), 0.6, pause_color, 1),
        ]

    for text, origin, scale, color, thickness in rows:
        cv.putText(image, text, origin, cv.FONT_HERSHEY_SIMPLEX, scale, color, thickness)

    return image

In [6]:
def run_collection(output_csv):
    """Run the interactive webcam loop that records labelled hand-landmark rows.

    Frames are read from camera 0, mirrored, and passed to MediaPipe Hands.
    While a letter is selected and collection is not paused, every frame with
    a detected hand appends one row to ``output_csv``: the label index
    (``ord(letter) - ord('A')``) followed by the normalized landmark values.

    Keys:
        a-z / A-Z  select the letter to collect (only letters present in
                   STATIC_LETTERS are accepted); selecting also resumes.
        SPACE      toggle pause.
        ESC        quit.

    The loop also ends when the camera stops delivering frames or when every
    letter in STATIC_LETTERS has reached TARGET_SAMPLES.

    The camera, the MediaPipe graph and the OpenCV window are released even
    if the loop is interrupted or raises.

    Args:
        output_csv: Path of the CSV file that rows are appended to.

    Returns:
        dict mapping each letter in STATIC_LETTERS to the number of rows
        written during this call.
    """
    # Track collection per letter
    collection_counts = {letter: 0 for letter in STATIC_LETTERS}
    current_letter = None
    is_paused = False

    # Setup camera
    cap = cv.VideoCapture(0)
    try:
        cap.set(cv.CAP_PROP_FRAME_WIDTH, 960)
        cap.set(cv.CAP_PROP_FRAME_HEIGHT, 540)

        # Setup MediaPipe; the context manager closes the graph on exit.
        mp_hands = mp.solutions.hands
        with mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5
        ) as hands:
            print("Collection started. Press letter keys (A-Z) to collect.")
            print("Press SPACE to pause/resume. Press ESC to quit.")

            while True:
                ret, image = cap.read()
                if not ret:
                    break

                image = cv.flip(image, 1)  # Mirror for a selfie-style view
                debug_image = copy.deepcopy(image)

                # Process hand detection (MediaPipe expects RGB input)
                image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
                image_rgb.flags.writeable = False
                results = hands.process(image_rgb)
                image_rgb.flags.writeable = True

                # Handle key press
                key = cv.waitKey(10)

                if key == 27:  # ESC
                    break

                # Toggle pause with SPACE
                if key == 32:  # SPACE
                    is_paused = not is_paused
                    print(f"Collection {'paused' if is_paused else 'resumed'}")

                # Letter keys set the current letter. Both cases are accepted
                # so selection still works with Caps Lock or Shift held.
                if 97 <= key <= 122 or 65 <= key <= 90:
                    letter = chr(key).upper()
                    if letter in STATIC_LETTERS:
                        current_letter = letter
                        is_paused = False  # Auto-resume when new letter selected
                        print(f"Now collecting: {letter}")

                # Collect sample if hand detected, letter selected, and not paused
                if results.multi_hand_landmarks and current_letter and not is_paused:
                    for hand_landmarks in results.multi_hand_landmarks:
                        landmark_list = calc_landmark_list(debug_image, hand_landmarks)

                        # Draw skeleton
                        debug_image = draw_landmarks(debug_image, landmark_list)

                        processed_landmarks = pre_process_landmark(landmark_list)

                        # Save to CSV with correct label index.
                        # The file is reopened per row so each sample is on
                        # disk as soon as it is captured.
                        label_index = ord(current_letter) - ord('A')  # A=0, B=1, etc.
                        with open(output_csv, 'a', newline='', encoding='utf-8') as f:
                            writer = csv.writer(f)
                            writer.writerow([label_index] + processed_landmarks)

                        collection_counts[current_letter] += 1

                # Check completion
                all_complete = all(
                    collection_counts[letter] >= TARGET_SAMPLES
                    for letter in STATIC_LETTERS
                )
                if all_complete:
                    print("\nAll letters complete!")
                    break

                # Draw UI
                debug_image = draw_collection_info(
                    debug_image,
                    current_letter,
                    collection_counts.get(current_letter, 0) if current_letter else 0,
                    TARGET_SAMPLES,
                    is_paused
                )

                # Draw remaining letters list
                debug_image = draw_remaining_letters(debug_image, collection_counts, TARGET_SAMPLES)

                cv.imshow('Hand Gesture Collection', debug_image)
    finally:
        # Single cleanup point for every exit path (ESC, completion, camera
        # failure, exception, kernel interrupt).
        cap.release()
        cv.destroyAllWindows()

    print("\nCollection Summary:")
    for letter in STATIC_LETTERS:
        count = collection_counts[letter]
        print(f"{letter}: {count}/{TARGET_SAMPLES}")

    return collection_counts

In [14]:


# Collection settings: samples wanted per letter, where rows are appended,
# and the letters that need data collection.
TARGET_SAMPLES = 1246
OUTPUT_CSV = Path('../data/dataset/_NEW_ngt_static_keypoint.csv')
STATIC_LETTERS = list('ABCDEFGIKLMNOPQRSTVWY')

# Create the destination folder (and any missing parents) up front.
OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)



In [15]:
# Start the interactive capture session; the returned per-letter counts
# become this cell's output.
run_collection(OUTPUT_CSV)

Collection started. Press letter keys (A-Z) to collect.
Press SPACE to pause/resume. Press ESC to quit.


I0000 00:00:1769007239.187113       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Collection paused
Now collecting: A
Collection paused
Now collecting: Q

Collection Summary:
A: 354/1246
B: 0/1246
C: 0/1246
D: 0/1246
E: 0/1246
F: 0/1246
G: 0/1246
I: 0/1246
K: 0/1246
L: 0/1246
M: 0/1246
N: 0/1246
O: 0/1246
P: 0/1246
Q: 0/1246
R: 0/1246
S: 0/1246
T: 0/1246
V: 0/1246
W: 0/1246
Y: 0/1246


{'A': 354,
 'B': 0,
 'C': 0,
 'D': 0,
 'E': 0,
 'F': 0,
 'G': 0,
 'I': 0,
 'K': 0,
 'L': 0,
 'M': 0,
 'N': 0,
 'O': 0,
 'P': 0,
 'Q': 0,
 'R': 0,
 'S': 0,
 'T': 0,
 'V': 0,
 'W': 0,
 'Y': 0}

In [27]:
# Build ngt_keypoint.csv: every Dutch sample, plus the ASL samples for any
# label that has no Dutch samples.

# Load full alphabet (row index == label index)
with open('../data/dataset/keypoint_classifier_label.csv', 'r', encoding='utf-8-sig') as f:
    ALL_LETTERS = [row[0] for row in csv.reader(f)]

# Step 1: Read the Dutch dataset once, keeping its rows and the labels it covers.
# Blank lines come back from csv.reader as empty lists; they are skipped so
# they are neither counted nor written to the combined file.
dutch_data = []
dutch_indices = set()
with open('../data/dataset/new_samples.csv', 'r', encoding='utf-8-sig') as f:
    for row in csv.reader(f):
        if row:
            dutch_indices.add(int(row[0]))
            dutch_data.append(row)

dutch_letters = [ALL_LETTERS[i] for i in sorted(dutch_indices)]
print(f"Dutch dataset has: {dutch_letters}")
print(f"Dutch indices: {sorted(dutch_indices)}")

# Step 2: Get ASL data for letters NOT in Dutch
asl_data = []
with open('../data/dataset/asl_keypoint.csv', 'r', encoding='utf-8-sig') as f:
    for row in csv.reader(f):
        if row and int(row[0]) not in dutch_indices:  # Keep if NOT in Dutch
            asl_data.append(row)

asl_kept_letters = sorted(set(ALL_LETTERS[int(row[0])] for row in asl_data))
print(f"\nASL keeping: {asl_kept_letters}")
print(f"ASL samples: {len(asl_data)}")

# Step 3: Report the Dutch rows collected in step 1
print(f"Dutch samples: {len(dutch_data)}")

# Step 4: Combine (ASL rows first, then Dutch rows) and overwrite the output file
combined = asl_data + dutch_data

with open('../data/dataset/ngt_keypoint.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(combined)

print(f"\nTotal combined: {len(combined)} samples")

Dutch dataset has: ['A', 'D', 'E', 'G', 'H', 'J', 'K', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'W']
Dutch indices: [0, 3, 4, 6, 7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 22]

ASL keeping: ['B', 'C', 'F', 'I', 'L', 'U', 'V', 'X', 'Y', 'Z']
ASL samples: 12467
Dutch samples: 35047

Total combined: 47514 samples


In [28]:
# Check the combined dataset: per-label sample counts in ngt_keypoint.csv.
# The file is read as UTF-8, matching how the merge step writes it.
with open('../data/dataset/ngt_keypoint.csv', 'r', encoding='utf-8') as f:
    combined_labels = [int(row[0]) for row in csv.reader(f) if row]

# Load alphabet (row index == label index)
with open('../data/dataset/keypoint_classifier_label.csv', 'r', encoding='utf-8-sig') as f:
    ALL_LETTERS = [row[0] for row in csv.reader(f)]

# Tally once instead of rescanning the full label list for every class.
label_counts = Counter(combined_labels)

print("Letter counts in ngt_keypoint.csv:")
print("-" * 40)
for label_idx in sorted(label_counts):
    letter = ALL_LETTERS[label_idx]
    count = label_counts[label_idx]
    print(f"{label_idx:2d} ({letter}): {count:5d}")

print("-" * 40)
print(f"Total samples: {len(combined_labels)}")
print(f"Unique letters: {len(label_counts)}")

Letter counts in ngt_keypoint.csv:
----------------------------------------
 0 (A):  1898
 1 (B):  1281
 2 (C):   578
 3 (D):  2275
 4 (E):  2340
 5 (F):  1024
 6 (G):  2281
 7 (H):  3209
 8 (I):  1021
 9 (J):  2746
10 (K):  1870
11 (L):  1417
12 (M):  1912
13 (N):  1962
14 (O):  1918
15 (P):  1902
16 (Q):  1891
17 (R):  3018
18 (S):  1965
19 (T):  1895
20 (U):  1182
21 (V):  1227
22 (W):  1965
23 (X):  1094
24 (Y):  2179
25 (Z):  1464
----------------------------------------
Total samples: 47514
Unique letters: 26
