In [1]:
import cv2 as cv
import mediapipe as mp
import csv
import copy
import itertools




In [2]:
TARGET_LETTERS = ['M', 'N', 'P', 'Q', 'R']
TARGET_SAMPLES = 1000 

In [3]:
def calc_landmark_list(image, landmarks):
    """Extract landmark coordinates."""
    image_width, image_height = image.shape[1], image.shape[0]
    landmark_point = []
    for landmark in landmarks.landmark:
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        landmark_point.append([landmark_x, landmark_y])
    return landmark_point

def pre_process_landmark(landmark_list):
    """Normalize landmarks to relative coordinates."""
    temp_landmark_list = copy.deepcopy(landmark_list)
    
    # Convert to relative coordinates
    base_x, base_y = temp_landmark_list[0][0], temp_landmark_list[0][1]
    for index, landmark_point in enumerate(temp_landmark_list):
        temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
        temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y
    
    # Flatten
    temp_landmark_list = list(itertools.chain.from_iterable(temp_landmark_list))
    
    # Normalization
    max_value = max(list(map(abs, temp_landmark_list)))
    temp_landmark_list = list(map(lambda n: n / max_value, temp_landmark_list))
    
    return temp_landmark_list

In [4]:
def draw_landmarks(image, landmark_point):
    """Draw hand skeleton."""
    if len(landmark_point) > 0:
        # Thumb
        cv.line(image, tuple(landmark_point[2]), tuple(landmark_point[3]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[2]), tuple(landmark_point[3]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[3]), tuple(landmark_point[4]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[3]), tuple(landmark_point[4]), (255, 255, 255), 2)

        # Index finger
        cv.line(image, tuple(landmark_point[5]), tuple(landmark_point[6]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[5]), tuple(landmark_point[6]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[6]), tuple(landmark_point[7]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[6]), tuple(landmark_point[7]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[7]), tuple(landmark_point[8]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[7]), tuple(landmark_point[8]), (255, 255, 255), 2)

        # Middle finger
        cv.line(image, tuple(landmark_point[9]), tuple(landmark_point[10]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[9]), tuple(landmark_point[10]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[10]), tuple(landmark_point[11]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[10]), tuple(landmark_point[11]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[11]), tuple(landmark_point[12]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[11]), tuple(landmark_point[12]), (255, 255, 255), 2)

        # Ring finger
        cv.line(image, tuple(landmark_point[13]), tuple(landmark_point[14]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[13]), tuple(landmark_point[14]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[14]), tuple(landmark_point[15]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[14]), tuple(landmark_point[15]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[15]), tuple(landmark_point[16]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[15]), tuple(landmark_point[16]), (255, 255, 255), 2)

        # Pinky
        cv.line(image, tuple(landmark_point[17]), tuple(landmark_point[18]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[17]), tuple(landmark_point[18]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[18]), tuple(landmark_point[19]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[18]), tuple(landmark_point[19]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[19]), tuple(landmark_point[20]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[19]), tuple(landmark_point[20]), (255, 255, 255), 2)

        # Palm
        cv.line(image, tuple(landmark_point[0]), tuple(landmark_point[1]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[0]), tuple(landmark_point[1]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[1]), tuple(landmark_point[2]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[1]), tuple(landmark_point[2]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[2]), tuple(landmark_point[5]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[2]), tuple(landmark_point[5]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[5]), tuple(landmark_point[9]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[5]), tuple(landmark_point[9]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[9]), tuple(landmark_point[13]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[9]), tuple(landmark_point[13]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[13]), tuple(landmark_point[17]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[13]), tuple(landmark_point[17]), (255, 255, 255), 2)
        cv.line(image, tuple(landmark_point[17]), tuple(landmark_point[0]), (0, 0, 0), 6)
        cv.line(image, tuple(landmark_point[17]), tuple(landmark_point[0]), (255, 255, 255), 2)

    return image

In [5]:
def draw_remaining_letters(image, collection_counts, target):
    """Draw list of letters that still need collection at bottom of screen."""
    y_start = image.shape[0] - 100
    cv.rectangle(image, (10, y_start), (image.shape[1] - 10, image.shape[0] - 10), (0, 0, 0), -1)
    
    text = "Remaining: "
    for letter in TARGET_LETTERS:
        if collection_counts[letter] < target:
            text += f"{letter}({collection_counts[letter]}) "
    
    cv.putText(image, text, (20, y_start + 30),
               cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
    
    return image

In [6]:
def draw_collection_info(image, letter, collected, target, is_paused):
    """Draw collection progress at top."""
    cv.rectangle(image, (10, 10), (400, 150), (0, 0, 0), -1)
    
    if letter:
        cv.putText(image, f"Letter: {letter}", (20, 40),
                   cv.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2)
        cv.putText(image, f"Captured: {collected}/{target}", (20, 75),
                   cv.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        pct = (collected / target * 100) if target > 0 else 0
        color = (0, 255, 0) if pct >= 100 else (255, 255, 255)
        cv.putText(image, f"{pct:.1f}%", (20, 105),
                   cv.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
        
        # Pause status
        pause_text = "PAUSED - Press SPACE" if is_paused else "Press SPACE to pause"
        pause_color = (0, 165, 255) if is_paused else (200, 200, 200)
        cv.putText(image, pause_text, (20, 135),
                   cv.FONT_HERSHEY_SIMPLEX, 0.6, pause_color, 1)
    else:
        cv.putText(image, "Press M, N, P, Q, or R", (20, 50),
                   cv.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv.putText(image, "to start collecting", (20, 85),
                   cv.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
        cv.putText(image, "ESC to quit", (20, 120),
                   cv.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 200), 1)
    
    return image

In [7]:
def run_targeted_collection(output_csv='../data/dataset/additional_samples.csv', 
                            target_samples=1000):
    """Collect additional samples for specific letters."""
    
    # Setup camera
    cap = cv.VideoCapture(0)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, 960)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, 540)
    
    # Setup MediaPipe
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=1,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.5
    )
    
    # Track collection per letter
    collection_counts = {letter: 0 for letter in TARGET_LETTERS}
    current_letter = None
    is_paused = False
    
    print(f"Targeted collection started for: {', '.join(TARGET_LETTERS)}")
    print(f"Target: {target_samples} samples per letter")
    print("Press letter keys (M, N, P, Q, R) to collect.")
    print("Press SPACE to pause/resume. Press ESC to quit.")
    
    while True:
        ret, image = cap.read()
        if not ret:
            break
        
        image = cv.flip(image, 1)
        debug_image = copy.deepcopy(image)
        
        # Process hand detection
        image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        image_rgb.flags.writeable = False
        results = hands.process(image_rgb)
        image_rgb.flags.writeable = True
        
        # Handle key press
        key = cv.waitKey(10)
        
        if key == 27:  # ESC
            break
        
        # Toggle pause with SPACE
        if key == 32:  # SPACE
            is_paused = not is_paused
            print(f"Collection {'paused' if is_paused else 'resumed'}")
        
        # Check for target letter keys
        if 97 <= key <= 122:  # lowercase a-z
            letter = chr(key).upper()
            if letter in TARGET_LETTERS:
                current_letter = letter
                is_paused = False
                print(f"Now collecting: {letter}")
        
        # Collect sample if hand detected, letter selected, and not paused
        if results.multi_hand_landmarks and current_letter and not is_paused:
            for hand_landmarks in results.multi_hand_landmarks:
                landmark_list = calc_landmark_list(debug_image, hand_landmarks)
                
                # Draw skeleton
                debug_image = draw_landmarks(debug_image, landmark_list)
                
                processed_landmarks = pre_process_landmark(landmark_list)
                
                # Save to CSV with correct label index
                label_index = ord(current_letter) - ord('A')
                with open(output_csv, 'a', newline='', encoding='utf-8') as f:
                    writer = csv.writer(f)
                    writer.writerow([label_index] + processed_landmarks)
                
                collection_counts[current_letter] += 1
        
        # Check completion
        all_complete = all(collection_counts[letter] >= target_samples 
                          for letter in TARGET_LETTERS)
        if all_complete:
            print("\nAll target letters complete!")
            break
        
        # Draw UI
        debug_image = draw_collection_info(
            debug_image,
            current_letter,
            collection_counts.get(current_letter, 0) if current_letter else 0,
            target_samples,
            is_paused
        )
        
        # Draw remaining letters list
        debug_image = draw_remaining_letters(debug_image, collection_counts, target_samples)
        
        cv.imshow('Targeted Letter Collection', debug_image)
    
    cap.release()
    cv.destroyAllWindows()
    
    print("\nCollection Summary:")
    for letter in TARGET_LETTERS:
        count = collection_counts[letter]
        print(f"{letter}: {count}/{target_samples}")
    
    return collection_counts

In [8]:
run_targeted_collection(
        output_csv='../static_collection/additional_samples.csv',
        target_samples=TARGET_SAMPLES
    )

Targeted collection started for: M, N, P, Q, R
Target: 1000 samples per letter
Press letter keys (M, N, P, Q, R) to collect.
Press SPACE to pause/resume. Press ESC to quit.
Collection paused
Now collecting: M
Collection paused
Collection resumed
Collection paused
Now collecting: N
Collection paused
Now collecting: P
Collection paused
Now collecting: Q
Collection paused
Now collecting: R
Collection paused
Now collecting: P

All target letters complete!

Collection Summary:
M: 1044/1000
N: 1059/1000
P: 1000/1000
Q: 1016/1000
R: 1013/1000


{'M': 1044, 'N': 1059, 'P': 1000, 'Q': 1016, 'R': 1013}