In [1]:
# Part 1: Data Collection Script
import cv2
import numpy as np
import mediapipe as mp
import os
import json
from datetime import datetime
from tensorflow import keras
from tensorflow.keras import layers

# Set up MediaPipe: grab the drawing helpers and the hands solution module,
# then build a detector limited to one hand with a 0.5 detection threshold.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.5,
    static_image_mode=False,
)

# Make sure the directory for saved data exists. exist_ok=True keeps the cell
# safe to re-run and avoids the race between checking and creating the folder.
os.makedirs('gesture_data', exist_ok=True)

# Function to extract hand landmarks
def extract_landmarks(results):
    """Flatten the first detected hand's landmarks into a plain Python list.

    Args:
        results: Object exposing ``multi_hand_landmarks``; each entry must
            provide a ``landmark`` iterable of points with ``x``, ``y`` and
            ``z`` attributes.

    Returns:
        A 1D list ``[x0, y0, z0, x1, y1, z1, ...]`` for the first hand, or
        ``None`` when ``multi_hand_landmarks`` is empty or falsy.
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        return None

    # Only the first hand is used; any additional hands are ignored.
    first_hand = detected_hands[0]
    xyz_rows = [[point.x, point.y, point.z] for point in first_hand.landmark]

    # Go through numpy so the rows collapse to one dimension and the values
    # come back as built-in Python numbers.
    return np.array(xyz_rows).ravel().tolist()


In [2]:
# Part 2: Real-time Prediction
from tensorflow.keras.models import load_model

def predict_gestures(model_path='gesture_model.h5',
                     mapping_path='gesture_mapping.json',
                     camera_index=0):
    """Run live hand-gesture recognition on a camera feed until 'q' is pressed.

    Each captured frame is mirrored, converted to RGB and processed with
    MediaPipe Hands. When a hand is detected, its landmarks are drawn on the
    frame, flattened with ``extract_landmarks`` and passed to the loaded model;
    the predicted gesture name and its confidence are overlaid on the frame,
    which is shown in an OpenCV window.

    Args:
        model_path: Path of the saved Keras model to load.
        mapping_path: Path of the JSON file that maps stringified class
            indices (e.g. ``"0"``) to gesture names.
        camera_index: Device index passed to ``cv2.VideoCapture``.

    Returns:
        None. Prints a message and returns early when the model or mapping
        file is missing, or when the camera cannot be opened.
    """
    # Check for the files up front: depending on the Keras/h5py version, a
    # missing model file may surface as an OSError or ValueError rather than
    # FileNotFoundError, which would bypass the handler below.
    if not (os.path.exists(model_path) and os.path.exists(mapping_path)):
        print("Model or mapping file not found. Please train the model first.")
        return

    # Load model and label mapping
    try:
        model = load_model(model_path)
        with open(mapping_path, 'r') as f:
            label_mapping = json.load(f)
    except FileNotFoundError:
        # Still possible if a file disappears between the check and the load.
        print("Model or mapping file not found. Please train the model first.")
        return

    # Initialize MediaPipe
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            print(f"Could not open camera {camera_index}.")
            return

        # The context manager closes the MediaPipe graph even if the loop raises.
        with mp_hands.Hands(static_image_mode=False, max_num_hands=1,
                            min_detection_confidence=0.5) as hands:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Flip the frame horizontally for a more intuitive mirror view
                frame = cv2.flip(frame, 1)

                # MediaPipe expects RGB input, while OpenCV delivers BGR
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Process the frame with MediaPipe
                results = hands.process(frame_rgb)

                # Draw hand landmarks and predict gesture
                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Extract landmarks for prediction
                    landmarks = extract_landmarks(results)
                    if landmarks:
                        # The model takes a batch, so wrap the single sample
                        prediction = model.predict(np.array([landmarks]), verbose=0)
                        gesture_id = np.argmax(prediction[0])
                        # JSON object keys are strings, hence the str() lookup
                        gesture_name = label_mapping[str(gesture_id)]
                        confidence = prediction[0][gesture_id]

                        # Display prediction
                        cv2.putText(frame, f"Gesture: {gesture_name}", (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                        cv2.putText(frame, f"Confidence: {confidence:.2f}", (10, 70),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                cv2.imshow('Hand Gesture Recognition', frame)

                # Quit when the user presses 'q' in the video window
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always free the camera and close the windows, including on errors.
        cap.release()
        cv2.destroyAllWindows()


# Start the live recognition loop when this cell runs; pressing 'q' in the
# video window ends it.
predict_gestures()

