In [3]:
import os
import cv2
import numpy as np
import mediapipe as mp
import joblib
from tensorflow import keras

In [4]:
# Load the trained classifier and the feature scaler used at inference time.
MODEL_PATH = 'asl_mediapipe_mlp.h5'
SCALER_PATH = 'scaler.save'

# Fail fast, with one clear message, if an artifact is missing -- before paying
# for the comparatively slow Keras model load.
_missing_artifacts = [p for p in (MODEL_PATH, SCALER_PATH) if not os.path.isfile(p)]
if _missing_artifacts:
    raise FileNotFoundError(
        f"Required artifact(s) not found: {_missing_artifacts} "
        f"(working directory: {os.getcwd()})"
    )

model = keras.models.load_model(MODEL_PATH)

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load scaler files you created or otherwise trust, and load them
# with the same scikit-learn version that saved them (see scikit-learn's
# "Model persistence" documentation).
scaler = joblib.load(SCALER_PATH)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:

# Class mapping: model output index -> label.
# Indices 0-25 are the letters A-Z; 26-28 are the three control classes.
# NOTE(review): the order presumably mirrors the label encoding used when the
# model was trained -- confirm against the training code before changing it.
id_to_label = dict(enumerate([
    *'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
    'del',
    'space',
    'nothing',
]))

# MediaPipe helpers shared by the cells below.
# Drawing utilities, used to overlay the detected hand skeleton on a frame.
mp_drawing = mp.solutions.drawing_utils

# Hand-landmark detector: at most one hand per frame, with both confidence
# thresholds set to 0.5. static_image_mode=False selects MediaPipe's
# video-stream mode rather than its independent-still-image mode.
mp_hands = mp.solutions.hands.Hands(
    max_num_hands=1,
    static_image_mode=False,
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
)

In [6]:
# Helper: extract wrist-centred, scale-normalised hand landmarks from a frame.
def extract_landmarks_from_frame(frame, hands):
    """Detect one hand in a BGR frame and return its normalised landmark vector.

    Args:
        frame: BGR image (as delivered by OpenCV). When a hand is found, the
            landmark skeleton is drawn onto this array in place.
        hands: Detector object exposing ``process(rgb_image)`` whose result has
            a ``multi_hand_landmarks`` attribute.

    Returns:
        ``(features, frame)``. ``features`` is ``None`` when no hand is
        detected; otherwise it is a flat float32 array holding one (x, y, z)
        triple per landmark, translated so landmark 0 sits at the origin and
        divided by the largest pairwise landmark distance.
    """
    detection = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    detected_hands = detection.multi_hand_landmarks
    if not detected_hands:
        return None, frame

    # Only the first detected hand is used.
    first_hand = detected_hands[0]
    mp_drawing.draw_landmarks(frame, first_hand, mp.solutions.hands.HAND_CONNECTIONS)

    coords = np.array(
        [[point.x, point.y, point.z] for point in first_hand.landmark],
        dtype=np.float32,
    )

    # Translate so that landmark 0 (named `wrist` in the original code) is the origin.
    centred = coords - coords[0]

    # Largest distance between any two landmarks; used as the hand's size.
    pairwise_diff = centred[None, :, :] - centred[:, None, :]
    span = np.linalg.norm(pairwise_diff, axis=-1).max()
    if span < 1e-6:
        # Degenerate (all points coincide): skip scaling to avoid dividing by ~0.
        span = 1.0

    return (centred / span).flatten(), frame

In [7]:

# Webcam: live ASL recognition loop. Press Esc in the preview window to quit.

# Consecutive failed frame grabs tolerated before the loop gives up. Without a
# limit, a disconnected camera would spin forever and Esc could never be read.
MAX_FAILED_READS = 30

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("nhi chl rha")  # original message, kept as-is (roughly: "it's not working")
    cap.release()
    # Raise instead of calling exit(): in a notebook, exit() shuts the kernel
    # down and discards the loaded model and scaler.
    raise RuntimeError("Could not open webcam (device index 0)")

print('Started')

failed_reads = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print(f"Camera returned no frame {failed_reads} times in a row; stopping")
                break
            continue
        failed_reads = 0

        # Mirror the image so the preview behaves like a mirror for the user.
        frame = cv2.flip(frame, 1)

        feat, frame = extract_landmarks_from_frame(frame, mp_hands)
        if feat is not None:
            try:
                x = scaler.transform(np.array(feat).reshape(1, -1))
                pred_probs = model.predict(x, verbose=0)[0]
                pred_class = int(np.argmax(pred_probs))
                label = id_to_label.get(pred_class, 'Unknown')
                confidence = pred_probs[pred_class]

                cv2.putText(frame,
                            f'Predicted: {label} ({confidence*100:.1f}%)',
                            (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1.2, (0, 255, 0), 3)
            except Exception as e:
                # Best effort: a failed prediction must not kill the live preview.
                print(f"Prediction failed: {e}")

        else:
            cv2.putText(frame,
                        "Bring Hand in Frame",
                        (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1.2, (0, 0, 255), 3)

        cv2.imshow('ASL Recognition', frame)

        # 27 is the key code for Esc.
        if cv2.waitKey(1) & 0xFF == 27:
            print('BYE BYE')
            break
finally:
    # Always free the camera and close the preview window, including when the
    # cell is interrupted or an unexpected error escapes the loop.
    cap.release()
    cv2.destroyAllWindows()


Started




BYE BYE
