In [7]:
%pip install -q mediapipe==0.10.7

Note: you may need to restart the kernel to use updated packages.


In [8]:
from concurrent.futures import ThreadPoolExecutor
from tensorflow.keras.models import load_model
import cv2
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt
import os

Landmark Function

In [9]:
# Landmark indices retained from each MediaPipe result; the order of each list
# is the order of the corresponding rows in the per-frame landmark array.

# Hands: every index 0..20 is kept.
filtered_hand = [*range(21)]

# Pose: indices 11..16 (presumably the shoulders, elbows and wrists in
# MediaPipe Pose numbering -- TODO confirm against the MediaPipe docs).
filtered_pose = [*range(11, 17)]

# Face: hand-picked subset of face-mesh indices, strictly ascending.
filtered_face = [
    0, 4, 7, 8, 10, 13, 14, 17, 21, 33, 37, 39,
    40, 46, 52, 53, 54, 55, 58, 61, 63, 65, 66, 67,
    70, 78, 80, 81, 82, 84, 87, 88, 91, 93, 95, 103,
    105, 107, 109, 127, 132, 133, 136, 144, 145, 146, 148, 149,
    150, 152, 153, 154, 155, 157, 158, 159, 160, 161, 162, 163,
    172, 173, 176, 178, 181, 185, 191, 234, 246, 249, 251, 263,
    267, 269, 270, 276, 282, 283, 284, 285, 288, 291, 293, 295,
    296, 297, 300, 308, 310, 311, 312, 314, 317, 318, 321, 323,
    324, 332, 334, 336, 338, 356, 361, 362, 365, 373, 374, 375,
    377, 378, 379, 380, 381, 382, 384, 385, 386, 387, 388, 389,
    390, 397, 398, 400, 402, 405, 409, 415, 454, 466, 468, 473,
]

# Number of landmarks kept per group (one hand / pose / face).
HAND_NUM, POSE_NUM, FACE_NUM = (
    len(filtered_hand),
    len(filtered_pose),
    len(filtered_face),
)

In [10]:
# MediaPipe solution objects, created once at module level and reused for every
# frame: get_frame_landmarks calls .process() on each of the three per frame.
hands = mp.solutions.hands.Hands()
pose = mp.solutions.pose.Pose()
# refine_landmarks=True: presumably required because filtered_face contains the
# indices 468 and 473, which appear to lie beyond the base face-mesh range --
# TODO confirm against the MediaPipe FaceMesh documentation.
face_mesh = mp.solutions.face_mesh.FaceMesh(refine_landmarks=True)

def get_frame_landmarks(frame):
    """Run the hand, pose and face detectors on one frame and pack the results.

    The three module-level MediaPipe objects (``hands``, ``pose``,
    ``face_mesh``) each process ``frame`` on their own worker thread and write
    into disjoint row ranges of a single output array.

    Args:
        frame: Image array handed unchanged to each detector's ``process()``.
            NOTE(review): MediaPipe documents RGB input for ``process()``;
            this function performs no colour conversion, so the caller is
            responsible for it.

    Returns:
        ``numpy.ndarray`` of shape ``(HAND_NUM * 2 + POSE_NUM + FACE_NUM, 3)``
        holding ``(x, y, z)`` per landmark, laid out as:

        * rows ``[0, HAND_NUM)``: hand whose handedness index is 0
          (labelled "right" by the original author),
        * rows ``[HAND_NUM, 2 * HAND_NUM)``: the other hand ("left"),
        * next ``POSE_NUM`` rows: pose landmarks selected by ``filtered_pose``,
        * remaining ``FACE_NUM`` rows: first detected face, selected by
          ``filtered_face``.

        Rows belonging to anything that was not detected stay at zero.

    Raises:
        Exception: any exception raised inside a detector worker is re-raised
            here instead of being silently dropped.
    """
    all_landmarks = np.zeros((HAND_NUM * 2 + POSE_NUM + FACE_NUM, 3))

    def to_xyz(landmark_list):
        # Convert a MediaPipe landmark list into an (n, 3) array of x, y, z.
        return np.array([(lm.x, lm.y, lm.z) for lm in landmark_list.landmark])

    def get_hands(frame):
        results_hands = hands.process(frame)
        if not results_hands.multi_hand_landmarks:
            return
        for i, hand_landmarks in enumerate(results_hands.multi_hand_landmarks):
            # If two detections share a handedness index, the later one
            # overwrites the earlier one (same as before).
            if results_hands.multi_handedness[i].classification[0].index == 0:
                all_landmarks[:HAND_NUM, :] = to_xyz(hand_landmarks)  # right
            else:
                all_landmarks[HAND_NUM:HAND_NUM * 2, :] = to_xyz(hand_landmarks)  # left

    def get_pose(frame):
        results_pose = pose.process(frame)
        if results_pose.pose_landmarks:
            all_landmarks[HAND_NUM * 2:HAND_NUM * 2 + POSE_NUM, :] = to_xyz(
                results_pose.pose_landmarks)[filtered_pose]

    def get_face(frame):
        results_face = face_mesh.process(frame)
        if results_face.multi_face_landmarks:
            # Only the first detected face is used.
            all_landmarks[HAND_NUM * 2 + POSE_NUM:, :] = to_xyz(
                results_face.multi_face_landmarks[0])[filtered_face]

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(task, frame)
                   for task in (get_hands, get_pose, get_face)]

    # Leaving the with-block has already waited for all three workers.
    # result() re-raises any exception a worker hit; without it a failing
    # detector would go unnoticed and its rows would silently stay zero.
    for future in futures:
        future.result()

    return all_landmarks

Load Model and Data

In [11]:
# Path to the directory containing the model files
directory = 'Model/'


def _accuracy_from_filename(file_name):
    """Return the accuracy encoded after '___Accuracy_' in an .h5 file name.

    Returns None when the marker is missing or the value is not a float, so
    one oddly named checkpoint cannot abort the whole scan.
    """
    _, marker, tail = file_name.partition('___Accuracy_')
    if not marker or not tail.endswith('.h5'):
        return None
    try:
        return float(tail[:-len('.h5')])
    except ValueError:
        return None


# Sorted so the scan order (and therefore the winner among equal accuracies)
# does not depend on the arbitrary order os.listdir returns.
model_files = sorted(os.listdir(directory))

# Highest accuracy seen so far and the file it belongs to
highest_accuracy = 0
best_model_file = None

for model_file in model_files:
    # Only checkpoints named WLASL_*.h5 are candidates
    if not (model_file.endswith('.h5') and model_file.startswith('WLASL_')):
        continue

    accuracy = _accuracy_from_filename(model_file)
    if accuracy is None:
        print(f"Skipping {model_file}: no parsable accuracy in the file name")
        continue

    # Keep this file if it beats the best accuracy seen so far
    if accuracy > highest_accuracy:
        highest_accuracy = accuracy
        best_model_file = model_file

if best_model_file is not None:
    # Load the best model
    best_model_path = os.path.join(directory, best_model_file)
    model = load_model(best_model_path)
    print(f"Best model found: {best_model_file} with accuracy {highest_accuracy}")
else:
    # Bind model explicitly so a stale model left in the kernel from an
    # earlier run cannot be used by later cells without anyone noticing.
    model = None
    print("No model files found or no model with higher accuracy found.")

Best model found: WLASL_2024_03_12__18_55_51___Loss_0.49055755138397217___Accuracy_0.8499220013618469.h5 with accuracy 0.8499220013618469


In [12]:
# Load the word list
# Load the word list: the 0-based line number of each word is its index.
# Explicit encoding so the result does not depend on the platform default.
with open('wordlist.txt', 'r', encoding='utf-8') as f:
    words = [line.strip() for line in f]

# word -> index (if a word is repeated, its last index wins, as before)
word_index = {word: index for index, word in enumerate(words)}

# index -> word, built straight from the lines rather than by inverting
# word_index: inverting drops every index whose word is repeated later in the
# file, which would surface as a KeyError when that index is looked up.
reverse_word_index = dict(enumerate(words))

Word-Level American Sign Language Real-Time Recognition

In [13]:

import cv2
import numpy as np
import matplotlib.pyplot as plt

# Start capturing video from the webcam
cap = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_SIMPLEX

# try/finally so the camera and the preview window are released even when the
# loop ends through an exception or a kernel interrupt.
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # read() reports failure (camera missing, busy or disconnected) through
        # ret; frame is then unusable, so stop instead of crashing on it.
        if not ret or frame is None:
            print("No frame received from the webcam; stopping.")
            break

        # Get the height and width of the frame
        height, width, _ = frame.shape

        # OpenCV delivers BGR while MediaPipe documents RGB input, so convert a
        # copy for detection and keep the BGR frame for drawing and display.
        # NOTE(review): confirm the training pipeline extracted landmarks from
        # RGB frames as well.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_landmarks = get_frame_landmarks(rgb_frame)

        # Draw landmarks on the frame; x and y are scaled by the frame size.
        # Landmarks that were not detected are zero and land on pixel (0, 0).
        for landmark in frame_landmarks:
            x = int(landmark[0] * width)
            y = int(landmark[1] * height)
            cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

        # Predict on a batch containing this single frame's landmarks
        frame_pred = model.predict(np.array([frame_landmarks]), verbose=0)
        frame_word_index = int(np.argmax(frame_pred))
        probability = round(np.max(frame_pred)*100,2)

        # Add text to the frame
        cv2.putText(frame, reverse_word_index[frame_word_index] + ': ' + str(probability) + '%', (10, 30), font, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Display the frame
        cv2.imshow('Frame with Landmarks', frame)

        # Check for the 'q' key to quit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture
    cap.release()
    cv2.destroyAllWindows()

