In [1]:
from tensorflow import keras

# Load the saved Keras model from "my_final_model.h5". The path is relative,
# so the file must be in the kernel's current working directory.
# The webcam cell further down calls model.predict() on this object.
model = keras.models.load_model("my_final_model.h5")

In [4]:
import time
import cv2
import mediapipe as mp
import numpy as np

# Class names indexed by the model's output position (0 -> "A", ..., 25 -> "Z").
# NOTE(review): assumes the model was trained with exactly this class order — confirm.
labels = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# Timestamp (time.time()) of the most recent prediction. Starting at 0 means the
# first frame with a detected hand triggers a prediction immediately.
last_pred_time = 0
# Minimum number of seconds between two consecutive predictions.
prediction_delay = 2

# Convenience functions for drawing landmarks (and optionally connections) on an image.
mp_drawing = mp.solutions.drawing_utils

# The MediaPipe Hands solution (detection/tracking model plus its Python API).
mp_hands = mp.solutions.hands

# Open capture device 0; frames are read from this object inside the loop.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even if
# the loop raises (e.g. a model shape error) or the kernel is interrupted;
# otherwise the device stays locked until the kernel is restarted.
try:
    with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        # min_detection_confidence --> threshold to accept an initial hand detection.
        # min_tracking_confidence --> threshold to keep tracking a hand across frames.
        while cap.isOpened():  # keep looping while the capture device is open
            # ret is True when a frame was read; frame is the image in BGR
            # channel order (OpenCV default).
            ret, frame = cap.read()
            if not ret:
                # The read failed or the stream ended. This check must come before
                # any use of `frame`, which is not a valid image in that case.
                break

            frame = cv2.flip(frame, 1)  # mirror horizontally so the preview behaves like a mirror

            # MediaPipe expects RGB input, OpenCV delivers BGR.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # `results.multi_hand_landmarks` is empty/None when no hand is found,
            # otherwise it holds one landmark set per detected hand.
            results = hands.process(image)
            # Back to BGR so OpenCV displays the colours correctly.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if results.multi_hand_landmarks:
                # Draw the landmark points of every detected hand onto the frame.
                # No connections argument is passed, so only the points are drawn.
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(image, hand_landmarks)

                # Throttle: run the classifier at most once per `prediction_delay` seconds.
                current_time = time.time()
                if current_time - last_pred_time > prediction_delay:
                    # NOTE(review): the model receives the BGR frame *with the landmarks
                    # already drawn on it*. Confirm the training images looked the same
                    # (channel order and overlay), otherwise predictions will suffer.
                    # NOTE(review): assumes the model takes a (1, 128, 128, 3) float
                    # input scaled to [0, 1] — confirm against the training pipeline.
                    resized = cv2.resize(image, (128, 128))
                    normalized = resized.astype("float32") / 255.0
                    input_data = np.expand_dims(normalized, axis=0)  # add the batch axis

                    preds = model.predict(input_data, verbose=0)
                    pred_class = np.argmax(preds)  # index of the highest score
                    prediction_text = labels[pred_class]

                    print("Prediction:", prediction_text)
                    last_pred_time = current_time

            cv2.imshow('Robotech', image)  # show the (annotated) frame in the preview window
            # waitKey also services the GUI event loop; 27 is the Esc key code.
            if (cv2.waitKey(5) & 0xFF) == 27:
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

Prediction: M
Prediction: X
Prediction: X
Prediction: X
Prediction: M
Prediction: B
Prediction: M
Prediction: P
Prediction: X
Prediction: X
