In [2]:
!pip install imageio

Collecting imageio
  Downloading imageio-2.35.1-py3-none-any.whl.metadata (4.9 kB)
Downloading imageio-2.35.1-py3-none-any.whl (315 kB)
Installing collected packages: imageio
Successfully installed imageio-2.35.1


In [None]:
import torch
import torch.nn as nn
import numpy as np
import mediapipe as mp
from collections import deque
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import imageio.v3 as iio

# -------------------------
# Model definition
# -------------------------
class SignLSTM(nn.Module):
    def __init__(self, num_classes, input_size=126):
        super().__init__()
        self.lstm = nn.LSTM(input_size, 128, batch_first=True)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        _, (h, _) = self.lstm(x)
        return self.fc(h[-1])


# -------------------------
# Load trained model
# -------------------------
classes = ["cam_on", "toi"]  # same as training
model = SignLSTM(num_classes=2)
model.load_state_dict(torch.load(
    r"D:\WORK\Python\Project\vsl_mediapipe\models\vsl_model_v2.pth",
    map_location="cpu"
))
model.eval()

# -------------------------
# Mediapipe setup
# -------------------------
mp_hands = mp.solutions.hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)
mp_drawing = mp.solutions.drawing_utils

# -------------------------
# Buffer for sequences
# -------------------------
seq_len = 30
buffer = deque(maxlen=seq_len)
last_pred = None  # keep last prediction

# -------------------------
# Webcam capture with imageio
# -------------------------
for frame in iio.imiter("<video0>"):  # "<video0>" is default webcam
    rgb = np.ascontiguousarray(frame)  # already RGB
    res = mp_hands.process(rgb)

    detected = False
    left_hand = [0] * 63
    right_hand = [0] * 63

    # -------------------------
    # Hand landmark detection
    # -------------------------
    if res.multi_hand_landmarks and res.multi_handedness:
        detected = True
        for hand_landmarks, handedness in zip(res.multi_hand_landmarks, res.multi_handedness):
            pts = []
            for lm in hand_landmarks.landmark:
                pts.extend([lm.x, lm.y, lm.z])
            label = handedness.classification[0].label
            if label == "Left":
                left_hand = pts
            else:
                right_hand = pts

        # Draw landmarks
        h, w, _ = frame.shape
        annotated = frame.copy()
        for hand_landmarks in res.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                annotated, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS
            )
    else:
        annotated = frame.copy()

    # -------------------------
    # Feature collection
    # -------------------------
    if detected:
        feature = np.array(left_hand + right_hand)
        buffer.append(feature)
    else:
        buffer.clear()
        last_pred = None

    # -------------------------
    # Prediction
    # -------------------------
    if len(buffer) == seq_len:
        x = torch.tensor(np.array(buffer), dtype=torch.float32).unsqueeze(0)  # [1,30,126]
        with torch.no_grad():
            out = model(x)
            pred_id = torch.argmax(out, dim=1).item()
            last_pred = classes[pred_id]

    # -------------------------
    # Overlay text
    # -------------------------
    import cv2  # only for text drawing
    if last_pred is not None:
        cv2.putText(annotated, f"{last_pred}", (50, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 4)
    elif detected:
        cv2.putText(annotated, "Collecting frames...", (50, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 165, 0), 4)
    else:
        cv2.putText(annotated, "No hand detected", (50, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 4)

    # -------------------------
    # Show inline in Jupyter
    # -------------------------
    plt.imshow(annotated)
    plt.axis("off")
    clear_output(wait=True)
    display(plt.gcf())
    plt.clf()


  model.load_state_dict(torch.load(


OSError: Could not find a backend to open `<video0>`` with iomode `r`.