In [4]:
import torch
import torch.nn as nn
import cv2
import mediapipe as mp

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
class HandGestureMLP(nn.Module):
    """Multi-layer perceptron that classifies a flattened hand-landmark vector.

    The network is a stack of ``Linear -> ReLU -> Dropout`` stages, one per
    entry in ``hidden_dims``, followed by a final ``Linear`` layer that emits
    one raw logit per class (no softmax is applied inside the model).

    With the default arguments the layer layout is the fixed
    ``42 -> 128 -> 64 -> num_classes`` architecture, with linear layers at
    ``net.0``, ``net.3`` and ``net.6``, so checkpoints saved from that layout
    load unchanged.

    Args:
        num_classes: Number of output logits (one per gesture class).
        input_dim: Length of each input feature vector. Defaults to 42.
        hidden_dims: Width of each hidden layer, in order. Defaults to
            ``(128, 64)``.
        dropout: Dropout probability applied after every hidden activation.
            Defaults to 0.3.
    """

    def __init__(self, num_classes, input_dim=42, hidden_dims=(128, 64), dropout=0.3):
        super().__init__()

        layers = []
        in_features = input_dim
        # One Linear/ReLU/Dropout triple per hidden width, so module indices
        # advance in steps of three (0, 3, ...), matching the default layout.
        for width in hidden_dims:
            layers += [nn.Linear(in_features, width), nn.ReLU(), nn.Dropout(dropout)]
            in_features = width

        # Output projection: raw logits, left un-normalised for the caller.
        layers.append(nn.Linear(in_features, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return raw class logits for ``x``.

        Args:
            x: Float tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``num_classes``.
        """
        return self.net(x)


In [6]:
# Gesture labels "0" through "9"; a predicted class index is turned into its
# label by indexing into this list.
class_names = [str(digit) for digit in range(10)]
num_classes = len(class_names)

# Build the classifier and place its parameters on the selected device.
model = HandGestureMLP(num_classes)
model = model.to(device)

# Switch to evaluation mode (turns dropout off). Kept as the cell's final
# expression so the notebook echoes the architecture.
model.eval()

HandGestureMLP(
  (net): Sequential(
    (0): Linear(in_features=42, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [7]:
# Load the trained weights into the model.
#
# map_location=device remaps the stored tensors onto the device chosen earlier
# in this notebook, so a checkpoint saved on a GPU machine still loads when
# only a CPU is available.
#
# The path is relative to the kernel's working directory; if the file is not
# there this raises FileNotFoundError. Do not continue to the inference cells
# in that case: the model would still hold its random initial weights.
#
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (on recent PyTorch versions consider weights_only=True).
model.load_state_dict(torch.load("landmark_model.pth", map_location=device))

FileNotFoundError: [Errno 2] No such file or directory: 'landmark_model.pth'

In [None]:
import cv2
import torch
import mediapipe as mp

# Short aliases for the MediaPipe drawing helpers and the hands solution.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Hand tracker shared by the webcam cell: at most one hand per frame, with a
# 0.6 threshold for both detection and tracking confidence.
# static_image_mode=False: per the MediaPipe docs this treats the input as a
# video stream rather than as unrelated still images.
hands = mp_hands.Hands(
    max_num_hands=1,
    static_image_mode=False,
    min_tracking_confidence=0.6,
    min_detection_confidence=0.6,
)


In [None]:
# Live webcam demo: find one hand per frame, classify its landmarks with the
# MLP, and overlay the predicted label on the preview window.
# Press "q" while the preview window has focus to stop.
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    # Without this message the loop below is skipped and the cell ends silently.
    print("Could not open the webcam (device index 0); nothing to display.")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame could be read; stop instead of processing an empty frame.
            break

        # Mirror the frame horizontally so the preview behaves like a mirror.
        # NOTE(review): the original comment marks this as important,
        # presumably because the landmarks must match the orientation used
        # when the training data was captured; confirm.
        frame = cv2.flip(frame, 1)

        # OpenCV frames are BGR; convert to RGB before handing to MediaPipe.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)

        if results.multi_hand_landmarks:
            # The tracker is configured for a single hand, so take the first.
            hand_landmarks = results.multi_hand_landmarks[0]

            # Draw the hand skeleton (visual feedback only; not used as input).
            mp_drawing.draw_landmarks(
                frame,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS
            )

            # Flatten the landmarks into [x0, y0, x1, y1, ...], the 42-value
            # feature vector the model takes as input.
            landmark_xy = [
                coord
                for lm in hand_landmarks.landmark
                for coord in (lm.x, lm.y)
            ]

            # Add a batch dimension and move to the model's device.
            features = torch.tensor(
                landmark_xy, dtype=torch.float32
            ).unsqueeze(0).to(device)

            # Inference: softmax over the logits, then take the top class.
            with torch.no_grad():
                output = model(features)
                probs = torch.softmax(output, dim=1)
                conf, pred = torch.max(probs, dim=1)

            label = class_names[pred.item()]
            confidence = conf.item()

            # Overlay "<label> (<confidence>)" in green at the top-left corner.
            cv2.putText(
                frame,
                f"{label} ({confidence:.2f})",
                (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.0,
                (0, 255, 0),
                2
            )

        cv2.imshow("Landmark Gesture Recognition", frame)

        # waitKey also services the GUI event loop; mask to the low byte
        # before comparing against the "q" key code.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even if the loop is
    # interrupted (e.g. a kernel interrupt) or a frame raises an exception.
    cap.release()
    cv2.destroyAllWindows()
