In [None]:
import torch
from torch import nn
import timm
import cv2
from torchvision import transforms
from PIL import Image


In [None]:
# 1. Load Labels
# One class name per line; a label's position in the list is the class index
# that the prediction step later uses to look up the name.
# Blank lines (e.g. extra newlines at the end of the file) are skipped so they
# do not turn into empty-string classes and inflate the classifier head size.
with open("labels.txt", encoding="utf-8") as f:
    classes = [name for name in (line.strip() for line in f) if name]

# Fail early with a clear message instead of building a zero-output classifier.
if not classes:
    raise ValueError("labels.txt contains no class labels")

# 2. Load Model
# Run on the GPU when CUDA is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Build the architecture without pretrained weights; the weights are taken
# from the checkpoint file loaded below.
model = timm.create_model("mobilenetv3_small_100", pretrained=False)

# Replace the classification head with one output per entry in `classes`.
head_in_features = model.classifier.in_features
model.classifier = nn.Linear(head_in_features, len(classes))

# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
model.load_state_dict(
    torch.load("mobilenetv3_asl.pth", map_location=device)
)

# Move the parameters to the selected device and switch to evaluation mode.
model = model.to(device).eval()

# 3. Define Transform
# Preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalize each channel with the mean/std values below.
# NOTE(review): presumably the same normalization that was used during
# training -- verify against the training code.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
transform = transforms.Compose(preprocess_steps)


In [None]:
# 4. Webcam Inference
# Reads frames from camera index 0, classifies each frame with `model`, and
# shows the frame with the predicted label drawn on it. The loop stops when a
# frame cannot be read or when 'q' is pressed in the display window.
cap = cv2.VideoCapture(0)

try:
    # Without this check a camera that failed to open just makes the first
    # read() return False and the cell ends silently with no explanation.
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam (camera index 0)")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert OpenCV BGR frame to PIL Image (RGB)
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)

        # Preprocess frame: apply the transform, add a batch dimension, move to device
        img_tensor = transform(img).unsqueeze(0).to(device)

        # Prediction (no gradients are needed for inference)
        with torch.no_grad():
            outs = model(img_tensor)
        pred = outs.argmax(1).item()
        label = classes[pred]

        # Display result: draw the label onto the original BGR frame
        cv2.putText(frame, f"Prediction: {label}", (20, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("ASL Detection - MobileNetV3", frame)

        # Exit on 'q' key; waitKey also gives the window time to refresh
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the window, even when the loop is
    # interrupted (e.g. a kernel interrupt) or inference raises; otherwise the
    # capture handle and the display window are left open.
    cap.release()
    cv2.destroyAllWindows()