In [1]:
import torch

# Report whether PyTorch can see a CUDA device and, if it can, the name of
# device 0; otherwise print the literal string "None" for the GPU.
cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
else:
    gpu_name = "None"
print("GPU:", gpu_name)



CUDA available: True
GPU: NVIDIA GeForce RTX 4060 Laptop GPU


In [5]:
import cv2
import mediapipe as mp

# Live webcam preview: detect up to two hands per frame with MediaPipe and
# draw their landmarks/connections on the displayed image.
# Press 'q' in the preview window to stop.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=2)
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)

try:
    while True:
        success, frame = cap.read()
        if not success:
            # A failed grab yields no frame; handing that to cvtColor would
            # raise, so stop cleanly instead.
            print("❌ Failed to grab frame.")
            break

        # OpenCV delivers BGR; the frame is converted to RGB before it is
        # passed to the hand detector.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(image)

        if results.multi_hand_landmarks:
            for handLms in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, handLms, mp_hands.HAND_CONNECTIONS)

        cv2.imshow("Hand Tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Runs on normal exit, on exceptions and on a kernel interrupt, so the
    # camera is never left locked and the preview window never left open.
    cap.release()
    cv2.destroyAllWindows()


In [8]:
import os
import shutil

# Copy the raw dataset into a cleaned layout with one folder per label.
# Digits are stripped from each source folder name to form the label
# (e.g. "A1" -> "A"), so several source folders can merge into one label
# folder; every copied file is prefixed with its source folder name so the
# merged files cannot collide.

# Replace this with your actual dataset location
src_root = r"C:\Users\vedhr\asl_alphabet_train\asl_alphabet_train"
dst_root = r"C:\Users\vedhr\asl_cleaned"

os.makedirs(dst_root, exist_ok=True)

for subdir in sorted(os.listdir(src_root)):
    src_path = os.path.join(src_root, subdir)
    if not os.path.isdir(src_path):
        # Stray files next to the class folders are not classes; listing
        # them as directories would raise.
        continue

    # A1 → A. If the name is digits only, keep it unchanged: an empty label
    # would make dst_path equal dst_root and mix images into the root.
    label = ''.join(c for c in subdir if not c.isdigit()) or subdir
    dst_path = os.path.join(dst_root, label)
    os.makedirs(dst_path, exist_ok=True)

    for img in os.listdir(src_path):
        src_img = os.path.join(src_path, img)
        if not os.path.isfile(src_img):
            # copyfile only handles regular files; skip nested folders.
            continue
        dst_img = os.path.join(dst_path, f"{subdir}_{img}")  # avoid filename conflicts
        shutil.copyfile(src_img, dst_img)

print("✅ Cleaned dataset created at:", dst_root)


✅ Cleaned dataset created at: C:\Users\vedhr\asl_cleaned


In [9]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import os
import time

# =========================
# Configuration
# =========================
data_dir = r"C:\Users\vedhr\asl_cleaned"  # <- Point to cleaned folder (A/, B/, ..., space/)
epochs = 10
batch_size = 64
learning_rate = 0.001
split_seed = 42  # fixes the train/val partition so validation numbers are comparable across runs

# =========================
# Device setup
# =========================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# =========================
# Data loading and transforms
# =========================
# Every image is resized to 64x64, converted to a float tensor and then
# normalized with mean 0.5 / std 0.5 (the single value is applied to every
# channel). The inference code must apply the same pipeline.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Class labels come from the sub-folder names of data_dir.
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
class_names = dataset.classes
num_classes = len(class_names)
print("Classes:", class_names)

# 90% train / 10% validation. A seeded generator makes the partition
# reproducible; without it every run validates on a different subset.
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(split_seed)
train_set, val_set = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)

# =========================
# CNN Model Definition
# =========================
class ASLCNN(nn.Module):
    """Three-stage convolutional classifier for 3-channel 64x64 images.

    Each stage is an unpadded 3x3 convolution, a ReLU and a 2x2 max-pool.
    With a 64x64 input the spatial size goes 64 -> 62 -> 31 -> 29 -> 14 ->
    12 -> 6, which is why the first linear layer takes 128 * 6 * 6 features.

    Args:
        num_classes: number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Build the stages in order so the Sequential indices (0, 3, 6 for
        # the convolutions) stay the same as in a hand-written listing.
        stages = []
        for in_ch, out_ch in ((3, 32), (32, 64), (64, 128)):
            stages.extend([nn.Conv2d(in_ch, out_ch, 3), nn.ReLU(), nn.MaxPool2d(2)])
        self.conv_layers = nn.Sequential(*stages)

        # Classifier head: flatten -> 256 hidden units -> dropout -> logits.
        head = [
            nn.Flatten(),
            nn.Linear(128 * 6 * 6, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        ]
        self.fc_layers = nn.Sequential(*head)

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        return self.fc_layers(self.conv_layers(x))

model = ASLCNN(num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# =========================
# Training Loop
# =========================
# Per-epoch history: mean training loss, mean validation loss and
# validation accuracy (fraction of correct predictions).
train_loss, val_loss, val_acc = [], [], []
total_start = time.time()

for epoch in range(epochs):
    epoch_start = time.time()

    # ---- training pass ----
    model.train()
    running_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(images), labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Mean of the per-batch losses.
    train_loss.append(running_loss / len(train_loader))

    # ---- validation pass (no gradients, dropout disabled) ----
    model.eval()
    seen = hits = 0
    running_val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            running_val_loss += criterion(logits, labels).item()

            # Predicted class = index of the largest logit per sample.
            predicted = logits.argmax(dim=1)
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()

    val_loss.append(running_val_loss / len(val_loader))
    val_acc.append(hits / seen)

    epoch_time = time.time() - epoch_start
    print(f"Epoch [{epoch+1}/{epochs}] - Time: {epoch_time:.2f}s - Train Loss: {train_loss[-1]:.4f} - Val Acc: {val_acc[-1]*100:.2f}%")

total_time = time.time() - total_start
print(f"\n✅ Training complete in {total_time:.2f} seconds ({total_time/60:.2f} minutes)")

# =========================
# Save Model
# =========================
# Only the weights are written; loading them requires re-creating ASLCNN
# with the same number of classes first.
torch.save(model.state_dict(), "asl_cnn_model.pth")
print("✅ Model saved as 'asl_cnn_model.pth'")


Using device: cuda
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
Epoch [1/10] - Time: 392.91s - Train Loss: 1.0506 - Val Acc: 95.15%
Epoch [2/10] - Time: 404.30s - Train Loss: 0.2530 - Val Acc: 98.63%
Epoch [3/10] - Time: 396.43s - Train Loss: 0.1433 - Val Acc: 98.70%
Epoch [4/10] - Time: 422.71s - Train Loss: 0.1032 - Val Acc: 99.66%
Epoch [5/10] - Time: 423.47s - Train Loss: 0.0762 - Val Acc: 99.69%
Epoch [6/10] - Time: 423.37s - Train Loss: 0.0661 - Val Acc: 99.87%
Epoch [7/10] - Time: 424.10s - Train Loss: 0.0571 - Val Acc: 99.84%
Epoch [8/10] - Time: 423.78s - Train Loss: 0.0465 - Val Acc: 99.84%
Epoch [9/10] - Time: 428.05s - Train Loss: 0.0407 - Val Acc: 99.87%
Epoch [10/10] - Time: 423.73s - Train Loss: 0.0383 - Val Acc: 99.78%

✅ Training complete in 4162.86 seconds (69.38 minutes)
✅ Model saved as 'asl_cnn_model.pth'


In [2]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import mediapipe as mp
import numpy as np
import os
from PIL import Image

# ========== 1. Load the trained model ==========
class ASLCNN(nn.Module):
    """CNN used for inference; the layer layout must match the saved weights.

    Three stages of unpadded 3x3 convolution + ReLU + 2x2 max-pool, followed
    by a two-layer classifier. A 64x64 input shrinks to 6x6 after the third
    pool, hence the 128 * 6 * 6 input size of the first linear layer.

    Args:
        num_classes: number of output logits. Defaults to 29
            (26 alphabets + del + nothing + space).
    """

    def __init__(self, num_classes=29):
        super(ASLCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, 3), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 6 * 6, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            # Honor the constructor argument instead of a hard-coded 29, so
            # the head size always matches what the caller asked for.
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

# Load model
model = ASLCNN(num_classes=29)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# map_location lets weights saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load("asl_cnn_model.pth", map_location=device))
model.to(device)
model.eval()  # inference mode: disables dropout

# ========== 2. Load class labels ==========
data_dir = r"C:\Users\vedhr\asl_cleaned"  # Path to your class folders used in training
# The training code builds its class list with ImageFolder, which indexes
# only sub-directories. Filter the same way so that a stray file in
# data_dir cannot shift every label index.
class_names = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

# Fail early if the label list and the network head disagree; otherwise the
# mismatch only shows up later as wrong labels or an IndexError.
num_outputs = model.fc_layers[-1].out_features
if len(class_names) != num_outputs:
    raise ValueError(
        f"Found {len(class_names)} class folders in {data_dir!r} "
        f"but the model has {num_outputs} outputs"
    )
print("Classes loaded:", class_names)

# ========== 3. Define transform ==========
# Same preprocessing as the training cell that uses Normalize: resize to
# 64x64, convert to tensor, normalize with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# ========== 4. Initialize MediaPipe and OpenCV ==========
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7)
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)
print("📸 Webcam started — press 'q' to quit.")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("❌ Failed to grab frame.")
            break

        h, w, _ = frame.shape
        # rgb_frame is a separate array from frame, so it stays free of
        # anything drawn on frame below.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                # Get bounding box from landmarks (coordinates are scaled
                # by the frame width/height to get pixel positions).
                x_coords = [lm.x for lm in hand_landmarks.landmark]
                y_coords = [lm.y for lm in hand_landmarks.landmark]
                x_min, x_max = int(min(x_coords) * w), int(max(x_coords) * w)
                y_min, y_max = int(min(y_coords) * h), int(max(y_coords) * h)

                # Pad the box a little and clamp it to the frame.
                margin = 20
                x1, y1 = max(0, x_min - margin), max(0, y_min - margin)
                x2, y2 = min(w, x_max + margin), min(h, y_max + margin)

                # Crop from the clean RGB frame, not from the annotated one:
                # the training images contain no landmark overlay, so the
                # classifier must not see one either.
                hand_rgb = rgb_frame[y1:y2, x1:x2].copy()

                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                if hand_rgb.size != 0:
                    try:
                        # Convert to PIL image
                        hand_pil = Image.fromarray(hand_rgb)
                        hand_tensor = transform(hand_pil).unsqueeze(0).to(device)

                        with torch.no_grad():
                            outputs = model(hand_tensor)
                            _, predicted = torch.max(outputs, 1)
                            label = class_names[predicted.item()]

                        # Draw result; keep the text inside the image when
                        # the box touches the top edge.
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2)
                        cv2.putText(frame, label, (x1, max(20, y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX,
                                    1, (0, 255, 0), 2)

                    except Exception as e:
                        # Best effort: a bad frame should not end the session.
                        print("⚠️ Error during prediction:", e)

        cv2.imshow("ASL Sign Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window on every exit path, including
    # exceptions and a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


Classes loaded: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
📸 Webcam started — press 'q' to quit.


In [3]:
from PIL import Image
import glob

# Spot-check the loaded model on the first five images of the "F" class
# folder. Sorting makes the selection deterministic; glob order is not
# specified.
test_images = sorted(glob.glob(r"C:\Users\vedhr\asl_cleaned\F\*.jpg"))[:5]

for img_path in test_images:
    # Close the file promptly and force three channels: the network's first
    # convolution takes 3-channel input, so a grayscale or RGBA file would
    # otherwise fail.
    with Image.open(img_path) as img_file:
        image = img_file.convert("RGB")
    image = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
        print(f"Predicted: {class_names[predicted.item()]} -- File: {os.path.basename(img_path)}")


Predicted: F -- File: F_F1.jpg
Predicted: F -- File: F_F10.jpg
Predicted: F -- File: F_F100.jpg
Predicted: F -- File: F_F1000.jpg
Predicted: F -- File: F_F1001.jpg


In [None]:
# ================================
# ✅ FINAL FAST GPU-OPTIMIZED CODE
# ================================

import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
import time

# ========= SYSTEM OPTIMIZATION CHECKLIST =========
# [✔] Dataset on SSD (not HDD or OneDrive)
# [✔] Use num_workers in DataLoader (4–8)
# [✔] Use pin_memory=True in DataLoader
# [✔] Use larger batch size (128)
# [✔] Minimal transforms to reduce CPU load
# ================================================

# ========== DEVICE SETUP ==========
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
if device.type == 'cuda':
    print("CUDA Device:", torch.cuda.get_device_name(0))
    # Values are converted from bytes to MB.
    print("Memory Allocated:", torch.cuda.memory_allocated() / 1024**2, "MB")
    print("Memory Cached:", torch.cuda.memory_reserved() / 1024**2, "MB")

# ========== DATA PATH ==========
# Raw string, so single backslashes: doubling them inside r"..." puts two
# literal backslashes into the path.
data_dir = r"C:\Users\vedhr\asl_cleaned"

# ========== TRANSFORMS ==========
# Keep Normalize: the inference cell normalizes its input the same way and
# loads the checkpoint file written by this cell, so training without it
# would feed the model a different input range at prediction time.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# ========== LOAD DATA ==========
# Class labels come from the sub-folder names of data_dir.
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
class_names = dataset.classes
num_classes = len(class_names)
print("Classes:", class_names)

# Split into 90% train / 10% val. The seeded generator keeps the partition
# identical across runs so validation accuracy is comparable.
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_set, val_set = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Worker processes load batches in parallel; pinned memory speeds up the
# host-to-GPU copy.
train_loader = DataLoader(train_set, batch_size=128, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_set, batch_size=128, num_workers=4, pin_memory=True)

# ========== CNN MODEL ==========
class ASLCNN(nn.Module):
    """Compact CNN that maps a 3-channel 64x64 image to `num_classes` logits.

    The feature extractor is three rounds of unpadded 3x3 convolution, ReLU
    and 2x2 max-pooling (channels 3 -> 32 -> 64 -> 128). For a 64x64 input
    the final feature map is 6x6, which fixes the classifier input size at
    128 * 6 * 6.

    Args:
        num_classes: size of the output layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Trainable layers are created in network order.
        conv_a = nn.Conv2d(3, 32, kernel_size=3)
        conv_b = nn.Conv2d(32, 64, kernel_size=3)
        conv_c = nn.Conv2d(64, 128, kernel_size=3)
        hidden = nn.Linear(128 * 6 * 6, 256)
        logits_layer = nn.Linear(256, num_classes)

        self.conv_layers = nn.Sequential(
            conv_a, nn.ReLU(), nn.MaxPool2d(kernel_size=2),
            conv_b, nn.ReLU(), nn.MaxPool2d(kernel_size=2),
            conv_c, nn.ReLU(), nn.MaxPool2d(kernel_size=2),
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            hidden,
            nn.ReLU(),
            nn.Dropout(p=0.5),
            logits_layer,
        )

    def forward(self, x):
        """Run the feature extractor, then the classifier head."""
        features = self.conv_layers(x)
        logits = self.fc_layers(features)
        return logits

model = ASLCNN(num_classes).to(device)

# ========== TRAINING SETUP ==========
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 30

# ========== TRAINING LOOP ==========
for epoch in range(epochs):
    start = time.time()
    model.train()
    train_loss = 0  # sum of per-batch losses for this epoch

    for images, labels in train_loader:
        # The loaders use pinned memory; non_blocking=True is what lets the
        # host-to-GPU copy run asynchronously. Without it the pinning
        # brings little benefit. It has no effect on a CPU device.
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Validation pass: dropout off, no gradient tracking.
    model.eval()
    correct = 0
    total = 0
    val_loss = 0  # accumulated for inspection; not part of the log line

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            # Predicted class = index of the largest logit per sample.
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    end = time.time()
    print(f"Epoch [{epoch+1}/{epochs}] - Time: {end-start:.2f}s - Train Loss: {train_loss/len(train_loader):.4f} - Val Acc: {(correct/total)*100:.2f}%")

# ========== SAVE MODEL ==========
# Writes the weights only, to the same file name the inference cell loads.
torch.save(model.state_dict(), "asl_cnn_model.pth")
print("✅ Model saved as 'asl_cnn_model.pth'")


Using device: cuda
CUDA Device: NVIDIA GeForce RTX 4060 Laptop GPU
Memory Allocated: 1180.01220703125 MB
Memory Cached: 1552.0 MB
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
