In [1]:
import torch
from torch import nn, optim
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader

In [None]:
# Per-split preprocessing. Training uses random crop/flip augmentation;
# validation uses a deterministic resize + center crop. Both normalize with the
# same per-channel mean/std so inputs match between training and evaluation.
data_transforms = {
    "train": transforms.Compose(
        [
            transforms.Resize(256),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    ),
    "val": transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    ),
}

# Expected layout: <data_dir>/<split>/<class_name>/<image files>
data_dir = "dataset"
image_datasets = {
    x: datasets.ImageFolder(root=f"{data_dir}/{x}", transform=data_transforms[x])
    for x in ["train", "val"]
}
dataloaders = {
    x: DataLoader(
        image_datasets[x],
        batch_size=32,
        # Shuffle only the training split; validation order does not affect the
        # metric, and keeping it fixed makes evaluation runs reproducible.
        shuffle=(x == "train"),
        num_workers=2,
    )
    for x in ["train", "val"]
}

# Class names come from the training split's sub-directory names.
class_names = image_datasets["train"].classes
num_classes = len(class_names)
print(class_names)

['masked', 'unmasked']


In [3]:
# Show both ImageFolder datasets (sample count, root path, transform pipeline)
# as the cell's output for a quick sanity check.
image_datasets

{'train': Dataset ImageFolder
     Number of datapoints: 53
     Root location: dataset/train
     StandardTransform
 Transform: Compose(
                Resize(size=256, interpolation=bilinear, max_size=None, antialias=True)
                RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear, antialias=True)
                RandomHorizontalFlip(p=0.5)
                ToTensor()
                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ),
 'val': Dataset ImageFolder
     Number of datapoints: 26
     Root location: dataset/val
     StandardTransform
 Transform: Compose(
                Resize(size=256, interpolation=bilinear, max_size=None, antialias=True)
                CenterCrop(size=(224, 224))
                ToTensor()
                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            )}

In [None]:
# Pick the compute device up front: GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start from the pretrained SqueezeNet 1.1 and swap the 1x1 classification
# convolution so it emits one channel per class in our dataset.
model = models.squeezenet1_1(weights="IMAGENET1K_V1")
model.classifier[1] = nn.Conv2d(
    in_channels=512,
    out_channels=num_classes,
    kernel_size=1,
    stride=1,
)
model.num_classes = num_classes  # keep the model's bookkeeping attribute in sync

model = model.to(device)

In [5]:
# === 3. FREEZE ALL LAYERS EXCEPT FINAL CLASSIFIER ===
# Only model.features is frozen; the classifier head keeps requires_grad=True.
for param in model.features.parameters():
    param.requires_grad = False  # Freeze feature extractor

# Verify (Optional)
# for name, param in model.named_parameters():
#     print(f"{name}: {param.requires_grad}")

# === 4. OPTIMIZER: Train Only the Final Layer ===
# Hand the optimizer just the parameters that still require gradients. The
# optimizer and loss are defined exactly once so this restriction is not
# overridden by a later re-definition over all parameters.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
num_epochs = 10

train_loader = dataloaders["train"]
num_train_samples = len(image_datasets["train"])

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        # Each batch is an (images, targets) pair; move both to the model's device.
        inputs = batch[0].to(device)
        labels = batch[1].to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # accumulate a per-sample total.
        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size

    # Average loss per training sample for this epoch.
    epoch_loss = running_loss / num_train_samples
    print(f"Epoch {epoch + 1}/{num_epochs} - Loss: {epoch_loss:.4f}")

Epoch 1/10 - Loss: 0.5526
Epoch 2/10 - Loss: 0.5564
Epoch 3/10 - Loss: 0.4634
Epoch 4/10 - Loss: 0.4171
Epoch 5/10 - Loss: 0.3137
Epoch 6/10 - Loss: 0.2974
Epoch 7/10 - Loss: 0.2564
Epoch 8/10 - Loss: 0.2929
Epoch 9/10 - Loss: 0.2311
Epoch 10/10 - Loss: 0.1971


In [None]:
# Evaluate top-1 accuracy on the validation split without tracking gradients.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in dataloaders["val"]:
        inputs = batch[0].to(device)
        labels = batch[1].to(device)

        # Predicted class is the index of the largest logit for each sample.
        preds = model(inputs).argmax(dim=1)

        matches = preds == labels
        correct += matches.sum().item()
        total += labels.size(0)

accuracy_pct = 100 * correct / total
print(f"Validation Accuracy: {accuracy_pct:.2f}%")

Validation Accuracy: 100.00%


In [None]:
import cv2
import torch
import numpy as np
from torchvision import transforms, models
from PIL import Image

# Live webcam demo: classify every captured frame and overlay the predicted
# class name on the video window until the user presses 'q'.
model.eval()

# Transformation matching the validation pipeline
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

# Start webcam feed
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    # Raise so the failure surfaces as a normal cell error; exit() is not meant
    # to be called from inside a notebook cell.
    raise RuntimeError("Webcam access failed")

print("Press 'q' to quit.")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Frame capture failed")
            break

        # Preprocess current frame for model: OpenCV delivers BGR, PIL/torchvision expect RGB.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_pil = Image.fromarray(img)
        input_tensor = preprocess(img_pil)
        # Add the batch dimension and move the input to the same device as the
        # model; without this, inference fails when the model lives on the GPU.
        input_batch = input_tensor.unsqueeze(0).to(device)

        # Inference
        with torch.no_grad():
            outputs = model(input_batch)
            _, pred = torch.max(outputs, 1)
            predicted_class = class_names[pred.item()]

        # Display prediction on the frame
        cv2.putText(
            frame,
            f"Prediction: {predicted_class}",
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
            cv2.LINE_AA,
        )

        cv2.imshow("Webcam - Press q to quit", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the window, even if the loop is
    # interrupted or inference raises.
    cap.release()
    cv2.destroyAllWindows()

Press 'q' to quit.
