In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision
import torchvision.transforms as transforms

In [12]:
# Pick the best available accelerator: a CUDA GPU first, then Apple-Silicon MPS,
# and finally the CPU. `torch.backends.mps` is looked up defensively because
# PyTorch builds that predate the MPS backend do not expose that attribute.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Preprocessing applied to every image, in this order:
#   1. resize to a fixed 128x128 so images can be stacked into batches,
#   2. force three identical luminance channels (colour information is dropped),
#   3. convert the PIL image to a float tensor,
#   4. normalize each channel with mean 0.5 / std 0.5; adjust as needed.
_preprocess_steps = [
    transforms.Resize((128, 128)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]
transform = transforms.Compose(_preprocess_steps)

# Caltech101 is downloaded into ./data when missing and loaded from there.
dataset = torchvision.datasets.Caltech101(
    root='./data',
    download=True,
    transform=transform,
)

Using device: mps
Files already downloaded and verified


In [13]:
# Determine number of classes from the dataset.
num_classes = len(dataset.categories)
print("Number of classes:", num_classes)

# Split the dataset into training (80%) and testing (20%) sets.
# A dedicated generator with a fixed seed makes the split identical on every
# run, so the reported test accuracy is always measured on the same held-out
# images instead of a fresh random subset after each kernel restart.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=7)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=7)

Number of classes: 101


In [17]:
# Define an advanced CNN architecture.
class AdvancedCNN(nn.Module):
    """VGG-style image classifier: three double-convolution blocks plus an MLP head.

    Every block is Conv-BN-ReLU-Conv-BN-ReLU-MaxPool(2)-Dropout(0.25) with
    32, 64 and 128 output channels respectively. Each block halves the spatial
    resolution, so the final feature map is 1/8 of the input size.

    The layers are stored in flat ``nn.Sequential`` containers, so parameter
    names are ``features.<i>.*`` and ``classifier.<i>.*``.

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, either one int for
            square images or a ``(height, width)`` pair. Defaults to 128,
            which yields a 16x16 feature map.

    Raises:
        ValueError: If ``input_size`` is too small to survive the three
            pooling stages (fewer than 8 pixels in either dimension).
    """

    # Output channels of the successive blocks; each block ends in a 2x2 max-pool.
    _BLOCK_CHANNELS = (32, 64, 128)

    def __init__(self, num_classes, input_size=128):
        super().__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # Each max-pool floors the size by 2, so the blocks together divide it
        # by 2 ** number_of_blocks (8 for the three blocks used here).
        downscale = 2 ** len(self._BLOCK_CHANNELS)
        feat_h, feat_w = height // downscale, width // downscale
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small; "
                f"each dimension must be at least {downscale} pixels"
            )

        # Build the blocks into one flat list so the Sequential indices (and
        # therefore the state_dict keys) stay features.0 ... features.23.
        layers = []
        in_channels = 3
        for out_channels in self._BLOCK_CHANNELS:
            layers.extend(self._conv_block(in_channels, out_channels))
            in_channels = out_channels
        self.features = nn.Sequential(*layers)

        # With the default 128x128 input the flattened feature map is 128 * 16 * 16.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(in_channels * feat_h * feat_w, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return the layers of one block: two 3x3 Conv-BN-ReLU stages, pool, dropout."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),
        ]

    def forward(self, x):
        """Map a batch of 3-channel images to unnormalized class logits."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# Build the network for the dataset's class count, then move it to the chosen device.
model = AdvancedCNN(num_classes)
model = model.to(device)

In [18]:
# Set up the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # put dropout / batch-norm layers in training mode
    running_loss = 0.0
    samples_seen = 0
    for inputs, labels in train_loader:
        # Move data to the selected device
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # `loss` is the mean over this batch; weight it by the batch size so the
        # shorter final batch does not count as much as a full one.
        n_batch = labels.size(0)
        running_loss += loss.item() * n_batch
        samples_seen += n_batch
    # Per-sample mean loss over the whole epoch.
    avg_loss = running_loss / samples_seen
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/10], Loss: 4.6381
Epoch [2/10], Loss: 3.7598
Epoch [3/10], Loss: 3.6218
Epoch [4/10], Loss: 3.5090
Epoch [5/10], Loss: 3.4217
Epoch [6/10], Loss: 3.3646
Epoch [7/10], Loss: 3.3139
Epoch [8/10], Loss: 3.2832
Epoch [9/10], Loss: 3.2344
Epoch [10/10], Loss: 3.1778


In [19]:
# Measure top-1 accuracy on the held-out split.
model.eval()  # switch dropout / batch-norm layers to evaluation behaviour
correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for scoring
    for images, targets in test_loader:
        images = images.to(device)
        targets = targets.to(device)
        logits = model(images)
        # The predicted class is the index of the largest logit in each row.
        predicted = logits.argmax(dim=1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

accuracy_pct = 100 * correct / total
print(f"Test Accuracy: {accuracy_pct:.2f}%")

Test Accuracy: 35.94%


# ResNet

In [23]:
# Preprocessing for the ImageNet-pretrained ResNet18:
#   1. resize to 224x224, the conventional ImageNet input resolution,
#   2. force three identical luminance channels (colour information is dropped),
#   3. convert the PIL image to a float tensor,
#   4. normalize with the ImageNet per-channel mean and standard deviation.
_imagenet_mean = [0.485, 0.456, 0.406]
_imagenet_std = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=_imagenet_mean, std=_imagenet_std),
])

# Re-open Caltech101 from ./data with the new transform; nothing is downloaded here.
dataset = torchvision.datasets.Caltech101(
    root='./data',
    download=False,
    transform=transform,
)

# Use dataset.categories to determine the number of classes.
num_classes = len(dataset.categories)
print("Number of classes:", num_classes)

# Split the dataset into training (80%) and testing (20%) sets.
# A dedicated generator with a fixed seed makes the split identical on every
# run, so the reported test accuracy is always measured on the same held-out
# images instead of a fresh random subset after each kernel restart.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

Number of classes: 101


In [27]:
# Load a ResNet18 initialised with ImageNet-pretrained weights.
# The `weights=` enum replaces the `pretrained=True` flag, which torchvision
# deprecated in 0.13; IMAGENET1K_V1 is the checkpoint that flag selected.
model = torchvision.models.resnet18(
    weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1
)

# Replace the final fully-connected layer to adapt for Caltech101: keep its
# input width and emit one logit per dataset class.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

In [28]:
# Set up the loss function and optimizer.
# All parameters are passed to the optimizer, so the whole network is fine-tuned.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0001)

# Training loop.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # put batch-norm layers in training mode
    running_loss = 0.0
    samples_seen = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # `loss` is the mean over this batch; weight it by the batch size so the
        # shorter final batch does not count as much as a full one.
        n_batch = labels.size(0)
        running_loss += loss.item() * n_batch
        samples_seen += n_batch
    # Per-sample mean loss over the whole epoch.
    avg_loss = running_loss / samples_seen
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/10], Loss: 1.6871
Epoch [2/10], Loss: 0.3354
Epoch [3/10], Loss: 0.0921
Epoch [4/10], Loss: 0.0339
Epoch [5/10], Loss: 0.0190
Epoch [6/10], Loss: 0.0124
Epoch [7/10], Loss: 0.0092
Epoch [8/10], Loss: 0.0076
Epoch [9/10], Loss: 0.0055
Epoch [10/10], Loss: 0.0051


In [29]:
# Score the fine-tuned network on the held-out split (top-1 accuracy).
model.eval()  # evaluation behaviour for batch-norm layers
total = 0
correct = 0
with torch.no_grad():  # gradients are not needed while scoring
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        scores = model(batch_images)
        # Index of the highest score per row is the predicted class.
        predicted = scores.argmax(dim=1)
        matches = (predicted == batch_labels)
        correct += matches.sum().item()
        total += batch_labels.size(0)

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")

Test Accuracy: 95.45%
