In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.optim as optim

# Hyperparameters
batch_size = 64
learning_rates = [0.001, 0.01]
epochs = 2
seed = 42  # fixed so the split, weight init and shuffling repeat across runs

# Seed the global RNG up front: it drives weight initialisation and the
# shuffling done by the DataLoaders. (Some GPU kernels may still be
# non-deterministic, so results can vary slightly on CUDA.)
torch.manual_seed(seed)

# Step 1: Load and preprocess the dataset
# ToTensor yields values in [0, 1]; subtracting 0.5 and dividing by 0.5 per
# channel rescales them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-100 dataset
train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Split training dataset into sub-training (80%) and validation (20%) sets.
# A dedicated seeded generator keeps the split identical from run to run, so
# validation accuracies of different configurations are measured on the same
# held-out samples.
num_train = len(train_dataset)
num_subtrain = int(0.8 * num_train)
num_val = num_train - num_subtrain
subtrain_dataset, val_dataset = random_split(
    train_dataset,
    [num_subtrain, num_val],
    generator=torch.Generator().manual_seed(seed),
)

# Data loaders for sub-training, validation, and test sets.
# Only the training loader is shuffled; evaluation order does not matter.
subtrain_loader = DataLoader(subtrain_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define a function to create various models
def create_model(activation, num_features=256, num_classes=100):
    """Build a small two-convolution CNN class that uses ``activation`` throughout.

    Args:
        activation: Zero-argument callable returning an ``nn.Module``
            (e.g. ``nn.ReLU``). It is called three times so that every
            stage owns its own activation instance.
        num_features: Width of the hidden fully connected layer.
        num_classes: Number of output logits. Defaults to 100 (CIFAR-100).

    Returns:
        The ``CNNModel`` class itself (not an instance); call it with no
        arguments to construct a network.
    """
    class CNNModel(nn.Module):
        """conv -> act -> pool -> conv -> act -> pool -> flatten -> fc -> act -> fc."""

        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
            self.act1 = activation()
            # The same pooling layer is applied after both convolutions.
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
            self.act2 = activation()
            # The padded 3x3 convolutions keep the spatial size and each 2x2
            # pooling halves it, so 64 * 8 * 8 corresponds to 32x32 inputs.
            self.fc1 = nn.Linear(64 * 8 * 8, num_features)
            self.act3 = activation()
            self.fc2 = nn.Linear(num_features, num_classes)

        def forward(self, x):
            """Return raw class logits for a batch of images."""
            x = self.pool(self.act1(self.conv1(x)))
            x = self.pool(self.act2(self.conv2(x)))
            x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
            x = self.act3(self.fc1(x))
            x = self.fc2(x)
            return x
    return CNNModel

# Activations and Optimizers to experiment with
activations = [nn.ReLU, nn.LeakyReLU, nn.ELU, nn.Sigmoid]
optimizers_dict = {'Adam': optim.Adam, 'SGD': optim.SGD}
results = []  # one (model, optimizer_name, lr, validation_accuracy) tuple per configuration

# The device and the loss do not depend on the configuration, so build them once.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()


def _train_one_epoch(model, loader, opt, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean per-batch loss."""
    model.train()
    running_loss = 0.0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        opt.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        opt.step()
        running_loss += loss.item()
    return running_loss / len(loader)


def _evaluate_accuracy(model, loader, device):
    """Return the top-1 accuracy of ``model`` over ``loader`` as a percentage."""
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # argmax over the class dimension replaces torch.max(outputs.data, 1)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total


# Experiment with models: every activation x optimizer x learning-rate combination
for activation in activations:
    for optimizer_name, Optimizer in optimizers_dict.items():
        for lr in learning_rates:
            model = create_model(activation)()
            print(f"Training model with {activation.__name__}, optimizer={optimizer_name}, lr={lr}")
            model.to(device)
            # Build the optimizer after the model sits on its final device.
            opt = Optimizer(model.parameters(), lr=lr)

            # Training and Validation
            accuracy = 0.0  # keeps the value defined (and not stale) if epochs == 0
            for epoch in range(epochs):
                avg_train_loss = _train_one_epoch(model, subtrain_loader, opt, criterion, device)
                accuracy = _evaluate_accuracy(model, val_loader, device)
                print(f"Epoch {epoch+1}: Train Loss: {avg_train_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")

            # Rank configurations by the validation accuracy of the last epoch.
            results.append((model, optimizer_name, lr, accuracy))

# Sort and select top 3 models
results.sort(key=lambda x: x[3], reverse=True)
top_models = results[:3]

# Re-train top 3 models on full training dataset and evaluate on test set.
# NOTE: the selected models are not re-initialised. Training continues from the
# weights learned on the sub-training split, with a freshly created optimizer.
full_train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_accuracies = []
for model, optimizer_name, lr, val_accuracy in top_models:
    # The activation is not stored in the results tuple; read it back from the
    # model so that log lines of different configurations can be told apart.
    activation_name = type(model.act1).__name__
    print(f"Retraining and testing top model with {activation_name}, optimizer={optimizer_name}, lr={lr}")
    model.to(device)
    opt = optimizers_dict[optimizer_name](model.parameters(), lr=lr)

    # Full training
    for epoch in range(epochs):
        avg_train_loss = _train_one_epoch(model, full_train_loader, opt, criterion, device)
        print(f"Full Training Epoch {epoch+1}: Train Loss: {avg_train_loss:.4f}")

    # Testing
    test_accuracy = _evaluate_accuracy(model, test_loader, device)
    test_accuracies.append(test_accuracy)
    print(f"Test accuracy for model with {activation_name}, optimizer={optimizer_name}, lr={lr}: {test_accuracy}%")


Files already downloaded and verified
Files already downloaded and verified
Training model with ReLU, optimizer=Adam, lr=0.001
Epoch 1: Train Loss: 3.5901, Validation Accuracy: 25.24%
Epoch 2: Train Loss: 2.7982, Validation Accuracy: 31.22%
Training model with ReLU, optimizer=Adam, lr=0.01
Epoch 1: Train Loss: 4.6206, Validation Accuracy: 1.04%
Epoch 2: Train Loss: 4.6088, Validation Accuracy: 0.88%
Training model with ReLU, optimizer=SGD, lr=0.001
Epoch 1: Train Loss: 4.6048, Validation Accuracy: 1.14%
Epoch 2: Train Loss: 4.6010, Validation Accuracy: 1.66%
Training model with ReLU, optimizer=SGD, lr=0.01
Epoch 1: Train Loss: 4.5723, Validation Accuracy: 3.53%
Epoch 2: Train Loss: 4.2419, Validation Accuracy: 9.30%
Training model with LeakyReLU, optimizer=Adam, lr=0.001
Epoch 1: Train Loss: 3.5349, Validation Accuracy: 25.59%
Epoch 2: Train Loss: 2.6910, Validation Accuracy: 33.62%
Training model with LeakyReLU, optimizer=Adam, lr=0.01
Epoch 1: Train Loss: 4.6306, Validation Accuracy: