In [1]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import time

# Preprocessing shared by both splits: tensor conversion followed by
# per-channel normalization with mean 0.5 and std 0.5 on all three channels.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train and test splits, stored under the 'data' directory.
train_data = datasets.CIFAR10('data', train=True, download=True, transform=transform)
test_data = datasets.CIFAR10('data', train=False, download=True, transform=transform)

batch_size = 64

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
test_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

def conv_blk(inputChannels, outputChannels, pooling):
    """Build one convolutional stage as an ``nn.Sequential``.

    The stage is a 3x3 convolution (padding 1), batch normalization and a
    ReLU, in that order. When ``pooling`` is truthy a ``MaxPool2d(2)`` is
    appended as the last layer.

    Args:
        inputChannels: Number of channels the convolution accepts.
        outputChannels: Number of channels the convolution produces.
        pooling: Whether to append the 2x2 max-pooling layer.

    Returns:
        An ``nn.Sequential`` holding three layers, or four with pooling.
    """
    conv = nn.Conv2d(inputChannels, outputChannels, kernel_size=3, padding=1)
    norm = nn.BatchNorm2d(outputChannels)
    activation = nn.ReLU()

    # Optional trailing layer; empty when no down-sampling is requested.
    tail = [nn.MaxPool2d(2)] if pooling else []

    return nn.Sequential(conv, norm, activation, *tail)

class Network(nn.Module):
    """Eight-stage convolutional classifier built from ``conv_blk`` stages.

    Channel widths go 64 -> 64 -> 128 -> 128 -> 256 -> 256 -> 512 -> 512,
    with max-pooling enabled on the stages that widen to 128, 256 and 512.
    The feature map is then averaged down to 1x1, flattened, and passed to a
    linear layer with ``outputChannels`` outputs.
    """

    def __init__(self, inputChannels, outputChannels):
        """Create the layers.

        Args:
            inputChannels: Channel count of the input passed to the first stage.
            outputChannels: Number of outputs of the final linear layer.
        """
        super().__init__()

        # (in_channels, out_channels, pooling) for each stage, in order.
        stage_plan = (
            (inputChannels, 64, False),
            (64, 64, False),
            (64, 128, True),
            (128, 128, False),
            (128, 256, True),
            (256, 256, False),
            (256, 512, True),
            (512, 512, False),
        )
        stages = [conv_blk(c_in, c_out, pool) for c_in, c_out, pool in stage_plan]
        self._block = nn.Sequential(*stages)

        self._adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        self._res = nn.Linear(512, outputChannels)

    def forward(self, X):
        """Run the stages, pool to 1x1, flatten from dim 1, and apply the linear layer."""
        pooled = self._adaptive_pool(self._block(X))
        return self._res(torch.flatten(pooled, 1))

class WideNetwork(nn.Module):
    """Three-stage convolutional classifier with widths 128, 256, 256.

    Each stage is a 3x3 convolution (padding 1), a ReLU and a ``MaxPool2d(2)``.
    The result is averaged down to 1x1, flattened, and passed to a linear
    layer with ``outChannels`` outputs.
    """

    def __init__(self, inChannels, outChannels):
        """Create the layers.

        Args:
            inChannels: Channel count accepted by the first convolution.
            outChannels: Number of outputs of the final linear layer.
        """
        super().__init__()

        # Consecutive pairs of this tuple are the (in, out) channels per stage.
        widths = (inChannels, 128, 256, 256)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2))
        self._block = nn.Sequential(*layers)

        self._adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        self._res = nn.Linear(256, outChannels)

    def forward(self, X):
        """Run the stages, pool to 1x1, flatten from dim 1, and apply the linear layer."""
        pooled = self._adaptive_pool(self._block(X))
        return self._res(torch.flatten(pooled, 1))

class SmallNetwork(nn.Module):
    """Three-stage convolutional classifier with widths 64, 128, 256.

    Each stage is a 3x3 convolution (padding 1), a ReLU and a
    ``MaxPool2d(2, 2)``. The result is averaged down to 1x1, flattened, and
    passed to a linear layer with ``outChannels`` outputs.
    """

    def __init__(self, inChannels, outChannels):
        """Create the layers.

        Args:
            inChannels: Channel count accepted by the first convolution.
            outChannels: Number of outputs of the final linear layer.
        """
        super().__init__()

        def stage(c_in, c_out):
            # One conv -> ReLU -> 2x2/stride-2 max-pool group.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        self._block = nn.Sequential(
            *stage(inChannels, 64),
            *stage(64, 128),
            *stage(128, 256),
        )

        self._adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        self._res = nn.Linear(256, outChannels)

    def forward(self, X):
        """Run the stages, pool to 1x1, flatten from dim 1, and apply the linear layer."""
        features = self._block(X)
        features = self._adaptive_pool(features)
        logits = self._res(torch.flatten(features, 1))
        return logits

def train_once(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``train_loader``.

    Loss and accuracy are averaged per sample rather than per batch, so a
    smaller final batch does not get the same weight as a full one.

    Args:
        model: Module to train; it is switched to training mode.
        train_loader: Iterable of batches where index 0 is the input tensor
            and index 1 is the label tensor.
        criterion: Loss callable invoked as ``criterion(output, label)``.
            The per-sample weighting assumes it returns the batch mean
            (as ``nn.CrossEntropyLoss`` does by default).
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batch tensors are moved to.

    Returns:
        A ``(mean_loss, accuracy)`` tuple of Python floats, with accuracy as
        a fraction in [0, 1]. Both are NaN when the loader yields no samples.
    """
    loss_sum = 0.0
    correct = 0
    seen = 0

    model.train()
    for data in train_loader:
        image = data[0].to(device)
        label = data[1].to(device)
        batch_count = label.size(0)

        optimizer.zero_grad()

        output = model(image)
        loss = criterion(output, label)

        loss.backward()
        optimizer.step()

        # Scale the batch-mean loss back to a batch total so the epoch
        # average is weighted by the number of samples in each batch.
        loss_sum += loss.item() * batch_count
        correct += (output.argmax(dim=1) == label).sum().item()
        seen += batch_count

    if seen == 0:
        # Nothing was trained on; report NaN explicitly instead of dividing by zero.
        return float("nan"), float("nan")

    return loss_sum / seen, correct / seen

def _evaluate(model, loader, criterion, device):
    """Return ``(mean_loss, accuracy_percent)`` of ``model`` over ``loader``.

    Both values are weighted per sample. The loss weighting assumes
    ``criterion`` returns the batch mean. Both are NaN for an empty loader.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for data in loader:
            images = data[0].to(device)
            labels = data[1].to(device)
            batch_count = labels.size(0)

            outputs = model(images)
            loss_sum += criterion(outputs, labels).item() * batch_count
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += batch_count

    if total == 0:
        return float("nan"), float("nan")

    return loss_sum / total, 100 * correct / total


def main():
    """Train and evaluate WideNetwork, SmallNetwork and Network in turn.

    Each model is trained for 20 epochs with Adam (lr 0.001) and
    cross-entropy loss on the module-level ``train_loader``, printing the
    loss and accuracy after every epoch. It is then evaluated on the
    module-level ``test_loader`` and the test loss, test accuracy and
    training time are printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Models are built one at a time inside the loop so only the model
    # currently being trained occupies device memory.
    model_specs = [
        ("WideNetwork", WideNetwork),
        ("SmallNetwork", SmallNetwork),
        ("Network", Network),
    ]

    learning_rate = 0.001
    epochs = 20

    for name, model_cls in model_specs:
        print(f"\n=== Training {name} ===")

        model = model_cls(3, 10).to(device)
        criterion = nn.CrossEntropyLoss().to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        start_time = time.perf_counter()

        for epoch in range(1, epochs + 1):
            epoch_loss, train_acc = train_once(model, train_loader, criterion, optimizer, device)

            print(f"Epoch {epoch:02d}/{epochs} | loss={epoch_loss:.4f} | train_acc={train_acc:.4f}")

        # Stop the clock before evaluation so the reported time covers
        # training only; wait for queued GPU work so it is fully counted.
        if device.type == "cuda":
            torch.cuda.synchronize()
        training_time = time.perf_counter() - start_time

        avg_test_loss, accuracy = _evaluate(model, test_loader, criterion, device)

        print(f"Final Results for {name}:")
        print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
        print(f"Training Time: {training_time:.2f} seconds")
        print()

# Run the training/evaluation comparison only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

100%|██████████| 170M/170M [00:04<00:00, 42.0MB/s]


Using device: cuda

=== Training WideNetwork ===
Epoch 01/20 | loss=1.5787 | train_acc=0.4178
Epoch 02/20 | loss=1.2034 | train_acc=0.5684
Epoch 03/20 | loss=1.0269 | train_acc=0.6372
Epoch 04/20 | loss=0.9231 | train_acc=0.6755
Epoch 05/20 | loss=0.8458 | train_acc=0.7018
Epoch 06/20 | loss=0.7736 | train_acc=0.7304
Epoch 07/20 | loss=0.7159 | train_acc=0.7497
Epoch 08/20 | loss=0.6609 | train_acc=0.7695
Epoch 09/20 | loss=0.6134 | train_acc=0.7860
Epoch 10/20 | loss=0.5672 | train_acc=0.8029
Epoch 11/20 | loss=0.5301 | train_acc=0.8156
Epoch 12/20 | loss=0.4864 | train_acc=0.8297
Epoch 13/20 | loss=0.4499 | train_acc=0.8430
Epoch 14/20 | loss=0.4135 | train_acc=0.8566
Epoch 15/20 | loss=0.3851 | train_acc=0.8661
Epoch 16/20 | loss=0.3504 | train_acc=0.8785
Epoch 17/20 | loss=0.3210 | train_acc=0.8888
Epoch 18/20 | loss=0.2902 | train_acc=0.9005
Epoch 19/20 | loss=0.2662 | train_acc=0.9083
Epoch 20/20 | loss=0.2395 | train_acc=0.9172
Final Results for WideNetwork:
Test Loss: 0.8059, T