In [44]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import time
import datetime

# Use the GPU when one is available; the tensors built below are moved to `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

data_path = "../data-unversioned/p1ch7"
# Convert each image to a tensor, then normalize the three channels with the
# given per-channel mean and standard deviation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4823, 0.4468),
                         (0.2470, 0.2435, 0.2616))
])

cifar10_train = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
cifar10_test  = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)


def _dataset_to_tensors(dataset):
    """Materialize an (image, label) dataset as two tensors in a single pass.

    Iterating the dataset runs its transform on every item, so collecting
    images and labels together avoids transforming each image twice.

    Args:
        dataset: iterable yielding ``(image_tensor, int_label)`` pairs.

    Returns:
        ``(images, labels)`` where ``images`` is the stacked image tensor and
        ``labels`` is a ``torch.long`` tensor, in dataset order.
    """
    images, labels = [], []
    for img, label in dataset:
        images.append(img)
        labels.append(label)
    return torch.stack(images), torch.tensor(labels, dtype=torch.long)


# Whole splits are held as tensors on `device`, so the loaders below serve
# slices of already-transformed data.
trainX, trainY = (t.to(device) for t in _dataset_to_tensors(cifar10_train))
testX, testY = (t.to(device) for t in _dataset_to_tensors(cifar10_test))

train_dataset = torch.utils.data.TensorDataset(trainX, trainY)
val_dataset   = torch.utils.data.TensorDataset(testX, testY)

# Training batches are reshuffled every epoch; validation order is fixed.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader   = torch.utils.data.DataLoader(val_dataset, batch_size=1000, shuffle=False)

Using device: cuda


In [45]:
class NetRes(nn.Module):
    """Three-convolution image classifier with one additive skip connection.

    Every convolution is 3x3 with padding 1 and is followed by ReLU; each of
    the three stages ends in a 2x2 max-pool. The output of the second stage is
    added to the activated output of the third convolution before the last
    pool. ``fc1`` expects ``4 * 4 * (n_chans1 // 2)`` features, which matches
    32x32 inputs after three halvings.

    Args:
        n_chans1: output channels of the first convolution; the later
            convolutions use ``n_chans1 // 2``.
    """

    def __init__(self, n_chans1=32):
        super().__init__()
        half_chans = n_chans1 // 2
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, half_chans, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(half_chans, half_chans, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(4 * 4 * half_chans, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), 2)
        skip = F.max_pool2d(F.relu(self.conv2(stage1)), 2)

        # Residual step: conv3 keeps the channel count, so its activation can
        # be summed with its own input before pooling.
        merged = F.relu(self.conv3(skip)) + skip
        pooled = F.max_pool2d(merged, 2)

        flat = pooled.view(pooled.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Build a NetRes with its default channel width, then place it on `device`.
# NOTE(review): `model` is rebound by later cells and the training cell builds
# its own instances, so this object looks unused in the visible notebook.
model = NetRes()
model = model.to(device)

In [46]:
class NetDropout(nn.Module):
    """Two-convolution image classifier with channel dropout after each pool.

    Each stage is a 3x3 convolution (padding 1), tanh, a 2x2 max-pool and then
    ``Dropout2d`` with ``p=0.3``. ``fc1`` expects ``8 * 8 * (n_chans1 // 2)``
    features, which matches 32x32 inputs after two halvings.

    Args:
        n_chans1: output channels of the first convolution; the second
            convolution uses ``n_chans1 // 2``.
    """

    def __init__(self, n_chans1=32):
        super().__init__()
        half_chans = n_chans1 // 2
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_dropout = nn.Dropout2d(p=0.3)
        self.conv2 = nn.Conv2d(n_chans1, half_chans, kernel_size=3, padding=1)
        self.conv2_dropout = nn.Dropout2d(p=0.3)
        self.fc1 = nn.Linear(8 * 8 * half_chans, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        stage1 = torch.tanh(self.conv1(x))
        stage1 = self.conv1_dropout(F.max_pool2d(stage1, 2))

        stage2 = torch.tanh(self.conv2(stage1))
        stage2 = self.conv2_dropout(F.max_pool2d(stage2, 2))

        flat = stage2.view(stage2.shape[0], -1)
        hidden = torch.tanh(self.fc1(flat))
        return self.fc2(hidden)

# Build a NetDropout with its default channel width, then place it on `device`.
# NOTE(review): `model` is rebound by the next cell and the training cell builds
# its own instances, so this object looks unused in the visible notebook.
model = NetDropout()
model = model.to(device)

In [47]:
class NetBatchNorm(nn.Module):
    """Two-convolution image classifier with batch normalization.

    Each stage is a 3x3 convolution (padding 1), ``BatchNorm2d`` over the
    convolution's output channels, tanh and a 2x2 max-pool. ``fc1`` expects
    ``8 * 8 * (n_chans1 // 2)`` features, which matches 32x32 inputs after two
    halvings.

    Args:
        n_chans1: output channels of the first convolution; the second
            convolution uses ``n_chans1 // 2``.
    """

    def __init__(self, n_chans1=32):
        super().__init__()
        half_chans = n_chans1 // 2
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
        self.conv2 = nn.Conv2d(n_chans1, half_chans, kernel_size=3, padding=1)
        self.conv2_batchnorm = nn.BatchNorm2d(num_features=half_chans)
        self.fc1 = nn.Linear(8 * 8 * half_chans, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        # Normalization is applied to the raw convolution output, before tanh.
        normed1 = self.conv1_batchnorm(self.conv1(x))
        stage1 = F.max_pool2d(torch.tanh(normed1), 2)

        normed2 = self.conv2_batchnorm(self.conv2(stage1))
        stage2 = F.max_pool2d(torch.tanh(normed2), 2)

        flat = stage2.view(stage2.shape[0], -1)
        hidden = torch.tanh(self.fc1(flat))
        return self.fc2(hidden)

# Build a NetBatchNorm with its default channel width, then place it on `device`.
# NOTE(review): the training cell builds its own instances, so this object
# looks unused in the visible notebook.
model = NetBatchNorm()
model = model.to(device)

In [48]:
def train_model(model, train_loader, val_loader, n_epochs=300, lr=1e-3, weight_decay=0.0):
    """Train ``model`` with SGD and cross-entropy, then measure accuracy.

    The model and every batch are moved to the module-level ``device``.
    Progress is printed for the first epoch and every tenth epoch, followed by
    the wall-clock training time, the final training loss and the accuracy on
    ``val_loader``.

    Args:
        model: network mapping a batch of inputs to per-class logits.
        train_loader: iterable of ``(inputs, integer_labels)`` training batches.
        val_loader: iterable of ``(inputs, integer_labels)`` evaluation batches.
        n_epochs: number of passes over ``train_loader``.
        lr: SGD learning rate.
        weight_decay: L2 penalty coefficient passed to the optimizer.

    Returns:
        ``(final_training_loss, accuracy)``. The loss is the per-sample mean
        over the last epoch; accuracy is the fraction of correct predictions.
        Either value is NaN when no samples were processed for it (for
        example ``n_epochs < 1`` or an empty loader).
    """
    model = model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
    # Bound before the loop so that n_epochs < 1 reports NaN instead of
    # raising UnboundLocalError at the summary below.
    epoch_loss = float("nan")
    start = time.time()

    for epoch in range(1, n_epochs + 1):
        model.train()
        running_loss = 0.0
        samples_seen = 0

        for batch_X, batch_Y in train_loader:
            batch_X, batch_Y = batch_X.to(device), batch_Y.to(device)
            batch_size = batch_X.size(0)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = loss_fn(outputs, batch_Y)
            loss.backward()
            optimizer.step()
            # The loss is a batch mean; weight it by batch size so a smaller
            # final batch does not skew the epoch average.
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

        # Average over the samples actually processed: this differs from
        # len(train_loader.dataset) when the loader drops or subsamples items.
        epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
        if epoch == 1 or epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {epoch_loss:.4f}")

    end = time.time()
    print("\nTraining Time =", end - start, "seconds")
    print("Final Training Loss =", epoch_loss)

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_Y in val_loader:
            batch_X, batch_Y = batch_X.to(device), batch_Y.to(device)
            preds = model(batch_X).argmax(dim=1)
            correct += (preds == batch_Y).sum().item()
            total += batch_Y.size(0)
    # An empty validation loader yields NaN rather than ZeroDivisionError.
    accuracy = correct / total if total else float("nan")
    print("Evaluation Accuracy =", accuracy)
    return epoch_loss, accuracy

In [49]:
# One row per experiment: (heading to print, network class, extra train_model kwargs).
# Networks are constructed inside the loop, right before their own training run.
# NOTE(review): the second and third headings say "ResNet-10", but NetDropout
# and NetBatchNorm as defined in this notebook have two convolutions and no
# skip connection; confirm the labels.
_runs = [
    ("\n ResNet with Weight Decay (lambda=0.001)", NetRes, {"weight_decay": 0.001}),
    ("\n ResNet-10 with Dropout (p=0.3)", NetDropout, {}),
    ("\n ResNet-10 with Batch Normalization", NetBatchNorm, {}),
]

for _heading, _net_cls, _extra in _runs:
    print(_heading)
    _last_result = train_model(_net_cls(), train_loader, val_loader, **_extra)

# Bare trailing expression so the cell still displays the final run's
# (training loss, accuracy) tuple.
_last_result


 ResNet with Weight Decay (lambda=0.001)
Epoch 1, Loss: 2.2927
Epoch 10, Loss: 1.7903
Epoch 20, Loss: 1.4912
Epoch 30, Loss: 1.3530
Epoch 40, Loss: 1.2639
Epoch 50, Loss: 1.1964
Epoch 60, Loss: 1.1365
Epoch 70, Loss: 1.0833
Epoch 80, Loss: 1.0370
Epoch 90, Loss: 0.9959
Epoch 100, Loss: 0.9602
Epoch 110, Loss: 0.9296
Epoch 120, Loss: 0.9038
Epoch 130, Loss: 0.8830
Epoch 140, Loss: 0.8642
Epoch 150, Loss: 0.8480
Epoch 160, Loss: 0.8348
Epoch 170, Loss: 0.8208
Epoch 180, Loss: 0.8094
Epoch 190, Loss: 0.7973
Epoch 200, Loss: 0.7868
Epoch 210, Loss: 0.7778
Epoch 220, Loss: 0.7690
Epoch 230, Loss: 0.7599
Epoch 240, Loss: 0.7529
Epoch 250, Loss: 0.7430
Epoch 260, Loss: 0.7362
Epoch 270, Loss: 0.7296
Epoch 280, Loss: 0.7229
Epoch 290, Loss: 0.7157
Epoch 300, Loss: 0.7093

Training Time = 2230.9503643512726 seconds
Final Training Loss = 0.7093406404495239
Evaluation Accuracy = 0.7079

 ResNet-10 with Dropout (p=0.3)
Epoch 1, Loss: 2.2633
Epoch 10, Loss: 1.8800
Epoch 20, Loss: 1.6996
Epoch 30, 

(0.5697642152786255, 0.6912)