In [2]:
import torch 
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import time

In [3]:
# Seed PyTorch's global random number generator with a named constant.
SEED = 0
torch.manual_seed(SEED)

In [4]:
# Mini-batch size passed to both the training and the test DataLoader.
BATCH_SIZE = 8

In [5]:
# Per-channel (R, G, B) statistics handed to Normalize.
CHANNEL_MEAN = (0.5, 0.5, 0.5)
CHANNEL_STD = (0.5, 0.5, 0.5)

# Preprocessing applied to every image: tensor conversion first, then
# per-channel normalization with the statistics above.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD),
]
transform = transforms.Compose(preprocessing_steps)

In [6]:
# CIFAR-10 training split (downloaded into ./train-data when missing),
# served in shuffled mini-batches by two worker processes.
trainset = torchvision.datasets.CIFAR10(
    root='./train-data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

In [7]:
# CIFAR-10 test split (downloaded into ./test-data when missing),
# iterated in a fixed order (no shuffling) by two worker processes.
testset = torchvision.datasets.CIFAR10(
    root='./test-data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

In [10]:
class MediumDeepNetwork(nn.Module):
    """Two-stage convolutional classifier with a fully connected head.

    Each convolutional stage is::

        conv3x3 -> [BatchNorm2d] -> activation -> conv3x3 -> activation -> 2x2 max-pool

    The head maps 8192 flattened features to 10 logits. 8192 equals
    128 channels * 8 * 8, and the two stride-2 pools each halve the spatial
    size, so the network expects 3-channel 32x32 inputs.

    Args:
        activation_fn: An activation ``nn.Module`` instance (e.g. ``nn.ReLU()``).
            The very same instance is inserted at every activation site.
        batch_norm: When True, a ``BatchNorm2d`` layer is placed after the
            first convolution of each stage.
    """

    def __init__(self, activation_fn, batch_norm=False):
        super().__init__()

        self.conv_layer1 = self._make_conv_stage(3, 32, 64, activation_fn, batch_norm)
        self.conv_layer2 = self._make_conv_stage(64, 128, 128, activation_fn, batch_norm)

        self.fc_layer = nn.Sequential(
            nn.Linear(8192, 1024),
            activation_fn,
            nn.Linear(1024, 512),
            activation_fn,
            nn.Dropout(p=0.1),
            nn.Linear(512, 10)
        )

    @staticmethod
    def _make_conv_stage(in_channels, mid_channels, out_channels, activation_fn, batch_norm):
        """Build one conv -> [BN] -> act -> conv -> act -> max-pool stage."""
        layers = [nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1)]
        if batch_norm:
            # The norm layer must be sized to the channel count produced by the
            # convolution right before it; deriving it from mid_channels keeps
            # the two in sync (the second stage previously hard-coded 32
            # features after a 128-channel convolution, which fails at
            # forward time).
            layers.append(nn.BatchNorm2d(mid_channels))
        layers.append(activation_fn)
        layers.append(nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1))
        layers.append(activation_fn)
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the 10 class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10) with unnormalized class scores.
        """
        x = self.conv_layer1(x)
        x = self.conv_layer2(x)
        # Collapse (channels, height, width) into one feature axis per sample.
        x = x.flatten(start_dim=1)
        x = self.fc_layer(x)
        return x

In [11]:
# One row per experiment: (display name, activation instance, use batch norm).
# Rows are listed in the order the networks are constructed.
model_specs = [
    ("ReLU with BN", nn.ReLU(inplace=True), True),
    ("Sigmoid with BN", nn.Sigmoid(), True),
    ("Tanh with BN", nn.Tanh(), True),
    ("ReLU", nn.ReLU(inplace=True), False),
    ("Sigmoid", nn.Sigmoid(), False),
    ("Tanh", nn.Tanh(), False),
]

# Maps each display name to a freshly constructed network.
models = {
    display_name: MediumDeepNetwork(activation, batch_norm=use_batch_norm)
    for display_name, activation, use_batch_norm in model_specs
}

In [19]:
# Learning rate shared by every optimizer in the comparison (Adam and both SGD variants).
lr = 0.001
# Momentum coefficient used by the SGD-with-momentum optimizer.
mom = 0.9

In [20]:
# Train and evaluate every network in `models` once per optimizer, reporting
# wall-clock training time and test accuracy for each (model, optimizer) pair.

NUM_EPOCHS = 10   # full passes over the training set per run
LOG_EVERY = 2000  # mini-batches between running-loss reports

# Loop-invariant setup, hoisted out of the per-model loop.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()

# Optimizers are created lazily, one per run, so each starts with clean
# internal state (Adam moments, SGD momentum buffers).
optimizer_factories = {
    "Adam": lambda params: optim.Adam(params, lr=lr),
    f"SGD (momentum={mom})": lambda params: optim.SGD(params, lr=lr, momentum=mom),
    "SGD (momentum=0)": lambda params: optim.SGD(params, lr=lr, momentum=0),
}

for model_name in list(models.keys()):
    model = models[model_name].to(device)

    # Snapshot the untrained parameters and buffers so every optimizer starts
    # from the same initialization. Without this reset the second and third
    # runs would continue from weights already trained by the previous
    # optimizer, making their time/accuracy numbers incomparable.
    initial_state = {
        key: value.detach().clone() for key, value in model.state_dict().items()
    }

    for opt_name, make_optimizer in optimizer_factories.items():
        model.load_state_dict(initial_state)
        opt = make_optimizer(model.parameters())

        # Training mode: Dropout active, BatchNorm uses batch statistics.
        model.train()
        start_time = time.time()

        for epoch in range(NUM_EPOCHS):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader, 0):
                inputs, labels = inputs.to(device), labels.to(device)

                opt.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                opt.step()

                running_loss += loss.item()
                if i % LOG_EVERY == LOG_EVERY - 1:
                    print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / LOG_EVERY))
                    running_loss = 0.0

        time_taken = time.time() - start_time

        # Evaluation mode: Dropout disabled, BatchNorm uses running statistics.
        model.eval()
        correct, total = 0, 0

        with torch.no_grad():
            for (images, labels) in testloader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f"Model {model_name} with {opt_name} took {time_taken} (s) time to execute with an accuracy of {(100 * correct / total)}")