In [1]:
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import ToTensor, Compose, Normalize
from torch.optim import Adam

# Task 1 - Main Functions

## Dataset operations

In [29]:
def get_loaders(batch_size, samples_per_class=1000):
    """Build MNIST train/test loaders, training on a per-class capped subset.

    The training set is reduced to the first ``samples_per_class`` images of
    every digit, taken in dataset order. The test set is used in full.

    Args:
        batch_size: Batch size used by both loaders.
        samples_per_class: Maximum number of training images kept per digit.
            A digit with fewer images than this contributes all of them.

    Returns:
        A ``(train_loader, test_loader)`` tuple of ``DataLoader`` objects.
    """
    # Convert images to tensors, then normalize with mean 0.1307 / std 0.3081.
    transform = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,))
    ])

    # MNIST dataset train/test (downloaded into ./data on first use)
    train_dataset = MNIST('./data', train=True, download=True, transform=transform)
    test_dataset = MNIST('./data', train=False, download=True, transform=transform)

    # Pick the first `samples_per_class` occurrences of each digit. This is done
    # with one vectorised comparison per class instead of a Python loop over
    # every label.
    num_classes = 10
    train_labels = torch.as_tensor(train_dataset.targets)
    per_class_indices = [
        torch.nonzero(train_labels == digit, as_tuple=True)[0][:samples_per_class]
        for digit in range(num_classes)
    ]
    # Sorting restores ascending dataset order, i.e. the order a sequential
    # scan over the labels would have produced.
    sampled_indices = torch.sort(torch.cat(per_class_indices)).values

    train_subset = Subset(train_dataset, sampled_indices.tolist())

    # Only the training data needs shuffling; a fixed test order keeps
    # evaluation reproducible and does not affect accuracy.
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

## Create model class

In [None]:
class CustomModel(nn.Module):
    """One-hidden-layer classifier: flatten -> Linear -> ReLU -> [Dropout] -> Linear.

    Args:
        param_size_param: Number of units in the hidden layer.
        dropout_param: Value passed to ``nn.Dropout`` as ``p``. The value ``1``
            is treated specially: no dropout layer is created at all.
        input_dimension: Number of features per sample after flattening.
        num_class: Number of output logits.
    """

    def __init__(self, param_size_param, dropout_param, input_dimension=784, num_class=10):
        super().__init__()
        # Keep the constructor arguments around for later inspection.
        self._param_size_param = param_size_param
        self._dropout_param = dropout_param
        self._input_dimension = input_dimension
        self._num_class = num_class

        self.__hidden_layer = nn.Linear(input_dimension, param_size_param)
        self.__out_layer = nn.Linear(param_size_param, num_class)
        self.__relu = nn.ReLU()
        # NOTE(review): nn.Dropout's `p` is the probability of zeroing an
        # element, yet 1 means "no dropout" here -- confirm whether
        # dropout_param is meant as a drop or a keep probability.
        self.__dropout = nn.Dropout(dropout_param) if dropout_param != 1 else None

    def forward(self, input_data):
        """Return class logits for a batch; all non-batch dimensions are flattened."""
        flat = input_data.reshape(input_data.shape[0], -1)
        hidden = self.__relu(self.__hidden_layer(flat))
        if self.__dropout is not None:
            hidden = self.__dropout(hidden)
        return self.__out_layer(hidden)


## Train and test functions

In [None]:
def _evaluate_accuracy(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Puts the model in eval mode and runs without gradient tracking. Returns
    0.0 when the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, label in loader:
            data, label = data.to(device), label.to(device)
            predicted = torch.argmax(model(data), dim=1)
            # Count matches between predictions and labels.
            correct += torch.count_nonzero(predicted == label).item()
            total += data.shape[0]
    return correct / total if total else 0.0


def main_train_test(batch_size, param_size_param, dropout_param):
    """Train a ``CustomModel`` for one pass over the training loader and test it.

    Training accuracy is measured over the whole training loader after the
    first batch and then after every 10th batch. Test accuracy is measured
    once, after training.

    Args:
        batch_size: Batch size passed to ``get_loaders``.
        param_size_param: Hidden-layer width passed to ``CustomModel``.
        dropout_param: Dropout setting passed to ``CustomModel``.

    Returns:
        A tuple ``(running_loss, running_training_acc, accuracy)``:
        the per-batch training losses, the training accuracies recorded at
        each monitoring step, and the final test accuracy.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = CustomModel(param_size_param, dropout_param).to(device)
    optimizer = Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    running_loss, running_training_acc = [], []
    train_loader, test_loader = get_loaders(batch_size)
    tot_loss = 0
    for idx, (data, label) in enumerate(train_loader):
        # Re-enter train mode every step: the monitoring branch below
        # switches the model to eval mode.
        model.train()
        data, label = data.to(device), label.to(device)

        optimizer.zero_grad()

        preds = model(data)
        loss = criterion(preds, label)
        # Gradients must be computed before the optimizer can update weights.
        loss.backward()
        optimizer.step()

        tot_loss += loss.item()
        running_loss.append(loss.item())

        # Monitor after the very first batch and then after every 10th batch.
        if idx == 0 or (idx + 1) % 10 == 0:
            running_training_acc.append(_evaluate_accuracy(model, train_loader, device))
            if idx == 0:
                print('loss: {}, train acc: {}'.format(loss.item(), running_training_acc[-1]))
            else:
                # Average loss over the batches accumulated since the last reset.
                print('loss: {}, train acc: {}'.format(tot_loss / 10, running_training_acc[-1]))
                tot_loss = 0

    accuracy = _evaluate_accuracy(model, test_loader, device)

    # Guard against an empty training loader so the summary never raises.
    last_loss = running_loss[-1] if running_loss else None
    last_train_acc = running_training_acc[-1] if running_training_acc else None
    # There is no epoch counter (training is a single pass), hence None.
    print('epoch: {}, loss: {}, train acc: {}, test acc: {}'.format(None, last_loss, last_train_acc, accuracy))

    return running_loss, running_training_acc, accuracy

# Task 2 - Parameter Grid

## No dropout regularization and effect of $k$

## Training accuracy for each $k$ and $p$

## Test accuracy for each $k$ and $p$

# Task 3 - Adding Noise to Labels

## No dropout regularization and effect of $k$

## Training accuracy for each $k$ and $p$

## Test accuracy for each $k$ and $p$

# Task 4 - Comments