In [1]:
import matplotlib.pyplot as plt
import logging
import os

import torch
import torch.nn as nn
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import ToTensor, Compose, Normalize
from torch.optim import Adam

In [2]:
SAVED_PARAMS_PATH = os.path.join('.', 'saved_params_without_noise')
if not os.path.exists(SAVED_PARAMS_PATH):
    os.mkdir(SAVED_PARAMS_PATH)
    # logging config
log_file_path = os.path.join('./', 'mnist_with_dropout_and_noise__without_noise.log')
logging.basicConfig(filename=log_file_path, encoding='utf-8', level=logging.DEBUG, force=True)

# Task 1 - Main Functions

## Dataset operations

In [3]:
def custom_dataloader(batch_size, num_class=10, noise=False):
    # transformers
    transform = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,))
    ])

    # MNIST dataset train/test
    train_dataset = MNIST('./data', train=True, download=True, transform=transform)
    test_dataset = MNIST('./data', train=False, download=True, transform=transform)

    # sampling 1000 data from each class for simplicity
    train_labels = train_dataset.targets
    counter_dict = torch.zeros(10)
    sampled_indices = []
    for idx, label in enumerate(train_labels):
        if counter_dict[label] < 1000:
            sampled_indices.append(idx)
            counter_dict[label] = counter_dict[label] + 1
        if torch.sum(counter_dict).item() == 10000:
            break

    train_subset = Subset(train_dataset, torch.tensor(sampled_indices))

    # loaders
    train_loader = DataLoader(train_subset, batch_size=len(train_subset), shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=True)

    train_data, train_label = next(iter(train_loader))
    test_data, test_label = next(iter(test_loader))

    if noise:
        # adding noise to train samples
        num_of_noisy_samples = (train_data.size(0) * 4) // 10
        selected_idx = torch.randperm(train_data.size(0))[:num_of_noisy_samples]

        for selected_id in selected_idx:
            # find noisy class
            curr_class = train_label[selected_id]
            noise = torch.randint(num_class, size=(1,))[0]
            while curr_class != noise:
                noise = torch.randint(num_class, size=(1,))[0]

            train_label[selected_id] = noise
        logging.info('noise is added to the dataset')

    train_data, train_label = torch.split(train_data, batch_size), torch.split(train_label, batch_size)
    test_data, test_label = torch.split(test_data, batch_size), torch.split(test_label, batch_size)

    train_loader = list(zip(train_data, train_label))
    test_loader = list(zip(test_data, test_label))

    logging.info('dataset is handled')

    return train_loader, test_loader

## Create model class

In [4]:
class CustomModel(nn.Module):
    def __init__(self, param_size_param, dropout_param, input_dimension=784, num_class=10):
        super().__init__()
        self._param_size_param = param_size_param
        self._dropout_param = dropout_param
        self._input_dimension = input_dimension
        self._num_class = num_class

        self.__hidden_layer = nn.Linear(self._input_dimension, self._param_size_param)
        self.__out_layer = nn.Linear(self._param_size_param, self._num_class)
        self.__relu = nn.ReLU()
        if dropout_param != 1:
            self.__dropout = nn.Dropout(self._dropout_param)
        else:
            self.__dropout = None

    def forward(self, input_data):
        out = input_data.reshape(input_data.size(0), -1)
        out = self.__hidden_layer(out)
        out = self.__relu(out)
        if self.__dropout:
            out = self.__dropout(out)
        out = self.__out_layer(out)
        return out


## Train and test functions

In [5]:
def main_epoch(model, optimizer, criterion, device, train_loader, test_loader, epoch):
    running_loss, running_training_acc = [], []
    tot_loss = 0
    for idx, (data, label) in enumerate(train_loader):
        model.train()
        data, label = data.to(device), label.to(device)

        optimizer.zero_grad()

        preds = model(data)
        loss = criterion(preds, label)
        loss.backward()
        optimizer.step()

        tot_loss += loss.item()
        running_loss.append(loss.item())

        if idx == 0 or (idx + 1) % 10 == 0:
            model.eval()
            tot_acc = 0
            tot_data = 0
            with torch.no_grad():
                for train_data, train_label in train_loader:
                    train_data, train_label = train_data.to(device), train_label.to(device)
                    train_preds = model(train_data)
                    train_pred_idx = torch.argmax(train_preds, dim=1)
                    tot_acc += torch.count_nonzero((train_pred_idx == train_label).long())
                    tot_data += train_data.shape[0]
                running_training_acc.append(tot_acc.item() / tot_data)
            if idx == 0:
                print('loss: {}, train acc: {}'.format(loss, running_training_acc[-1]))
                logging.info('loss: {}, train acc: {}'.format(loss, running_training_acc[-1]))
            else:
                print('loss: {}, train acc: {}'.format(tot_loss / 10, running_training_acc[-1]))
                logging.info('loss: {}, train acc: {}'.format(tot_loss / 10, running_training_acc[-1]))
                tot_loss = 0

    model.eval()
    tot_acc = 0
    tot_data = 0
    with torch.no_grad():
        for idx, (test_data, test_label) in enumerate(test_loader):
            test_data, test_label = test_data.to(device), test_label.to(device)
            test_preds = model(test_data)
            test_pred_idx = torch.argmax(test_preds, dim=1)
            tot_acc += torch.count_nonzero((test_pred_idx == test_label).long())
            tot_data += test_data.shape[0]
        accuracy = tot_acc.item() / tot_data
    print('epoch: {}, loss: {}, train acc: {}, test acc: {}'.format(epoch, running_loss[-1], running_training_acc[-1], accuracy))
    logging.info('epoch: {}, loss: {}, train acc: {}, test acc: {}'.format(epoch, running_loss[-1], running_training_acc[-1], accuracy))
    return running_loss, running_training_acc, accuracy

def main_param_dropout(batch_size, param_size_param, dropout_param, number_of_epochs=80, lr=0.001, noise=False):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = CustomModel(param_size_param, dropout_param).to(device)
    optimizer = Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    if noise:
        train_loader, test_loader = custom_dataloader(batch_size, noise=noise)
    else:
        train_loader, test_loader = custom_dataloader(batch_size, noise=noise)

    running_loss, running_training_acc, running_accuracy = [], [], []

    print('#####training and testing start with K:{}, P:{}######'.format(param_size_param, dropout_param))
    logging.info('#####training and testing start with K:{}, P:{}######'.format(param_size_param, dropout_param))
    for epoch in range(number_of_epochs):
        curr_running_loss, curr_running_training_acc, curr_accuracy = main_epoch(model, optimizer, criterion, device, train_loader, test_loader, epoch)
        running_loss += curr_running_loss
        running_training_acc += curr_running_training_acc
        running_accuracy.append(curr_accuracy)
    print('#####training and testing end with K:{}, P:{}######'.format(param_size_param, dropout_param))
    logging.info('#####training and testing end with K:{}, P:{}######'.format(param_size_param, dropout_param))
    return running_loss, running_training_acc, running_accuracy

def param_dropout_grid(batch_size, param_size_param_arr, dropout_param_arr, **kwargs):
    for param_size_idx, param_size_param in enumerate(param_size_param_arr):
        for dropout_idx, dropout_param in enumerate(dropout_param_arr):
            running_loss, running_training_acc, running_accuracy = main_param_dropout(batch_size, param_size_param, dropout_param, **kwargs)
            save_param_path = os.path.join(SAVED_PARAMS_PATH, 'exp_k_{}_p_{}.pth'.format(param_size_idx, dropout_idx))
            torch.save({
                'running_loss': running_loss,
                'running_training_acc': running_training_acc, 
                'running_accuracy': running_accuracy
            },save_param_path)

# Task 2 - Parameter Grid

In [6]:
param_dropout_grid(128, [1, 5, 10, 20, 40], [0.1, 0.5, 1])

#####training and testing start with K:1, P:0.1######
loss: 2.368612289428711, train acc: 0.107
loss: 2.4298451423645018, train acc: 0.1154
loss: 2.356693887710571, train acc: 0.1543
loss: 2.311783051490784, train acc: 0.1402
loss: 2.27259247303009, train acc: 0.1878
loss: 2.252685785293579, train acc: 0.2033
loss: 2.2267510175704954, train acc: 0.2121
loss: 2.214937949180603, train acc: 0.2112
epoch: 0, loss: 2.0312681198120117, train acc: 0.2112, test acc: 0.2031
loss: 2.122227191925049, train acc: 0.206
loss: 2.217936372756958, train acc: 0.2106
loss: 2.191518783569336, train acc: 0.2126
loss: 2.189629411697388, train acc: 0.2083
loss: 2.183908224105835, train acc: 0.2097
loss: 2.146162009239197, train acc: 0.2049
loss: 2.1473097085952757, train acc: 0.2143
loss: 2.123350644111633, train acc: 0.2106
epoch: 1, loss: 1.9440760612487793, train acc: 0.2106, test acc: 0.1952
loss: 2.0900561809539795, train acc: 0.2012
loss: 2.1590965509414675, train acc: 0.2144
loss: 2.1320515632629395, 

## No dropout regularization and effect of $k$

## Training accuracy for each $k$ and $p$

## Test accuracy for each $k$ and $p$

# Task 3 - Adding Noise to Labels

In [7]:
SAVED_PARAMS_PATH = os.path.join('.', 'saved_params_with_noise')
if not os.path.exists(SAVED_PARAMS_PATH):
    os.mkdir(SAVED_PARAMS_PATH)
    # logging config
log_file_path = os.path.join('./', 'mnist_with_dropout_and_noise__with_noise.log')
logging.basicConfig(filename=log_file_path, encoding='utf-8', level=logging.DEBUG, force=True)

In [8]:
param_dropout_grid(128, [1, 5, 10, 20, 40], [0.1, 0.5, 1], noise=True)

#####training and testing start with K:1, P:0.1######
loss: 2.503192186355591, train acc: 0.1
loss: 2.4422786235809326, train acc: 0.1
loss: 2.4630873680114744, train acc: 0.1
loss: 2.4626322269439695, train acc: 0.1
loss: 2.4083083152770994, train acc: 0.1
loss: 2.425239086151123, train acc: 0.1
loss: 2.3850432634353638, train acc: 0.1
loss: 2.3874953985214233, train acc: 0.1
epoch: 0, loss: 2.4133033752441406, train acc: 0.1, test acc: 0.0982
loss: 2.3798000812530518, train acc: 0.1
loss: 2.313624119758606, train acc: 0.1
loss: 2.336075520515442, train acc: 0.1
loss: 2.3362366914749146, train acc: 0.1
loss: 2.293461036682129, train acc: 0.1
loss: 2.3319353818893434, train acc: 0.1
loss: 2.3005876779556274, train acc: 0.1
loss: 2.297777462005615, train acc: 0.1
epoch: 1, loss: 2.387000560760498, train acc: 0.1, test acc: 0.0982
loss: 2.3072304725646973, train acc: 0.1
loss: 2.247397708892822, train acc: 0.1
loss: 2.271711802482605, train acc: 0.1
loss: 2.2748739242553713, train acc: 0

## No dropout regularization and effect of $k$

## Training accuracy for each $k$ and $p$

## Test accuracy for each $k$ and $p$

# Task 4 - Comments