# Importing Python libraries

In [None]:
from __future__ import print_function, division
import argparse
import time
import os
import copy
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import sys
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler

# Defining the neural network architecture

In [None]:
class Net(nn.Module):
    '''
    Small convolutional classifier: two 5x5 convolutions followed by two
    fully connected layers, producing log-probabilities over 10 classes.

    The flatten step assumes 320 features per sample after the second
    pooling stage (20 channels x 4 x 4 for a 1 x 28 x 28 input).
    '''

    def __init__(self):
        super(Net, self).__init__()
        # Feature extractor
        self.conv1 = nn.Conv2d(1, 15, kernel_size=5)
        self.conv2 = nn.Conv2d(15, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        # Classifier head
        self.fc1 = nn.Linear(320, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        '''Return per-class log-probabilities of shape (N, 10) for input x.'''
        # Stage 1: conv -> 2x2 max-pool -> ReLU
        stage1 = self.conv1(x)
        stage1 = F.max_pool2d(stage1, 2)
        stage1 = F.relu(stage1)

        # Stage 2: conv -> channel dropout -> 2x2 max-pool -> ReLU
        stage2 = self.conv2(stage1)
        stage2 = self.conv2_drop(stage2)
        stage2 = F.max_pool2d(stage2, 2)
        stage2 = F.relu(stage2)

        # Flatten to 320 features per sample
        flat = stage2.view(-1, 320)

        hidden = F.relu(self.fc1(flat))
        # Functional dropout must be told explicitly whether we are training
        hidden = F.dropout(hidden, training=self.training)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

# Defining noisy training loader

In [None]:
def noisy_loader(params):
    '''
    Build a (possibly label-corrupted) training loader from one MNIST batch.

    A single shuffled batch of params['batchsize'] MNIST training samples is
    drawn and used as the whole training set. When params['clean_data'] is
    False, each label is corrupted independently with probability
    params['epsilon']:
      * 'directed' noise maps a label y to (y + params['shift']) % 10;
      * 'random' noise replaces y by a uniformly drawn *different* digit.
    Any other params['noise_type'] leaves the labels untouched.

    Returns a shuffling DataLoader (drop_last=True) over that subset whose
    batch size is int(params['frac'] * params['k']) for 'sgd' and
    params['k'] for 'mkl'.

    Raises ValueError for any other params['optimizer_type'].
    '''
    # Validate before touching the dataset so a typo fails fast and clearly
    # (previously this surfaced as an UnboundLocalError at the return).
    if params['optimizer_type'] == 'sgd':
        loader_batch_size = int(params['frac'] * params['k'])
    elif params['optimizer_type'] == 'mkl':
        loader_batch_size = params['k']
    else:
        raise ValueError('Unknown optimizer_type: %r' % (params['optimizer_type'],))

    train_loader = torch.utils.data.DataLoader(
                        datasets.MNIST('../data', train=True, download=True,
                                       transform=transforms.Compose([transforms.ToTensor(),
                                                                     transforms.Normalize((0.1307,), (0.3081,))])),
                                       batch_size=params['batchsize'], shuffle=True)

    # Only one batch is ever needed. The earlier loop only stopped after the
    # first batch on the noisy path; on the clean path it walked the entire
    # dataset and kept whatever batch came last.
    data, target = next(iter(train_loader))
    target_noisy = target.numpy().copy()

    if params['clean_data'] is False:
        flag = np.random.binomial(1, params['epsilon'], size=len(target))
        noise_type = params['noise_type']
        for sample_idx in np.flatnonzero(flag):
            if noise_type == 'directed':
                target_noisy[sample_idx] = (target_noisy[sample_idx] + params['shift']) % 10
            elif noise_type == 'random':
                # Redraw until the new label differs from the true one
                out = np.random.randint(0, 10)
                while out == target_noisy[sample_idx]:
                    out = np.random.randint(0, 10)
                target_noisy[sample_idx] = out

    train_noisy = torch.utils.data.TensorDataset(data, torch.from_numpy(target_noisy))
    return torch.utils.data.DataLoader(train_noisy, batch_size=loader_batch_size,
                                       shuffle=True, drop_last=True)



# Training and Testing Phase

In [None]:
def train(train_loader_noisy, epoch, run, epsilon, params):
    '''
    Train the module-level `model` for one epoch using the module-level
    `optimizer`.

    For params['optimizer_type'] == 'sgd' every batch is used as-is.
    For 'mkl' the per-sample losses of the batch are evaluated first and only
    the int(params['frac'] * params['k']) samples with the lowest loss are
    used for the gradient step.

    Arguments:
        train_loader_noisy: iterable of (data, target) batches.
        epoch, run: only used in the progress message.
        epsilon: unused here; kept for interface compatibility.
        params: experiment settings ('optimizer_type', 'frac', 'k').

    Returns the loss of the last batch as a Python float (nan if the loader
    yielded no batch).

    Raises ValueError for an unknown params['optimizer_type'].
    '''
    model.train()
    # Follow the model's device instead of assuming CUDA
    device = next(model.parameters()).device
    n_keep = int(params['frac'] * params['k'])
    loss = None
    for batch_idx, (data, target) in enumerate(train_loader_noisy):
        data, target = data.to(device), target.to(device)
        if params['optimizer_type'] == 'sgd':
            data1, target1 = data, target
        elif params['optimizer_type'] == 'mkl':
            # The selection pass needs loss values only, so skip building an
            # autograd graph for it.
            with torch.no_grad():
                temp_loss = F.nll_loss(model(data), target, reduction='none')
            # Pick the samples with the lowest loss. topk accepts k equal to
            # the batch size, which argpartition rejected as out of bounds.
            keep = torch.topk(temp_loss, min(n_keep, temp_loss.numel()), largest=False)[1]
            data1, target1 = data[keep], target[keep]
        else:
            raise ValueError('Unknown optimizer_type: %r' % (params['optimizer_type'],))

        optimizer.zero_grad()
        loss = F.nll_loss(model(data1), target1)
        loss.backward()
        optimizer.step()

        if batch_idx % 250 == 249:
            print('Train Run: {} Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, {}'.format(
                run + 1 , epoch, batch_idx * len(data), len(train_loader_noisy.dataset),
                100. * batch_idx / len(train_loader_noisy), loss.item(), params['optimizer_type']))
    return loss.item() if loss is not None else float('nan')

def test(test_loader, run):
    '''
    Evaluate the module-level `model` on test_loader.

    Arguments:
        test_loader: iterable of (data, target) batches with a `.dataset`
            attribute whose length is the number of test samples.
        run: zero-based run index, only used in the printed summary.

    Returns (average negative log-likelihood per sample, accuracy in percent).

    Note: the printed summary reads the module-level `params` dict for
    'optimizer_type'.
    '''
    model.eval()
    # Follow the model's device instead of assuming CUDA
    device = next(model.parameters()).device
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # reduction='sum' replaces the deprecated size_average=False
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).float().cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set {} Run: {} : Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            params['optimizer_type'], run + 1, test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    return test_loss, 100. * correct / len(test_loader.dataset)

def select_optimizer(optimizer_name, params):
    '''
    Create a torch optimizer over the parameters of the module-level `model`.

    Arguments:
        optimizer_name: 'sgd' for plain SGD, 'sgdmomentum' for SGD with
            momentum params['momentum'].
        params: dict providing 'lr' (and 'momentum' for 'sgdmomentum').

    Raises ValueError for any other optimizer_name (previously this fell
    through to an UnboundLocalError at the return statement).
    '''
    if optimizer_name == 'sgd':
        return optim.SGD(model.parameters(), lr=params['lr'])
    if optimizer_name == 'sgdmomentum':
        return optim.SGD(model.parameters(), lr=params['lr'], momentum=params['momentum'])
    raise ValueError('Unknown optimizer name: %r' % (optimizer_name,))

def one_run(train_loader_noisy, model, optimizer, params, results):
    '''
    Train and evaluate for params['num_epochs'] epochs and record the metrics.

    Column params['current_run'] of each array in `results` ('train_loss',
    'test_loss', 'test_acc', 'time_spent') is filled with one row per epoch;
    'time_spent' is the wall-clock time since the start of this run.

    Note: train() and test() operate on the module-level `model` and
    `optimizer`; the parameters of the same name are not forwarded to them.
    The module-level `scheduler` and `test_loader` are read here as well.

    Returns the updated `results` dict.
    '''
    num_epochs = params['num_epochs']
    count = params['current_run']
    time_start = time.time()
    for epoch in range(1, num_epochs+1):
        results['train_loss'][epoch - 1, count] = train(train_loader_noisy, epoch, count, params['epsilon'], params)
        # Advance the LR schedule only after this epoch's optimizer steps;
        # stepping first skips the initial value of the schedule on
        # PyTorch >= 1.1 (decay would start one epoch early).
        if params['decayschedule'] != 0:
            scheduler.step()
        results['test_loss'][epoch - 1, count], results['test_acc'][epoch - 1, count] = test(test_loader, count)
        results['time_spent'][epoch - 1, count] = time.time() - time_start
    return results

# Define parameters

In [None]:
def init_params():
    '''
    Create the experiment configuration, empty result buffers and MNIST data.

    Returns (params, trainset, test_loader, results) where `results` holds
    one zero-filled (num_epochs, runs) array per recorded metric.
    '''
    params = {
        'lr': 0.05,                  # Learning rate
        'momentum': 0.0,             # Momentum parameter
        'k': 10,                     # Number of loss evaluations per batch
        'batchsize': 5000,           # Number of training samples drawn in noisy_loader
        'decayschedule': 30,         # StepLR step size in epochs (0 disables the scheduler)
        'noise_type': 'directed',    # 'directed' or 'random' label corruption
        'learningratedecay': 0.2,    # Multiplicative learning-rate decay factor
        'eps': 1e-08,                # Not read by any code visible in this file
        'num_epochs': 80,            # Number of epochs
        'optimizer_type': 'mkl',     # 'sgd' for standard SGD, 'mkl' for MKL-SGD
        'runs': 5,                   # Number of independent runs
        'epsilon': 0.1,              # Fraction of corrupted labels
        'frac': 0.6,                 # Fraction of lowest-loss samples kept per batch;
                                     #    samples per update = frac * k
        'clean_data': False,         # True skips label corruption entirely
        'shift': 2,                  # Label shift of the directed noise model
    }

    # One column per run, one row per epoch
    buffer_shape = (params['num_epochs'], params['runs'])
    results = {}
    for metric in ('train_loss', 'test_loss', 'test_acc', 'time_spent'):
        results[metric] = np.zeros(buffer_shape)

    def build_transform():
        # Tensor conversion followed by normalisation with fixed mean/std
        return transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

    trainset = datasets.MNIST('../data', train=True, download=True, transform=build_transform())
    testset = datasets.MNIST('../data', train=False, transform=build_transform())
    test_loader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False)
    return params, trainset, test_loader, results

# Main function

In [None]:
# Run the whole experiment: params['runs'] independent trainings, each with
# a freshly initialised network and a freshly drawn noisy training subset.
# NOTE: `model`, `optimizer`, `scheduler`, `params` and `test_loader` are
# module-level names that train(), test(), select_optimizer() and one_run()
# read directly, so they must keep exactly these names.
time_start = time.time()
params, trainset, test_loader, results = init_params()
for run in range(params['runs']):
    train_loader_noisy = noisy_loader(params)
    model = Net()
    model.cuda()
    # Always plain SGD here (params['momentum'] is not used on this path);
    # params['optimizer_type'] only selects the sample-selection rule in train().
    optimizer = select_optimizer('sgd', params)
    if params['decayschedule'] != 0:
        # A decay schedule of 0 means "no scheduler"; one_run() applies the same check.
        scheduler = lr_scheduler.StepLR(optimizer, step_size=params['decayschedule'], gamma=params['learningratedecay'])
    # Wrapped after the optimizer was built from model.parameters()
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))        
    # one_run() uses this as the column index into the result arrays
    params['current_run'] = run
    results = one_run(train_loader_noisy, model, optimizer, params, results)


# Saving parameters

In [None]:
# Build the output path from the hyper-parameters, create its directory if
# needed and store all recorded metrics in a single archive via np.savez.
_name_fields = (params['optimizer_type'], params['noise_type'], params['lr'],
                params['momentum'], params['epsilon'], params['decayschedule'],
                params['num_epochs'], params['runs'], params['k'])

if params['optimizer_type'] == 'sgd':
    file_path = "./results/MNIST_SGD_latest_%s_%s/lr_%.4f_momentum_%.4f_eps_%.4f_ds_%d_n_epochs_%d_runs_%d_minibatch_%d" % _name_fields
elif params['optimizer_type'] == 'mkl':
    # The MKL variant additionally records the kept fraction in the name
    file_path = "./results/MNIST_SGD_latest_%s_%s/lr_%.4f_momentum_%.4f_eps_%.4f_ds_%d_n_epochs_%d_runs_%d_minibatch_%d_frac_%.2f" % (_name_fields + (params['frac'],))

directory = os.path.dirname(file_path)
if not os.path.exists(directory):
    os.makedirs(directory)

_saved_arrays = {}
for _metric in ('train_loss', 'test_loss', 'test_acc', 'time_spent'):
    _saved_arrays[_metric] = results[_metric]
np.savez(file_path, **_saved_arrays)

  
