# Part 0: Initial setup

To enable GPU:
1.   Click Edit -> Notebook settings
2.   Under Hardware Accelerator select GPU
3.   On the right side of this page, click connect to a hosted runtime


If you ever see an error about needing third-party cookies enabled, you can either stop blocking third-party cookies or whitelist the required site.
Here is a simple way to whitelist it (in Chrome):

For older versions of Chrome:
1.   Go to chrome://settings/content/cookies?search=cookie
2.   Uncheck "Block third-party cookies" or
3.   Click Add next to Allow and type https://[*.]googleusercontent.com:443

For newer versions of Chrome:
1.   Go to Settings and search for "cookie"
2.   Click the "Content settings" button
3.   Follow step 2 or 3 from above
---



In [0]:
# This shows how to connect your Google Drive account with a Colab instance. It's pretty easy.
# Load the Drive helper.
from google.colab import drive

# Mount Google Drive at the /gdrive directory. This will prompt for authorization.
drive.mount('/gdrive')


# Drive not connecting after it seemed like it worked before?
1. First try restarting the runtime via Runtime -> Restart Runtime
2. Then try to run the above again.
3. If this still doesn't work, call Reset All Runtimes. This is the nuclear option that will delete all your data not saved on your personal drive account, and will erase everything you installed.


In [0]:
# Now let's test that Google Drive is up and running.
# You may have to change "My Drive" if you have renamed it to something else.
# List the contents of the mounted Drive folder.
!ls "/gdrive/My Drive"

# Write a small file to Drive, print it back, then delete it again.
with open('/gdrive/My Drive/foo.txt', 'w') as f:
  f.write('Hello Google Drive!')
!cat "/gdrive/My Drive/foo.txt"
!rm "/gdrive/My Drive/foo.txt"

In [0]:
# This is code to download and install PyTorch 0.4.1 (plus torchvision).
from os import path
# NOTE(review): wheel.pep425tags is an internal module of the `wheel` package and
# may not exist in newer wheel releases -- confirm against the installed version.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Wheel platform tag assembled from the implementation, its version and the ABI tag.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

# Use the 'cu80' build (presumably CUDA 8.0) when nvidia-smi is present, else the CPU build.
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
  
# Sanity check: report the installed version and whether CUDA is usable.
import torch
print('Version', torch.__version__)
print('CUDA enabled:', torch.cuda.is_available())
  
# NOTE: This one takes a while the first time you run it, and you will likely see 
# tcmalloc: large alloc 1073750016 bytes == 0x5c54a000 @ or something similar.
# It should then print out:
# Version 0.4.1
# CUDA enabled: True

In [0]:
# Define some useful save and restoring functions. 
# You can thank your TAs for providing this code, 
# it will probably be useful for you in the future as well.
import torch
import torch.nn as nn
import numpy as np
import os
class pt_util(object):
    """Checkpoint helpers: tolerant restore, restore-latest and save-with-pruning."""

    @staticmethod
    def restore(net, save_file):
        """Copy every matching variable from the checkpoint ``save_file`` into ``net``.

        This does more than the simple PyTorch restore. It checks that the names
        and shapes of variables match, and if they don't it reports them instead
        of throwing a fit. It is similar to how Caffe acts. This is especially
        useful if you decide to change your network architecture but don't want
        to retrain from scratch.

        Args:
            net: module whose ``state_dict()`` tensors are overwritten in place.
            save_file: path of a file written with ``torch.save(state_dict)``.

        Raises:
            Exception: whatever ``copy_`` raises for a variable; a diagnostic
                line is printed first and the error is re-raised unchanged.
        """
        net_state_dict = net.state_dict()
        # Load onto the CPU first so that a checkpoint written on a GPU runtime
        # can still be restored on a CPU-only one; copy_() below transfers the
        # values to whatever device the destination tensor lives on.
        restore_state_dict = torch.load(save_file, map_location='cpu')

        restored_var_names = set()

        print('Restoring:')
        for var_name, restore_value in restore_state_dict.items():
            if var_name not in net_state_dict:
                continue
            if isinstance(restore_value, torch.nn.Parameter):
                # backwards compatibility for serialized parameters
                restore_value = restore_value.data
            var_size = net_state_dict[var_name].size()
            restore_size = restore_value.size()
            if var_size != restore_size:
                print('Shape mismatch for var', var_name, 'expected', var_size, 'got', restore_size)
                continue
            try:
                net_state_dict[var_name].copy_(restore_value)
            except Exception:
                print('While copying the parameter named {}, whose dimensions in the model are'
                      ' {} and whose dimensions in the checkpoint are {}, ...'.format(
                          var_name, var_size, restore_size))
                raise
            # The size estimate assumes 4 bytes per element.
            print(str(var_name) + ' -> \t' + str(var_size) + ' = ' + str(int(np.prod(var_size) * 4 / 10**6)) + 'MB')
            restored_var_names.add(var_name)

        ignored_var_names = sorted(set(restore_state_dict.keys()) - restored_var_names)
        unset_var_names = sorted(set(net_state_dict.keys()) - restored_var_names)
        print('')
        if not ignored_var_names:
            print('Restored all variables')
        else:
            print('Did not restore:\n\t' + '\n\t'.join(ignored_var_names))
        if not unset_var_names:
            print('No new variables')
        else:
            print('Initialized but did not modify:\n\t' + '\n\t'.join(unset_var_names))

        print('Restored %s' % save_file)

    @staticmethod
    def restore_latest(net, folder):
        """Restore the most recently modified ``*.pt`` file in ``folder``.

        Args:
            net: module to restore into (see ``restore``).
            folder: directory that is searched for ``*.pt`` checkpoints.

        Returns:
            The last run of digits in the checkpoint's file name as an int, or
            0 when the folder holds no checkpoint or the name has no digits.
        """
        import glob
        import re
        checkpoints = sorted(glob.glob(os.path.join(folder, '*.pt')), key=os.path.getmtime)
        if not checkpoints:
            return 0
        latest = checkpoints[-1]
        pt_util.restore(net, latest)
        # Only look at the file name: digits in the directory part of the path
        # must not be mistaken for the iteration number.
        numbers = re.findall(r'\d+', os.path.basename(latest))
        return int(numbers[-1]) if numbers else 0

    @staticmethod
    def save(net, file_name, num_to_keep=1):
        """Save ``net.state_dict()`` to ``file_name`` and prune older checkpoints.

        Args:
            net: module whose state dict is written.
            file_name: destination path; missing parent directories are created.
                A bare file name is saved into the current directory.
            num_to_keep: how many of the most recently modified files with the
                same extension in the destination folder to keep. Every other
                file with that extension in the folder is deleted. Values <= 0
                disable pruning.
        """
        import glob
        # dirname() is '' for a bare file name; fall back to the current
        # directory so makedirs/glob operate on a real path.
        folder = os.path.dirname(file_name) or '.'
        os.makedirs(folder, exist_ok=True)
        torch.save(net.state_dict(), file_name)
        print('Saved %s\n' % file_name)
        if num_to_keep > 0:
            extension = os.path.splitext(file_name)[1]
            checkpoints = sorted(glob.glob(os.path.join(folder, '*' + extension)),
                                 key=os.path.getmtime)
            for stale in checkpoints[:-num_to_keep]:
                os.remove(stale)


# Part 1: Implementing a network for MNIST

In [0]:
# This is where you define your network architecture.
# Note: The TAs know this follows the PyTorch MNIST tutorial available at 
# https://github.com/pytorch/examples/blob/master/mnist/main.py
# Where do you think we got it from? 
# So we are asking you to implement something slightly different. 
# You can use that as a guide, but make sure you understand what it all does.


import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
class MNISTNet(nn.Module):
    """Skeleton of the Part 1 MNIST classifier; the placeholders are to be filled in.

    The network should be as follows:
      * One fully connected layer with 1024 outputs.
      * One fully connected layer with 512 outputs.
      * Then the final classification layer.
    All the nonlinearities should be ReLU.
    These instructions are vague on purpose.
    """

    def __init__(self):
        super().__init__()
        raise NotImplementedError('Define the layers here')

    def forward(self, x):
        """Compute the network output for ``x`` (placeholder)."""
        raise NotImplementedError('Define the forward pass')

    def loss(self, prediction, label, reduction='elementwise_mean'):
        """Compute the loss of ``prediction`` against ``label`` (placeholder)."""
        # You should also use the cross_entropy loss rather than the NLL loss.
        raise NotImplementedError('Define the loss here')

    # --- checkpoint persistence, delegated to pt_util ---

    def save_model(self, file_path, num_to_keep=1):
        """Write this model to ``file_path`` through ``pt_util.save``."""
        pt_util.save(self, file_path, num_to_keep)

    def load_model(self, file_path):
        """Restore this model from ``file_path`` through ``pt_util.restore``."""
        pt_util.restore(self, file_path)

    def load_last_model(self, dir_path):
        """Restore from ``dir_path`` via ``pt_util.restore_latest`` and return its result."""
        return pt_util.restore_latest(self, dir_path)


def train(model, device, train_loader, optimizer, epoch, log_interval):
    """Run one pass over ``train_loader`` (template: the forward pass is a placeholder).

    Args:
        model: network to train; switched to training mode first.
        device: device every batch is moved to before use.
        train_loader: iterable yielding ``(data, label)`` batches; its
            ``dataset`` attribute is used for the progress message.
        optimizer: optimizer whose gradients are zeroed and that is stepped
            once per batch.
        epoch: epoch number, only used in the progress message.
        log_interval: a progress line is printed every ``log_interval`` batches.
    """
    model.train()
    for batch_idx, (data, label) in enumerate(train_loader):
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        
        # Placeholder to replace. The logging below reads `loss.item()`, so the
        # replacement code has to bind a variable named `loss`.
        raise NotImplementedError('Define the forward pass and loss here')
        
        optimizer.step()
        if batch_idx % log_interval == 0:
            # Progress: samples seen so far / dataset size, percent of batches done, current loss.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy (template).

    The three underscore placeholders in the loop are blanks to be filled in.

    Args:
        model: network to evaluate; switched to evaluation mode first.
        device: device every batch is moved to before use.
        test_loader: iterable yielding ``(data, label)`` batches; the size of
            its ``dataset`` attribute is used to average the results.
    """
    model.eval()
    test_loss = 0
    correct = 0
    # Gradients are not tracked during evaluation.
    with torch.no_grad():
        for data, label in test_loader:
            data, label = data.to(device), label.to(device)
            output = model(data)
            # NOTE(review): test_loss is divided by the dataset size below, so
            # presumably the per-batch loss should be summed here -- confirm.
            test_loss += _____________
            pred = ______________
            correct += ___________

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
# Play around with these constants, you may find a better setting.
BATCH_SIZE = 256  # batch size of the training DataLoader
TEST_BATCH_SIZE = 1000  # batch size of the test DataLoader
EPOCHS = 10  # the training loop runs up to and including this epoch number
LEARNING_RATE = 0.01  # `lr` passed to the SGD optimizer
MOMENTUM = 0.5  # `momentum` passed to the SGD optimizer
USE_CUDA = True  # CUDA is only used when it is also reported as available
SEED = 0  # value passed to torch.manual_seed
LOG_INTERVAL = 100  # train() prints a progress line every LOG_INTERVAL batches

In [0]:
# Now the actual training code
import multiprocessing
import traceback

use_cuda = USE_CUDA and torch.cuda.is_available()

torch.manual_seed(SEED)

device = torch.device("cuda" if use_cuda else "cpu")
print('Using device', device)
print('num cpus:', multiprocessing.cpu_count())

# Worker processes and pinned memory are only requested when running on CUDA.
kwargs = {'num_workers': multiprocessing.cpu_count(),
          'pin_memory': True} if use_cuda else {}
# Both splits share the same preprocessing: tensor conversion, then normalization.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=mnist_transform),
    batch_size=BATCH_SIZE, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=TEST_BATCH_SIZE, **kwargs)


model = MNISTNet().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
# This will save checkpoints in your Google Drive account.
checkpoint_dir = '/gdrive/My Drive/colab_files/homework1/mnist/checkpoints'
start_epoch = model.load_last_model(checkpoint_dir)
# Bind `epoch` before the loop: the `finally` clause reads it even when the
# range below is empty, and it must not pick up a stale value left behind by
# a previously executed cell.
epoch = start_epoch
try:
    for epoch in range(start_epoch, EPOCHS + 1):
        train(model, device, train_loader, optimizer, epoch, LOG_INTERVAL)
        test(model, device, test_loader)
        model.save_model('%s/%03d.pt' % (checkpoint_dir, epoch))
except KeyboardInterrupt:
    print('Interrupted')
except Exception:
    # Report the failure but still fall through to the final save below.
    traceback.print_exc()
finally:
    # Always persist the current weights under the epoch that was in progress.
    model.save_model('%s/%03d.pt' % (checkpoint_dir, epoch))
        

# Part 2: Reimplementing the Cross Entropy loss function.

In [0]:
class MNISTNetNewLoss(MNISTNet):
    """MNISTNet variant whose loss is to be reimplemented by hand (Part 2 template)."""
  
    def loss(self, prediction, label, reduction='elementwise_mean'): 
        # Reimplement the Cross Entropy loss function using mathematical primitives.
        # This means you are not allowed to use any function in the "Loss functions" 
        # section of https://pytorch.org/docs/stable/nn.html#id50 nor the 
        # functional versions. You can use them to verify that your output looks correct.
        # You should implement reduction for none (i.e. return a vector, sum, and elementwise_mean).
        # Note: Due to floating point errors, the values won't be exactly equal.
        # Second note: You can assume inputs will be 2D (batch X features).
        # Reference value from the parent class, used by the check at the end.
        loss_val_old = super(MNISTNetNewLoss, self).loss(prediction, label, reduction)        
        
        # Placeholder to replace. The check below reads `loss_val_new`, so the
        # replacement code has to bind a variable with that name.
        raise NotImplementedError('Define the loss here')
        
        # NOTE(review): .item() needs a one-element tensor, so this check presumably
        # only applies to the reduced ('sum' / 'elementwise_mean') results -- confirm.
        # NOTE(review): nothing is returned after the check; presumably the finished
        # method should return loss_val_new -- confirm.
        assert(abs(loss_val_new - loss_val_old).item() < 0.01)

        

In [0]:
# Now the actual training code
import multiprocessing
import traceback

use_cuda = USE_CUDA and torch.cuda.is_available()

torch.manual_seed(SEED)

device = torch.device("cuda" if use_cuda else "cpu")
print('Using device', device)
print('num cpus:', multiprocessing.cpu_count())

# Worker processes and pinned memory are only requested when running on CUDA.
kwargs = {'num_workers': multiprocessing.cpu_count(),
          'pin_memory': True} if use_cuda else {}
# Both splits share the same preprocessing: tensor conversion, then normalization.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=mnist_transform),
    batch_size=BATCH_SIZE, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=TEST_BATCH_SIZE, **kwargs)


model = MNISTNetNewLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
# Checkpoints for this part go to a separate folder in your Google Drive account.
checkpoint_dir = '/gdrive/My Drive/colab_files/homework1/mnist2/checkpoints'
start_epoch = model.load_last_model(checkpoint_dir)
# Bind `epoch` before the loop: the `finally` clause reads it even when the
# range below is empty, and it must not pick up a stale value left behind by
# a previously executed cell.
epoch = start_epoch
try:
    for epoch in range(start_epoch, EPOCHS + 1):
        train(model, device, train_loader, optimizer, epoch, LOG_INTERVAL)
        test(model, device, test_loader)
        model.save_model('%s/%03d.pt' % (checkpoint_dir, epoch))
except KeyboardInterrupt:
    print('Interrupted')
except Exception:
    # Report the failure but still fall through to the final save below.
    traceback.print_exc()
finally:
    # Always persist the current weights under the epoch that was in progress.
    model.save_model('%s/%03d.pt' % (checkpoint_dir, epoch))