# Lab 02 : CIFAR multi-layer -- exercise

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randint
import utils
import time

### With or without GPU?

It is recommended to run this code on GPU:<br> 
* Time for 1 epoch on CPU : 5 sec<br> 
* Time for 1 epoch on GPU : 0.6 sec w/ GeForce GTX 1080 Ti<br>

In [None]:
# Compute device used by every cell below. The notebook runs on the CPU by
# default; replace "cpu" with "cuda" to run on a GPU instead.
device = torch.device("cpu")
print(device)

### Download the CIFAR dataset 

In [2]:
from utils import check_cifar_dataset_exists

# The helper's return value is used as the path prefix of the 'cifar/' folder.
# NOTE(review): presumably it also downloads the dataset when missing -- confirm in utils.
data_path = check_cifar_dataset_exists()

# Load the four tensors in a fixed order: train data/labels, then test data/labels.
train_data, train_label, test_data, test_label = (
    torch.load(data_path + 'cifar/' + file_name)
    for file_name in (
        'train_data.pt',
        'train_label.pt',
        'test_data.pt',
        'test_label.pt',
    )
)

print(train_data.size())

torch.Size([50000, 3, 32, 32])


### Make a three-layer net class.

In [None]:
class three_layer_net(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    The network computes ``layer3(relu(layer2(relu(layer1(x)))))`` and returns
    raw class scores (no softmax), which is what ``nn.CrossEntropyLoss``
    expects as input.

    The linear layers are created without bias terms, so the model has exactly
    three weight matrices.
    NOTE(review): bias-free layers are chosen to match the notebook's mention
    of only three gradients (dL/dU, dL/dV, dL/dW); pass ``bias=True`` if biases
    are wanted.

    Args:
        input_size: number of features of each input vector.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        output_size: number of output scores (classes).
    """

    def __init__(self, input_size, hidden_size1, hidden_size2,  output_size):
        super(three_layer_net , self).__init__()

        self.layer1 = nn.Linear(input_size, hidden_size1, bias=False)
        self.layer2 = nn.Linear(hidden_size1, hidden_size2, bias=False)
        self.layer3 = nn.Linear(hidden_size2, output_size, bias=False)

    def forward(self, x):
        """Return the class scores for a batch ``x`` whose last dim is ``input_size``."""
        y       = self.layer1(x)
        y_hat   = F.relu(y)
        z       = self.layer2(y_hat)
        z_hat   = F.relu(z)
        scores  = self.layer3(z_hat)

        return scores

### Build the net with the following size:
* input size = 3,072
* hidden size 1 = 500
* hidden size 2 = 500
* output size = 10
### How many parameters in total? (the one layer net had 30,000 parameters)

In [None]:
# 3072 inputs -> 500 hidden -> 500 hidden -> 10 class scores.
net = three_layer_net(
    input_size=3072,
    hidden_size1=500,
    hidden_size2=500,
    output_size=10,
)
print(net)
utils.display_num_param(net)

### Send the weights of the network to the GPU

In [None]:
# Move all parameters of the network onto the selected device (a no-op move
# when the device is the CPU). Module.to() returns the module itself.
net = net.to(device)

### Choose the criterion and optimizer. Also choose:
* batch size =200
* learning rate =0.01


In [None]:
# Cross-entropy on the raw scores returned by the network.
criterion = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over all parameters of the network.
optimizer=torch.optim.SGD( net.parameters() , lr=0.01 )

# Minibatch size used for both training and evaluation.
bs = 200

### Function to evaluate the network on the test set

In [None]:
def eval_on_test_set():
    """Print the error rate of ``net`` over the whole test set.

    Reads the module-level ``net``, ``test_data``, ``test_label``, ``bs`` and
    ``device``. The test set is processed in minibatches of at most ``bs``
    samples; the final batch may be shorter when ``bs`` does not divide the
    number of test samples.

    Returns:
        None. The error rate is printed as a percentage.
    """
    num_test = test_data.size(0)
    weighted_error = 0.0

    # No parameter update happens here, so skip building autograd graphs.
    with torch.no_grad():

        for i in range(0, num_test, bs):

            # extract the minibatch and send it to the device
            minibatch_data = test_data[i:i+bs].to(device)
            minibatch_label = test_label[i:i+bs].to(device)

            # use the real batch length so a short final batch still reshapes
            batch_len = minibatch_data.size(0)
            inputs = minibatch_data.view(batch_len, 3072)

            # feed it to the network
            scores = net(inputs)

            # error made on this batch
            # assumes utils.get_error returns the fraction of misclassified
            # samples in the batch as a tensor -- TODO confirm in utils
            error = utils.get_error(scores, minibatch_label)

            # weight by batch length so uneven batches do not bias the mean
            weighted_error += error.item() * batch_len

    # compute error rate on the full test set
    total_error = weighted_error / num_test

    print( 'error rate on test set =', total_error*100 ,'percent')

### Do 160 passes through the training set.

In [None]:
# Train for 160 epochs with minibatch SGD. Every 20 epochs, print the running
# training loss/error and evaluate on the test set.
start=time.time()

num_train = train_data.size(0)

for epoch in range(160):

    running_loss=0
    running_error=0
    num_batches=0

    # visit the training samples in a fresh random order each epoch
    shuffled_indices=torch.randperm(num_train)

    for count in range(0,num_train,bs):

        # Set the gradients to zeros
        optimizer.zero_grad()

        # create a minibatch
        indices = shuffled_indices[count:count+bs]
        minibatch_data = train_data[indices]
        minibatch_label = train_label[indices]

        # send the minibatch to the device
        minibatch_data = minibatch_data.to(device)
        minibatch_label = minibatch_label.to(device)

        # reshape the minibatch: flatten each image into a 3072-vector
        # (-1 keeps this valid even when the last batch is shorter than bs)
        inputs = minibatch_data.view(-1, 3072)

        # tell Pytorch to start tracking all operations that will be done on "inputs"
        inputs.requires_grad_()

        # forward the minibatch through the net
        scores = net(inputs)

        # Compute the average of the losses of the data points in the minibatch
        loss = criterion(scores, minibatch_label)

        # backward pass to compute dL/dU, dL/dV and dL/dW
        loss.backward()

        # do one step of stochastic gradient descent: U=U-lr(dL/dU), V=V-lr(dL/dV), ...
        optimizer.step()

        # START COMPUTING STATS

        # add the loss of this batch to the running loss
        running_loss += loss.detach().item()

        # compute the error made on this batch and add it to the running error
        error = utils.get_error( scores.detach() , minibatch_label)
        running_error += error.item()

        num_batches+=1


    # compute stats for the full training set
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = time.time()-start

    if epoch%20 == 0:
        print('epoch=',epoch, '\t time=', elapsed, '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
        eval_on_test_set()
        print(' ')
    
    
        
        

### Choose an image at random from the test set and see how good/bad the predictions are

In [None]:
# choose a picture at random (randint is inclusive on both ends)
idx=randint(0, test_data.size(0)-1)
im=test_data[idx]

# display the picture
utils.show(im)

# feed it to the net and display the confidence scores;
# this is inference only, so no autograd graph is needed
with torch.no_grad():
    scores = net( im.view(1,3072).to(device))
    probs = F.softmax(scores, dim=1)
utils.show_prob_cifar(probs.cpu())