In [1]:
import torch
import numpy as np
import os.path
import utils
import time

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### Download the CIFAR dataset 
* 50000 32 * 32 RGB training image 
* 10000 32 * 32 RGB test image

In [2]:
from utils import check_cifar_dataset_exists
data_path=check_cifar_dataset_exists()

train_data=torch.load(data_path+'cifar/train_data.pt')
train_label=torch.load(data_path+'cifar/train_label.pt')
test_data=torch.load(data_path+'cifar/test_data.pt')
test_label=torch.load(data_path+'cifar/test_label.pt')

print(train_data.size())
print(train_label.size())
print(test_data.size())

torch.Size([50000, 3, 32, 32])
torch.Size([50000])
torch.Size([10000, 3, 32, 32])


### Compute average pixel intensity over all training set and all channels

In [3]:
mean= train_data.mean()

print(mean)

tensor(0.4733)


### Compute standard deviation

In [4]:
std= train_data.std()

print(std)

tensor(0.2516)


### Make a Resnet convnet Class

In [None]:
# The BasicBlock is the repeated block in Resnet.
class BasicBlock(nn.Module):
    
    def __init__(self, in_channels):
        super().__init__()
        
        # block 1 :  channel x 32 x 32 -> channel x 32 x 32 -> channel x 32 x 32 (2 layers)
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_channels,in_channels, kernel_size=3, stride = 1, padding=1, bias=False),
            nn.BatchNorm2d(in_channels)
        )

        # identity shorcut
        self.shortcut = nn.Sequential()
        
    def forward(self, x):
        return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))  

In [None]:
class ResNet(nn.Module):

    def __init__(self, num_classes=10):
        super().__init__()
        
        # income conv 
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1, stride = 1, bias = False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True))
        
        self.conv2 = BasicBlock(16)
        self.conv2_1 = BasicBlock(16)
        self.conv2_2 = BasicBlock(16)
        
        self.conv3 = nn.Conv2d(16, 32, kernel_size = 3, stride = 2, padding = 1) # subsampling using a stride of 2.
        self.conv4 = BasicBlock(32)
        self.conv4_1 = BasicBlock(32)
        self.conv4_2 = BasicBlock(32)
        
        self.conv5 = nn.Conv2d(32, 64, kernel_size = 3, stride = 2, padding = 1) # subsampling
        self.conv6 = BasicBlock(64)
        self.conv6_1 = BasicBlock(64)
        self.conv6_2 = BasicBlock(64)
        
#         self.avg_pool = nn.AdaptiveAvgPool2d((8, 8))

        # linear layers:   64 x 8 x 8 --> 4096 --> 10
        self.fc = nn.Linear(4096, 10)
        

    def forward(self, x):
        output = self.conv1(x)
        
        output = self.conv2(output)
        output = self.conv2_1(output)
        output = self.conv2_2(output)
        
        output = self.conv3(output)
        
        output = self.conv4(output)
        output = self.conv4_1(output)
        output = self.conv4_2(output)
        
        output = self.conv5(output)
        
        output = self.conv6(output)
        output = self.conv6_1(output)
        output = self.conv6_2(output)
        output = output.view(output.size(0), -1)
        # bs x 4096 ->  bs * 10
#         print(output.size())

        x = self.fc(output)
        x = F.log_softmax(x, dim =1)

        return x 

In [None]:
model = ResNet()
# print(model)

In [None]:
utils.display_num_param(model)

There are 356218 (0.36 million) parameters in this neural network


In [None]:
bs=5
x=torch.rand(bs,3,32,32)
y = model(x)
print(y.size())

torch.Size([5, 10])


### Put the network to GPU

In [None]:
gpu_id = 0
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

device = torch.device('cuda')
print(device)

cuda


### Send the weights of the networks to the GPU (as well as the mean and std)

In [None]:
model = model.to(device)

mean=mean.to(device)

std=std.to(device)

### Choose the criterion, learning rate, and batch size.

In [None]:
criterion = nn.NLLLoss()

my_lr=0.1 

bs= 128

### Function to evaluate the network on the test set

In [None]:
def eval_on_test_set():

    running_error=0
    num_batches=0

    for i in range(0,10000,bs):

        minibatch_data =  test_data[i:i+bs]
        minibatch_label = test_label[i:i+bs]

        minibatch_data=minibatch_data.to(device)
        minibatch_label=minibatch_label.to(device)
        
        inputs = (minibatch_data - mean)/std

        scores=model( inputs ) 

        error = utils.get_error( scores , minibatch_label)

        running_error += error.item()

        num_batches+=1

    total_error = running_error/num_batches
    print( 'error rate on test set =', total_error*100 ,'percent')

### Do 64k passes through the training set. Divide the learning rate by 10 at epoch 32k and 48k

In [None]:
start=time.time()

for epoch in range(1,64000):
    
    # divide the learning rate by 10 at epoch 32k and 48k
    if epoch==32000 or epoch == 48000:
        my_lr = my_lr / 10
    
    # create a new optimizer at the beginning of each epoch: give the current learning rate.   
    optimizer=torch.optim.SGD( model.parameters() , lr=my_lr )
        
    # set the running quatities to zero at the beginning of the epoch
    running_loss=0
    running_error=0
    num_batches=0
    
    # set the order in which to visit the image from the training set
    shuffled_indices = torch.randperm(50000)
 
    for count in range(0,50000,bs):
    
        # Set the gradients to zeros
        optimizer.zero_grad()
        
        # create a minibatch       
        indices = shuffled_indices[count:count+bs]
        minibatch_data = train_data[indices]
        minibatch_label = train_label[indices]
        
        # send them to the gpu
        minibatch_data=minibatch_data.to(device)
        minibatch_label=minibatch_label.to(device)
        
        # normalize the minibatch (this is the only difference compared to before!)
        inputs = (minibatch_data - mean)/std
        
        # tell Pytorch to start tracking all operations that will be done on "inputs"
        inputs.requires_grad_()

        # forward the minibatch through the net 
        scores=model( inputs ) 

        # Compute the average of the losses of the data points in the minibatch
        loss =  criterion( scores , minibatch_label) 
        
        # backward pass to compute dL/dU, dL/dV and dL/dW   
        loss.backward()

        # do one step of stochastic gradient descent: U=U-lr(dL/dU), V=V-lr(dL/dU), ...
        optimizer.step()
        

        # START COMPUTING STATS
        
        # add the loss of this batch to the running loss
        running_loss += loss.detach().item()
        
        # compute the error made on this batch and add it to the running error       
        error = utils.get_error( scores.detach() , minibatch_label)
        running_error += error.item()
        
        num_batches+=1        
    
    
    # compute stats for the full training set
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = (time.time()-start)/60
    

    print('epoch=',epoch, '\t time=', elapsed,'min','\t lr=', my_lr  ,'\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
    eval_on_test_set() 
    print(' ')
    
           
    

epoch= 1 	 time= 0.11096932093302408 min 	 lr= 0.1 	 loss= 2.2472341453937617 	 error= 74.80458758981027 percent
error rate on test set = 61.28362341772152 percent
 
epoch= 2 	 time= 0.22154191335042317 min 	 lr= 0.1 	 loss= 1.575421626305641 	 error= 57.12715792838875 percent
error rate on test set = 57.23892405063291 percent
 
epoch= 3 	 time= 0.3335428357124329 min 	 lr= 0.1 	 loss= 1.4071412043803184 	 error= 50.85997442760126 percent
error rate on test set = 49.01107594936709 percent
 
epoch= 4 	 time= 0.4475507974624634 min 	 lr= 0.1 	 loss= 1.2699607342405392 	 error= 45.86437019850592 percent
error rate on test set = 45.52017405063291 percent
 
epoch= 5 	 time= 0.5601283073425293 min 	 lr= 0.1 	 loss= 1.149068681023005 	 error= 41.10813618011182 percent
error rate on test set = 41.732594936708864 percent
 
epoch= 6 	 time= 0.6763214747111003 min 	 lr= 0.1 	 loss= 1.0454528398830871 	 error= 37.106777487508474 percent
error rate on test set = 38.686708860759495 percent
 
epoch= 

In [None]:
import matplotlib.pyplot as plt
def show(X):
    if X.dim() == 3 and X.size(0) == 3:
        plt.imshow( np.transpose(  X.numpy() , (1, 2, 0))  )
        plt.show()
    elif X.dim() == 2:
        plt.imshow(   X.numpy() , cmap='gray'  )
        plt.show()
    else:
        print('WRONG TENSOR SIZE')

In [None]:
def show_prob_cifar(p):


    p=p.data.squeeze().numpy()

    ft=15
    label = ('airplane', 'automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship','Truck' )
    #p=p.data.squeeze().numpy()
    y_pos = np.arange(len(p))*1.2
    target=2
    width=0.9
    col= 'blue'
    #col='darkgreen'

    plt.rcdefaults()
    fig, ax = plt.subplots()

    # the plot
    ax.barh(y_pos, p, width , align='center', color=col)

    ax.set_xlim([0, 1.3])
    #ax.set_ylim([-0.8, len(p)*1.2-1+0.8])

    # y label
    ax.set_yticks(y_pos)
    ax.set_yticklabels(label, fontsize=ft)
    ax.invert_yaxis()  
    #ax.set_xlabel('Performance')
    #ax.set_title('How fast do you want to go today?')

    # x label
    ax.set_xticklabels([])
    ax.set_xticks([])
    #x_pos=np.array([0, 0.25 , 0.5 , 0.75 , 1])
    #ax.set_xticks(x_pos)
    #ax.set_xticklabels( [0, 0.25 , 0.5 , 0.75 , 1] , fontsize=15)

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_linewidth(4)


    for i in range(len(p)):
        str_nb="{0:.2f}".format(p[i])
        ax.text( p[i] + 0.05 , y_pos[i] ,str_nb ,
                 horizontalalignment='left', verticalalignment='center',
                 transform=ax.transData, color= col,fontsize=ft)
    plt.show()

In [None]:
# choose a picture at random
from random import randint
idx=randint(0, 10000-1)
im=test_data[idx]

# diplay the picture
show(im)

# send to device, rescale, and view as a batch of 1 
im = im.to(device)
im= (im-mean) / std
im=im.view(1,3,32,32)

# feed it to the net and display the confidence scores
scores =  model(im) 
probs= F.softmax(scores, dim=1)
show_prob_cifar(probs.cpu())