# Convolutional Neural Nets in PyTorch

In [None]:
%matplotlib inline
import torch
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose, Normalize
import matplotlib.pyplot as plt

In [None]:
# Train on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Define a transform that first converts the original PIL image to a Tensor with float values in the range $[0,1]$ and then normalizes the values by subtracting the mean (`mu`) and dividing by standard deviation (`sd`):

In [None]:
# Normalization statistics: with both set to 0.5, (x - mu) / sd maps
# pixel values from [0, 1] to [-1, 1].
mu = 0.5
sd = 0.5

# Open question: how should mu and sd be chosen in practice, and does it matter?
channel_means = (mu,) * 3
channel_sds = (sd,) * 3
transform = Compose([
    ToTensor(),                             # PIL image -> float tensor in [0, 1]
    Normalize(channel_means, channel_sds),  # same statistics for all 3 channels
])

Load CIFAR10 dataset

In [None]:
# Class names; the integer label of an example indexes into this tuple.
labels_map = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Both splits use the same storage location, download flag and transform;
# only the `train` flag differs.
cifar_options = dict(root='./data', download=True, transform=transform)
trainset = datasets.CIFAR10(train=True, **cifar_options)
testset = datasets.CIFAR10(train=False, **cifar_options)

Create a `DataLoader` object that iterates over batches of the data:

In [None]:
# Both loaders share the same settings: mini-batches of 8 images,
# shuffled, loaded with 2 workers.
loader_options = dict(batch_size=8, shuffle=True, num_workers=2)
trainloader = DataLoader(trainset, **loader_options)
testloader = DataLoader(testset, **loader_options)

In [None]:
# Peek at the first mini-batch only, to check tensor shapes and the label dtype.
first_batch = next(iter(trainloader), None)
if first_batch is not None:
    X, y = first_batch
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)

In [None]:
def imshow(img, mean, sd):
    """Display a normalized, channel-first image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (C, H, W) that was normalized as
            ``(x - mean) / sd``.
        mean: Mean that was subtracted during normalization.
        sd: Standard deviation that was divided by during normalization.
    """
    img = img * sd + mean    # unnormalize
    # detach()/cpu() keep this working for tensors that track gradients or
    # live on the GPU; for plain CPU tensors they are no-ops.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channel-last (H, W, C)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.axis('off')

In [None]:
from torchvision.utils import make_grid

# get some random training images
dataiter = iter(trainloader)
# Use the built-in next(): the iterator's .next() method is not available
# in recent PyTorch releases.
images, labels = next(dataiter)

plt.figure(figsize=(16,2))
# show images
imshow(make_grid(images), mean=mu, sd=sd)
# print labels (one per image in the batch, whatever the batch size is)
print(' '.join('%5s' % labels_map[label] for label in labels.tolist()))

## Build LeNet

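Output size of a convolution (or pooling) layer along one spatial dimension, for input size $n_{in}$, padding $p$, kernel size $f$ and stride $s$: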

$$n_{out} = \left\lfloor \frac{n_{in} + 2p - f}{s} \right\rfloor + 1 $$

In [None]:
class LeNet(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 images and 10 classes.

    Spatial sizes follow n_out = floor((n_in + 2p - f) / s) + 1:
    32 -> conv 5x5 -> 28 -> pool 2x2 -> 14 -> conv 5x5 -> 10 -> pool 2x2 -> 5,
    which yields the 16 * 5 * 5 features consumed by the first linear layer.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two conv/ReLU/max-pool stages, then flatten.
        self.convolutions = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten()
        )
        # Classifier head. The ReLUs between the linear layers are required:
        # without them the three layers collapse into a single linear map.
        self.fc_layers = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        """Return the class logits, shape (N, 10), for a batch ``x`` of shape (N, 3, 32, 32)."""
        cn = self.convolutions(x)
        logits = self.fc_layers(cn)
        return logits

Instantiate a model:

In [None]:
# Move the parameters to the selected device so they match the batches
# that train()/test() send there with X.to(device).
model = LeNet().to(device)

In [None]:
# Display the model's layer structure.
model

In [None]:
# List every parameter tensor of the convolutional part together with its size.
conv_parameters = model.convolutions.named_parameters()
for name, param in conv_parameters:
    param_size = param.size()
    print(f"Layer: {name} | Size: {param_size}\n")

In [None]:
# First convolution (index 0 of the Sequential container).
first_conv = model.convolutions[0]
print("Layer 0, weight dimensions:", first_conv.weight.shape)
print("Layer 0, bias dimensions:", first_conv.bias.shape)

In [None]:
# Second convolution (index 3 of the Sequential container).
second_conv = model.convolutions[3]
print("Layer 3, weight dimensions: ", second_conv.weight.shape)
print("Layer 3, bias dimensions: ", second_conv.bias.shape)

In [None]:
# Cross-entropy loss on the logits, minimized by SGD with momentum.
loss_fn = nn.CrossEntropyLoss()
sgd_options = dict(lr=0.001, momentum=0.9)
optimizer = torch.optim.SGD(model.parameters(), **sgd_options)

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` after every mini-batch.

    Every 500th batch, prints that batch's loss and a running sample count
    (batch index times the size of the current batch).
    """
    size = len(dataloader.dataset)
    model.train()
    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # reset gradients from the previous step (otherwise they accumulate)
        optimizer.zero_grad()

        # forward pass and loss
        batch_loss = loss_fn(model(inputs), targets)

        # backpropagation, then the parameter update
        batch_loss.backward()
        optimizer.step()

        if step % 500 != 0:
            continue
        loss = batch_loss.item()
        current = step * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and the mean per-batch loss."""
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()

    total_loss = 0
    n_correct = 0
    # gradients are not needed for evaluation
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            total_loss += loss_fn(logits, targets).item()
            # a prediction is correct when the highest-scoring class equals the label
            hits = logits.argmax(1) == targets
            n_correct += hits.type(torch.float).sum().item()

    test_loss = total_loss / num_batches   # average loss per batch
    correct = n_correct / size             # fraction of correctly classified samples
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

Questions:
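+ Weight initialization? Not something we have to handle explicitly here: PyTorch layers such as `nn.Conv2d` and `nn.Linear` initialize their own weights with sensible defaults when they are constructed.
+ Do we need to seed the results? Calling `torch.manual_seed()` once, globally, before creating the model and the data loaders is probably enough for reproducibility. **TODO:** confirm (I can ask around).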

In [None]:
# One training pass followed by one evaluation pass per epoch.
epochs = 5
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}\n-------------------------------")
    train(trainloader, model, loss_fn, optimizer)
    test(testloader, model, loss_fn)
print("Done!")

We can use TensorBoard to visualize our model training and view individual train or test instances:

In [None]:
# PyTorch TensorBoard support
from torch.utils.tensorboard import SummaryWriter

For instance, we can show images in TensorBoard:

In [None]:
# Extract one mini-batch of training images (8 per batch, as configured on trainloader)
dataiter = iter(trainloader)
# Use the built-in next(): the iterator's .next() method is not available
# in recent PyTorch releases.
images, labels = next(dataiter)

img_grid = make_grid(images)
imshow(img_grid, mean=mu, sd=sd)

Below, we use the `add_image()` call on `SummaryWriter` to log the image for consumption by TensorBoard, and we also call `flush()` to make sure it’s written to disk right away.

In [None]:
# Default log_dir argument is "runs" - but it's good to be specific
# torch.utils.tensorboard.SummaryWriter is imported above
# (the run is named after the dataset actually used in this notebook: CIFAR10)
writer = SummaryWriter('runs/cifar10_experiment_1')

# Write image data to TensorBoard log dir.
# img_grid is still normalized to [-1, 1], but add_image() expects float
# images in [0, 1], so undo the normalization before logging.
writer.add_image('CIFAR10 training images', (img_grid * sd + mu).clamp(0, 1))
writer.flush()

# To view, start TensorBoard on the command line with:
#   tensorboard --logdir=runs
# ...and open a browser tab to http://localhost:6006/

Now let’s train for three more epochs, logging the average training loss and the validation loss to TensorBoard every 1000 batches (the test set serves as the validation set here):

In [None]:
# Number of mini-batches in one pass over the training loader
len(trainloader)

In [None]:
print(len(testloader))

# Batches must live on the same device as the model's parameters.
model_device = next(model.parameters()).device

for epoch in range(3):  # loop over the dataset multiple times
    running_loss = 0.0

    for i, data in enumerate(trainloader, 0):
        # basic training loop
        inputs, labels = data
        inputs, labels = inputs.to(model_device), labels.to(model_device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 1000 == 999:    # Every 1000 mini-batches...
            print('Batch {}'.format(i + 1))
            # Check against the validation set
            running_vloss = 0.0

            model.eval()  # put the layers in evaluation mode
            # Evaluation mode alone does not stop autograd from recording
            # operations; torch.no_grad() is what disables gradient tracking.
            with torch.no_grad():
                for vinputs, vlabels in testloader:
                    vinputs = vinputs.to(model_device)
                    vlabels = vlabels.to(model_device)
                    voutputs = model(vinputs)
                    vloss = loss_fn(voutputs, vlabels)
                    running_vloss += vloss.item()
            model.train()  # back to training mode

            # running_loss is reset every 1000 batches, so this is a per-batch mean
            avg_loss = running_loss / 1000
            avg_vloss = running_vloss / len(testloader)

            # Log the running loss averaged per batch
            writer.add_scalars('Training vs. Validation Loss',
                            { 'Training' : avg_loss, 'Validation' : avg_vloss },
                            epoch * len(trainloader) + i)

            running_loss = 0.0
print('Finished Training')

writer.flush()

TensorBoard can also be used to examine the data flow within your model. To do this, call the `add_graph()` method with a model and sample input.

In [None]:
# Again, grab a single mini-batch of images
dataiter = iter(trainloader)
# Use the built-in next(): the iterator's .next() method is not available
# in recent PyTorch releases.
images, labels = next(dataiter)

# add_graph() will trace the sample input through your model,
# and render it as a graph. The sample has to be on the same device as the
# model's parameters for the trace to run.
writer.add_graph(model, images.to(next(model.parameters()).device))
writer.flush()