In [1]:
import torch

from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

ds = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
    target_transform=Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(0, torch.tensor(y), value=1))
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data\FashionMNIST\raw\train-images-idx3-ubyte.gz


26422272it [00:04, 6599492.94it/s]                              


Extracting data\FashionMNIST\raw\train-images-idx3-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


29696it [00:00, 995285.84it/s]           


Extracting data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


4422656it [00:00, 6084225.44it/s]                             


Extracting data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


6144it [00:00, ?it/s]                   

Extracting data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to data\FashionMNIST\raw




  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [2]:
ds

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
Target transform: Lambda()

# Unit 5: Building the model layers

In [3]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cuda device


## Building a neural network

The base class for all neural network modules in PyTorch is `torch.nn.Module`. More generally, the [torch.nn](https://pytorch.org/docs/stable/nn.html) namespace provides all the building blocks for a neural net.

In [5]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


**NB**: `forward()` shouldn't be called directly

In [6]:
model = NeuralNetwork().to(device)
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)

In [7]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_prob = nn.Softmax(dim=1)(logits)
y_pred = pred_prob.argmax(1)
print(f'Predicted class: {y_pred}')

Predicted class: tensor([1], device='cuda:0')


## Model breakdown

In [8]:
input_image = torch.rand(3, 28, 28)
print(input_image.size())
input_image

torch.Size([3, 28, 28])


tensor([[[2.1058e-01, 5.9001e-01, 3.1618e-01,  ..., 3.9524e-01,
          3.2162e-01, 3.6114e-01],
         [6.4634e-01, 4.4112e-01, 4.0760e-01,  ..., 5.8843e-01,
          1.0455e-01, 6.7277e-01],
         [8.5326e-01, 4.0920e-01, 5.4896e-01,  ..., 5.9516e-01,
          7.5553e-01, 8.3157e-01],
         ...,
         [2.5806e-01, 1.4574e-01, 7.6796e-01,  ..., 1.3272e-01,
          2.4663e-01, 2.7345e-01],
         [1.3655e-01, 3.4637e-01, 5.1549e-01,  ..., 8.7665e-01,
          8.6684e-01, 8.5020e-01],
         [3.3296e-01, 5.0664e-01, 7.7467e-01,  ..., 4.8543e-01,
          6.2337e-01, 8.1998e-01]],

        [[1.9082e-01, 2.1380e-01, 5.7392e-01,  ..., 6.4819e-02,
          6.2593e-03, 1.0868e-01],
         [1.5603e-01, 8.4725e-01, 3.9217e-01,  ..., 5.9693e-01,
          7.4372e-01, 6.2483e-01],
         [7.6496e-01, 6.9977e-01, 5.0129e-01,  ..., 3.3123e-01,
          3.0496e-01, 9.9120e-03],
         ...,
         [4.6764e-01, 9.5213e-01, 2.9876e-01,  ..., 6.9338e-01,
          8.270

In [9]:
flatten = nn.Flatten()
flat_image = flatten(input_image)  # 28x28 image -> array of pixel values
print(flat_image.size())

torch.Size([3, 784])


In [10]:
layer1 = nn.Linear(in_features=28*28, out_features=20)  # linear transformation on the input using stored weights and biases
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


In [11]:
print(f'Before ReLU: {hidden1}\n=======================\n')
hidden1 = nn.ReLU()(hidden1)
print(f'After ReLU: {hidden1}')

Before ReLU: tensor([[ 0.0696, -0.4402,  0.1617, -0.0284,  0.5393,  0.1870,  0.3323,  0.0616,
         -0.5229, -0.0705,  0.0662, -0.3027,  0.4374,  0.1862, -0.3497,  0.1750,
          0.0488,  0.1443,  0.0456,  0.2527],
        [ 0.3013, -0.1426,  0.2289, -0.1085, -0.0030,  0.1591,  0.3011,  0.0328,
         -0.2768, -0.0633,  0.1825,  0.1095,  0.6924, -0.5917, -0.4833,  0.2557,
         -0.2206,  0.0251,  0.0274, -0.1531],
        [ 0.1122, -0.1027,  0.7141, -0.0857,  0.8524,  0.2045,  0.4231, -0.3580,
         -0.1873,  0.0025, -0.1181, -0.0369,  0.6290, -0.2200, -0.0826,  0.2365,
         -0.0490,  0.3936, -0.2644,  0.2209]], grad_fn=<AddmmBackward>)

After ReLU: tensor([[0.0696, 0.0000, 0.1617, 0.0000, 0.5393, 0.1870, 0.3323, 0.0616, 0.0000,
         0.0000, 0.0662, 0.0000, 0.4374, 0.1862, 0.0000, 0.1750, 0.0488, 0.1443,
         0.0456, 0.2527],
        [0.3013, 0.0000, 0.2289, 0.0000, 0.0000, 0.1591, 0.3011, 0.0328, 0.0000,
         0.0000, 0.1825, 0.1095, 0.6924, 0.0000, 0.0000

### [nn.Sequential](https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html)

An _ordered container_ of moduels. Data is passed through the modules in the order _as defined_.

In [12]:
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
logits = seq_modules(input_image)

### nn.Softmax

Parameter `dim` indicates the dimension along which softmax is applied.

In [13]:
softmax = nn.Softmax(dim=1)
pred_prob = softmax(logits)
pred_prob

tensor([[0.0835, 0.0947, 0.1264, 0.1082, 0.0939, 0.1037, 0.1090, 0.1058, 0.1116,
         0.0632],
        [0.0751, 0.1009, 0.1076, 0.1178, 0.0951, 0.1005, 0.1226, 0.0978, 0.1098,
         0.0727],
        [0.0798, 0.1112, 0.1283, 0.1072, 0.0914, 0.0987, 0.1294, 0.1001, 0.0956,
         0.0583]], grad_fn=<SoftmaxBackward>)

In [14]:
pred_prob.size()

torch.Size([3, 10])

### Model parameters

All parameters (weights and biases) of a subclass of `nn.Module` are tracked in the model object and can be accessed with `parameters()` or `named_parameters()`.

In [16]:
for name, param in model.named_parameters():
    print(f'Layer: {name}\nSize: {param.size()}\nValues:\n{param[:2]}\n==============\n')

Layer: linear_relu_stack.0.weight
Size: torch.Size([512, 784])
Values:
tensor([[ 0.0166, -0.0087,  0.0113,  ...,  0.0025, -0.0189, -0.0030],
        [-0.0200,  0.0157, -0.0264,  ..., -0.0177, -0.0215,  0.0225]],
       device='cuda:0', grad_fn=<SliceBackward>)

Layer: linear_relu_stack.0.bias
Size: torch.Size([512])
Values:
tensor([-0.0073,  0.0322], device='cuda:0', grad_fn=<SliceBackward>)

Layer: linear_relu_stack.2.weight
Size: torch.Size([512, 512])
Values:
tensor([[ 6.2257e-05, -1.7939e-02, -1.0175e-02,  ...,  2.7330e-02,
         -4.1035e-02, -3.8821e-02],
        [-1.7885e-02, -3.7783e-02,  3.2482e-02,  ...,  3.6938e-03,
         -3.0572e-02, -2.3671e-02]], device='cuda:0', grad_fn=<SliceBackward>)

Layer: linear_relu_stack.2.bias
Size: torch.Size([512])
Values:
tensor([ 0.0150, -0.0423], device='cuda:0', grad_fn=<SliceBackward>)

Layer: linear_relu_stack.4.weight
Size: torch.Size([10, 512])
Values:
tensor([[ 0.0160, -0.0320,  0.0066,  ..., -0.0060,  0.0377, -0.0160],
        [