In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cuda device


In [9]:
class MyNeuralNetwork(nn.Module) :
    def __init__(self) :
        super(MyNeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x) :
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [10]:
# Create and instance of NN and move it to Device

model = MyNeuralNetwork().to(device)
print(model)

MyNeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


### To use the model, we pass it the **input data**. 
### This executes the model’s `forward`, along with some *background operations*.
### Do not call `model.forward()` directly!

In [21]:
X = torch.rand(1, 28, 28, device=device) # Random input
logits = model(X)
pred_prob = nn.Softmax(dim=1)(logits)
y_pred = pred_prob.argmax(1)
y_pred
print(f"Predicted class: {y_pred}")

Predicted class: tensor([2], device='cuda:0')


## Model Layers

In [None]:
# sample random mini batch of 3 images
ip_image = torch.rand(3, 28, 28)

# flatten
flatten = nn.Flatten()
f_img = flatten(ip_image) # flat image
f_img.shape

# Linear 
linear_layer = nn.Linear(in_features=28*28, out_features=20)
hidden = linear_layer(f_img) 
hidden.shape

# ReLU
print(f"Before ReLU: {hidden}\n\n")
hidden = nn.ReLU()(hidden)
print(f"After ReLU: {hidden}")
hidden.shape


### Sequential
- nn.Sequential is an ordered container of modules.

In [33]:
# Sequential
seq_modules = nn.Sequential(
    flatten,
    linear_layer,
    nn.ReLU(),
    nn.Linear(20, 10)
)

ip_img = torch.rand(2, 28, 28)
logits = seq_modules(ip_image)
logits

tensor([[ 0.2926,  0.2791, -0.0297,  0.0965,  0.2100, -0.0977, -0.0172, -0.2406,
          0.1639,  0.0310],
        [ 0.4041,  0.2398, -0.0510,  0.0590,  0.0988, -0.1335,  0.0272, -0.1560,
          0.2642, -0.0984],
        [ 0.3587,  0.2055, -0.0432,  0.1264,  0.3078, -0.0731, -0.0199, -0.3034,
          0.1657,  0.0671]], grad_fn=<AddmmBackward0>)

### Softmax
- The logits are *scaled* to values **[0, 1]** representing the model’s **predicted probabilities** for each class.
- `dim` parameter indicates the dimension along which the values must **sum to 1**.

In [35]:
softmax = nn.Softmax(dim=1)
pred_prob = softmax(logits)
pred_prob

tensor([[0.1235, 0.1218, 0.0894, 0.1015, 0.1137, 0.0836, 0.0906, 0.0724, 0.1085,
         0.0950],
        [0.1381, 0.1171, 0.0876, 0.0978, 0.1017, 0.0806, 0.0947, 0.0788, 0.1200,
         0.0835],
        [0.1300, 0.1115, 0.0870, 0.1031, 0.1236, 0.0844, 0.0890, 0.0671, 0.1072,
         0.0971]], grad_fn=<SoftmaxBackward0>)

### Model Parameters


In [36]:
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  MyNeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-9.2748e-03, -1.2833e-02, -1.4886e-02,  ...,  3.4303e-02,
          2.6195e-02,  2.9703e-02],
        [-3.2315e-02, -2.2119e-02, -2.7076e-03,  ..., -2.1394e-02,
          8.9705e-06,  1.1695e-04]], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([0.0198, 0.0010], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0172,  0.0278, -0.0020,  ...,  0.0058, -0.0163,  0.0230],
        [-0.0064, -0.0075, -0.0249,  ...,  0.