# Build the Neural Network


In [16]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

### Get Device for training

We want to be able to train our model on an accelerator such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.

In [None]:
device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


### Define the Class

We define our neural network by subclassing `nn.Module`, and initialize the neural network layers in `__init__`. Every `nn.Module` subclass implements the operations on input data in the `forward` method.

In [40]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()  
        self.flatten = nn.Flatten()
        self.liner_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.liner_relu_stack(x)
        return logits

We create an instance of `Neural Network`, and move it to the `device`, and print its structure.

In [41]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (liner_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [53]:
# Notes

In [52]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([5], device='cuda:0')


### Model Layers

In [55]:
input_image = torch.rand(3, 28, 28)
print(input_image)
print(input_image.size())

tensor([[[0.4636, 0.3223, 0.5861,  ..., 0.0394, 0.1890, 0.7044],
         [0.2432, 0.6714, 0.7969,  ..., 0.5217, 0.2478, 0.7964],
         [0.3349, 0.9554, 0.3277,  ..., 0.4157, 0.6213, 0.6595],
         ...,
         [0.8620, 0.9463, 0.9309,  ..., 0.9475, 0.5156, 0.4633],
         [0.8687, 0.5993, 0.2405,  ..., 0.0161, 0.9048, 0.2285],
         [0.3180, 0.0745, 0.6252,  ..., 0.0412, 0.7314, 0.5756]],

        [[0.9902, 0.4668, 0.8115,  ..., 0.6918, 0.0766, 0.4585],
         [0.0990, 0.4542, 0.6419,  ..., 0.5744, 0.5756, 0.1856],
         [0.1013, 0.9827, 0.6108,  ..., 0.0622, 0.5485, 0.2581],
         ...,
         [0.8140, 0.5561, 0.5642,  ..., 0.2131, 0.5737, 0.3240],
         [0.9593, 0.3754, 0.6515,  ..., 0.8251, 0.0898, 0.9540],
         [0.5395, 0.0059, 0.8246,  ..., 0.5200, 0.6519, 0.5417]],

        [[0.1180, 0.4795, 0.7787,  ..., 0.9871, 0.2208, 0.4813],
         [0.8089, 0.1534, 0.6387,  ..., 0.3003, 0.2958, 0.0637],
         [0.1489, 0.0865, 0.0697,  ..., 0.8246, 0.1198, 0.

### nn.Flatten

We initialize the `nn.flatten` layer to convert each 2D 28x28 image a contiguous array of 784 pixel values( the minibatch dimension (at dim=0) is maintained).

In [58]:
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image)
print(flat_image.size())

tensor([[0.4636, 0.3223, 0.5861,  ..., 0.0412, 0.7314, 0.5756],
        [0.9902, 0.4668, 0.8115,  ..., 0.5200, 0.6519, 0.5417],
        [0.1180, 0.4795, 0.7787,  ..., 0.9203, 0.1468, 0.5499]])
torch.Size([3, 784])


### nn.Linear

The linear layer is a module that applies a linear transformation on the input using its stored weights and biases.

In [60]:
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


### nn.ReLU

negative values -> 0

In [72]:
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}\n\n")

Before ReLU: tensor([[0.2654, 0.0000, 0.0000, 0.1017, 0.6601, 0.4090, 0.0537, 0.3378, 0.0000,
         0.0000, 0.0000, 0.2667, 0.1671, 0.6319, 0.4313, 0.0000, 0.0000, 0.4930,
         0.0000, 0.2605],
        [0.4304, 0.0000, 0.0000, 0.4126, 0.6318, 0.1676, 0.0948, 0.0684, 0.0465,
         0.0000, 0.0000, 0.0962, 0.2016, 0.4992, 0.0325, 0.0000, 0.0000, 0.2932,
         0.0000, 0.0000],
        [0.2428, 0.0213, 0.0000, 0.3006, 0.5404, 0.4285, 0.0321, 0.1925, 0.0000,
         0.0000, 0.0572, 0.0000, 0.2984, 0.0000, 0.6748, 0.0000, 0.0000, 0.3365,
         0.0000, 0.2540]], grad_fn=<ReluBackward0>)


After ReLU: tensor([[0.2654, 0.0000, 0.0000, 0.1017, 0.6601, 0.4090, 0.0537, 0.3378, 0.0000,
         0.0000, 0.0000, 0.2667, 0.1671, 0.6319, 0.4313, 0.0000, 0.0000, 0.4930,
         0.0000, 0.2605],
        [0.4304, 0.0000, 0.0000, 0.4126, 0.6318, 0.1676, 0.0948, 0.0684, 0.0465,
         0.0000, 0.0000, 0.0962, 0.2016, 0.4992, 0.0325, 0.0000, 0.0000, 0.2932,
         0.0000, 0.0000],
       

### nn.Sequential


In [None]:
seq_modules = nn.Sequential(
    flatten, 
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)

### nn.Softmax

In [None]:
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

### Model Parameters



In [None]:
print(f"Model structure: {model}\n\n")
      
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (liner_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: liner_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0061,  0.0182, -0.0245,  ...,  0.0004, -0.0054, -0.0235],
        [-0.0218,  0.0173, -0.0078,  ..., -0.0297,  0.0113,  0.0020]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: liner_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0070, -0.0066], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: liner_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0393,  0.0402, -0.0049,  ...,  0.0286, -0.0008,  0.0439],
        [ 0.0078,  0.0283,  0.0293,  ...,  0.0302, -0.0098,  0.0291]],
       device='cuda:0', grad_fn=<SliceB

[more info](http://docs.pytorch.org/docs/stable/nn.html)