In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Runtime configuration: fix the RNG seed and choose the training device.
SEED = 42
# Seed PyTorch's global generator so the random inputs and the randomly
# initialised layer weights created in later cells are repeatable across
# Restart & Run All on the same setup.
torch.manual_seed(SEED)

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [4]:
# create neural network
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten the input, then a Linear/ReLU stack.

    With the default arguments the architecture is 784 -> 500 -> 300 -> 10,
    i.e. two hidden layers followed by a linear output layer that returns
    raw logits (no softmax is applied inside the module).

    Args:
        in_features: Number of values per sample after flattening
            (defaults to 28 * 28).
        hidden_sizes: Width of each hidden layer, in order. Every hidden
            layer is followed by a ReLU. Defaults to ``(500, 300)``.
        num_classes: Number of output logits per sample. Defaults to 10.
    """

    def __init__(self, in_features=28 * 28, hidden_sizes=(500, 300), num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()

        # Build Linear -> ReLU pairs for every hidden width, then the output
        # layer. Layers are created in order so that, for the defaults, the
        # sub-module indices (0, 2, 4 for the Linear layers) are unchanged.
        layers = []
        previous_width = in_features
        for width in hidden_sizes:
            layers.append(nn.Linear(previous_width, width))
            layers.append(nn.ReLU())
            previous_width = width
        layers.append(nn.Linear(previous_width, num_classes))
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits for a batch ``x``.

        ``x`` is flattened from dimension 1 onwards (the first dimension is
        kept as the batch dimension) before being fed to the layer stack.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
# Build the network, then place its parameters on the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=500, bias=True)
    (1): ReLU()
    (2): Linear(in_features=500, out_features=300, bias=True)
    (3): ReLU()
    (4): Linear(in_features=300, out_features=10, bias=True)
  )
)


In [7]:
# Feed a single random 28x28 input through the model and report the class
# with the highest probability.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)  # raw, unnormalised scores from the network
pred_probab = torch.softmax(logits, dim=1)  # normalise across dimension 1
y_pred = torch.argmax(pred_probab, dim=1)  # index of the largest probability
print(f"Predicted class: {y_pred}")

Predicted class: tensor([4])


In [8]:
# Sample mini-batch: three 28x28 tensors of uniform random values.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [9]:
# Flatten: collapse every dimension after the first into one, so each
# 28x28 entry of the mini-batch becomes a vector of 784 values.
flatten = nn.Flatten(start_dim=1, end_dim=-1)  # explicit form of the defaults
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [10]:
# Linear transform: map each 784-value vector to 20 features using the
# layer's weights and bias.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [11]:
# Non-linear activation (ReLU): print the activations before and after so
# the replacement of negative entries by zero is visible.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-3.3560e-01, -4.5919e-01,  2.4642e-01, -3.3313e-01, -5.2785e-02,
         -6.9358e-01,  2.4218e-01,  2.6250e-01,  1.8091e-01,  2.6777e-01,
          1.0016e-01,  3.4641e-01,  2.0225e-01,  2.2813e-01, -1.4953e-01,
          2.6209e-01, -1.0052e-01,  1.3246e-03,  1.4766e-01,  1.4833e-01],
        [-3.3730e-01, -1.0123e-01,  2.1543e-01, -1.3996e-01,  3.9756e-02,
         -4.0434e-01,  9.2186e-02,  1.6282e-01,  8.7210e-03, -3.1627e-01,
         -4.5786e-01,  4.0195e-01,  1.0305e-01, -6.7269e-02, -2.4217e-01,
          1.6081e-01, -3.3481e-01,  2.0169e-02,  4.9292e-01,  4.2445e-04],
        [-3.8932e-01, -3.2384e-01,  1.0678e-01,  2.2381e-01, -2.7386e-01,
         -7.2335e-01,  2.6898e-01,  1.9139e-01,  1.4466e-01, -2.0953e-01,
         -4.9018e-01,  3.2371e-01,  1.6510e-01, -8.0963e-02, -2.7826e-04,
          7.2788e-02,  1.7909e-02,  1.5802e-01,  4.6243e-01, -1.2237e-01]],
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000e+00, 0.0000e+00, 2.4642e-01, 0.0

In [26]:
# nn.Sequential: chain the flatten and linear layers created in earlier
# cells with a ReLU and a final 20 -> 10 linear layer that yields the logits.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10),  # output layer
)
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)  # shape (3, 10): one row per input

# Get predictions: softmax over dimension 1 normalises each row of logits,
# and argmax over the same dimension picks the highest-scoring index per row.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
print(pred_probab.argmax(dim=1))  # `dim` is the documented PyTorch keyword
pred_probab

tensor([9, 2, 9])


tensor([[0.0857, 0.1043, 0.1170, 0.0898, 0.0826, 0.0969, 0.0705, 0.1154, 0.1067,
         0.1312],
        [0.0804, 0.1010, 0.1305, 0.0910, 0.0733, 0.1067, 0.0692, 0.1110, 0.1074,
         0.1297],
        [0.0814, 0.1027, 0.1206, 0.0924, 0.0841, 0.0979, 0.0772, 0.1124, 0.0978,
         0.1335]], grad_fn=<SoftmaxBackward0>)

In [27]:
# Model parameters: print the module structure, then for every registered
# parameter its name, its size and a preview made of its first two entries.
print(f"Model structure: {model}\n\n")

for layer_name, weights in model.named_parameters():
    preview = weights[:2]
    print(f"Layer: {layer_name} | Size: {weights.size()} | Values : {preview} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=500, bias=True)
    (1): ReLU()
    (2): Linear(in_features=500, out_features=300, bias=True)
    (3): ReLU()
    (4): Linear(in_features=300, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([500, 784]) | Values : tensor([[-0.0279, -0.0203,  0.0219,  ..., -0.0296, -0.0075,  0.0299],
        [-0.0202,  0.0265,  0.0120,  ...,  0.0264, -0.0243, -0.0018]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([500]) | Values : tensor([0.0298, 0.0253], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([300, 500]) | Values : tensor([[ 1.9436e-03,  7.3831e-03, -2.5742e-02,  3.4341e-02,  2.8283e-02,
          3.0806e-02,  2.1972e-02,  3.8721e-02,  3.5151e-02, -2.3217e-02,
         -2.4017e-02,  1.5989e-03,  2.8442e-02,  2.7545e-02,  1.