In [1]:
import torch
from torch import nn

In [2]:
# 1. Choose the hardware accelerator to use

# Probe the available backends instead of assuming one: prefer Apple's Metal
# backend (MPS), then CUDA, and fall back to the CPU so that the notebook
# still runs on machines without a supported accelerator.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# 2. Define the model

class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> (Linear -> ReLU) x 2 -> Linear.

    With the default sizes the network maps 28x28 inputs to 10 logits.

    Args:
        in_features: Number of values per sample once the input is flattened.
        hidden_features: Width of both hidden layers.
        num_classes: Number of logits produced per sample.
    """

    def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
        super().__init__()

        # Collapses every dimension after the first (batch) one into a
        # single feature dimension.
        self.flatten = nn.Flatten()

        # Attribute name and layer order are kept stable so parameter names
        # (e.g. ``linear_relu_stack.0.weight``) do not change.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw, unnormalized class scores (logits) for ``x``.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                dimensions hold ``in_features`` values in total.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        return self.linear_relu_stack(self.flatten(x))


In [4]:
# 3. Initialize the model

# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)

print(model)

# For every learnable tensor show its name, its shape and its first two
# entries (rows for weight matrices, scalars for bias vectors).
for name, param in model.named_parameters():
    print(f'----------{name}----------', param.shape, param[:2], sep='\n')

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
----------linear_relu_stack.0.weight----------
torch.Size([512, 784])
tensor([[ 0.0089, -0.0257, -0.0096,  ..., -0.0282,  0.0317, -0.0322],
        [ 0.0082, -0.0239,  0.0185,  ..., -0.0294,  0.0192, -0.0102]],
       device='mps:0', grad_fn=<SliceBackward0>)
----------linear_relu_stack.0.bias----------
torch.Size([512])
tensor([-0.0046,  0.0147], device='mps:0', grad_fn=<SliceBackward0>)
----------linear_relu_stack.2.weight----------
torch.Size([512, 512])
tensor([[-0.0117, -0.0408,  0.0423,  ..., -0.0326, -0.0180,  0.0339],
        [-0.0299,  0.0229,  0.0161,  ...,  0.0345, -0.0164, -0.0145]],
       device='mps:0', grad_fn=<SliceBackward0>)
----------linear_relu_stack.2

In [8]:
# 4. Use the model to predict the class of a single sample

# One random 28x28 sample (batch of size 1), created on the selected device.
X = torch.rand((1, 28, 28), device=device)

# Forward pass: raw class scores, one row per sample.
logits = model(X)
print(f'logits: {logits}', f'logits.shape: {logits.shape}', sep='\n')

# Softmax over dim 1 turns each row of scores into probabilities.
pred_probab = torch.softmax(logits, dim=1)
print(f'pred_probab: { pred_probab }')
print(f'pred_probab shape: { pred_probab.shape }')

# The predicted class is the index of the largest probability in each row.
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

logits: tensor([[-0.0274, -0.0662, -0.0074,  0.0271, -0.0942, -0.0444, -0.0353,  0.0319,
         -0.0674,  0.0687]], device='mps:0', grad_fn=<LinearBackward0>)
logits.shape: torch.Size([1, 10])
pred_probab: tensor([[0.0993, 0.0955, 0.1013, 0.1048, 0.0929, 0.0976, 0.0985, 0.1054, 0.0954,
         0.1093]], device='mps:0', grad_fn=<SoftmaxBackward0>)
pred_probab shape: torch.Size([1, 10])
Predicted class: tensor([9], device='mps:0')


In [6]:
# 5. Check how the `dim` parameter of torch.argmax affects the result
# (torch is already imported in the notebook's first cell)

# Create a random 2D tensor
a = torch.randn(4, 4)

print(a)

# dim=0 reduces over the rows: the result has one entry per column, namely
# the row index at which that column reaches its maximum.
max_indices_dim0 = torch.argmax(a, dim=0)

# dim=1 reduces over the columns: the result has one entry per row, namely
# the column index at which that row reaches its maximum.
max_indices_dim1 = torch.argmax(a, dim=1)

# keepdim=True keeps the reduced dimension with size 1, so the result has
# shape (4, 1) instead of (4,).
max_indices_keepdim = torch.argmax(a, dim=1, keepdim=True)

print(f'along dimension 0 (columns), max_indices_dim0: { max_indices_dim0 }')
print(f'along dimension 1 (rows), max_indices_dim1: { max_indices_dim1 }')
print(f'max_indices_keepdim: { max_indices_keepdim }')

tensor([[-0.2380, -1.9356, -0.0540,  0.0089],
        [-0.3114,  1.7560,  0.8787, -2.5100],
        [ 0.4066,  0.5323,  0.7411,  1.4264],
        [-0.9227,  2.7609, -0.4521, -0.3625]])
along dimension 0 (columns), max_indices_dim0: tensor([2, 3, 1, 2])
along dimension 1 (rows), max_indices_dim1: tensor([3, 1, 3, 1])
max_indices_keepdim: tensor([[3],
        [1],
        [3],
        [1]])
