In [1]:
import torch
from torch import nn

In [2]:
# 1. Choose the hardware accelerator to use

# torch.backends.mps.is_available()
device = 'mps'

In [3]:
# 2. Define the model

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.flatten = nn.Flatten()

        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
            
        )

    def forward(self, x):
        x = self.flatten(x)
        
        logits = self.linear_relu_stack(x)
        
        return logits


In [4]:
# 3. Initialize the model

model = NeuralNetwork().to(device)

print(model)

for name, param in model.named_parameters():
    print(f'----------{name}----------')
    print(param.shape)
    print(param[:2])

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
----------linear_relu_stack.0.weight----------
torch.Size([512, 784])
tensor([[ 0.0132,  0.0353,  0.0352,  ..., -0.0169,  0.0142, -0.0240],
        [-0.0274, -0.0135, -0.0336,  ..., -0.0033, -0.0265, -0.0347]],
       device='mps:0', grad_fn=<SliceBackward0>)
----------linear_relu_stack.0.bias----------
torch.Size([512])
tensor([-0.0069, -0.0134], device='mps:0', grad_fn=<SliceBackward0>)
----------linear_relu_stack.2.weight----------
torch.Size([512, 512])
tensor([[ 0.0177, -0.0381,  0.0387,  ...,  0.0430,  0.0100, -0.0172],
        [-0.0284, -0.0271,  0.0421,  ..., -0.0205, -0.0070,  0.0066]],
       device='mps:0', grad_fn=<SliceBackward0>)
----------linear_relu_stack.2

In [5]:
# 4. use the model for 1 case of prediction

X = torch.rand(1,28,28, device=device)

logits = model(X)


pred_probab = nn.Softmax(dim=1)(logits)
print(f'pred_probab: { pred_probab }')
print(f'pred_probab shape: { pred_probab.shape }')
y_pred = pred_probab.argmax(1)


print(f"Predicted class: {y_pred}")

pred_probab: tensor([[0.0982, 0.0961, 0.1090, 0.0917, 0.0911, 0.1015, 0.1058, 0.1011, 0.1017,
         0.1038]], device='mps:0', grad_fn=<SoftmaxBackward0>)
pred_probab shape: torch.Size([1, 10])
Predicted class: tensor([2], device='mps:0')


In [6]:
# 5. check torch.argmax function's dim parameter impact
# on the result of the function

import torch

# Create a random 2D tensor
a = torch.randn(4, 4)

print(a)

# Get the indices of the maximum values along dimension 0 (columns)
max_indices_dim0 = torch.argmax(a, dim=0)

# Get the indices of the maximum values along dimension 1 (rows)
max_indices_dim1 = torch.argmax(a, dim=1)

# Using keepdim
max_indices_keepdim = torch.argmax(a, dim=1, keepdim=True)

print(f'along dimension 0 (columns), max_indices_dim0: { max_indices_dim0 }')
print(f'along dimension 1 (rows), max_indices_dim1: { max_indices_dim1 }')
print(f'max_indices_keepdim: { max_indices_keepdim }')

tensor([[-0.8586,  1.3269,  0.2544, -0.7241],
        [-0.4024,  1.4058, -0.6319,  0.3630],
        [ 0.0155, -1.1597, -0.0358, -0.0931],
        [-2.3221,  0.7368, -0.1445, -0.2449]])
along dimension 0 (columns), max_indices_dim0: tensor([2, 1, 0, 1])
along dimension 1 (rows), max_indices_dim1: tensor([1, 1, 0, 1])
max_indices_keepdim: tensor([[1],
        [1],
        [0],
        [1]])
