In [1]:
import torch
from torch import nn

In [2]:
# 1. Choose the hardware accelerator to use

# Use Apple's Metal backend (MPS) only when this PyTorch build and machine
# actually support it; otherwise fall back to the CPU so that every later
# `.to(device)` / `device=device` call still works on a fresh kernel.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'

In [3]:
# 2. Define the model

class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two Linear+ReLU layers, one Linear.

    Args:
        in_features: number of values per sample once everything after the
            batch dimension has been flattened (default 28*28 = 784).
        hidden_features: width of both hidden layers (default 512).
        num_classes: number of logits produced per sample (default 10).

    With the default arguments the architecture, attribute names and
    parameter names are the same as the original fixed-size network.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()

        # nn.Flatten() keeps dim 0 (the batch) and merges all remaining dims.
        self.flatten = nn.Flatten()

        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # No activation on the last layer: forward() returns raw logits.
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the logits for a batch `x`.

        `x` is flattened to (batch, in_features) before entering the stack,
        and the result has shape (batch, num_classes).
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


In [4]:
# 3. Initialize the model

# Build the network and move its parameters onto the selected device.
model = NeuralNetwork().to(device)

# The module repr lists the registered sub-modules.
print(model)

# For every parameter show, one per line: its name, its full shape, and the
# first two entries along dim 0 (enough to see the initial values).
for name, param in model.named_parameters():
    print(name, param.shape, param[:2], sep="\n")

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
linear_relu_stack.0.weight
torch.Size([512, 784])
tensor([[ 0.0143, -0.0307,  0.0164,  ...,  0.0033,  0.0321,  0.0175],
        [ 0.0068, -0.0279, -0.0288,  ...,  0.0034, -0.0045, -0.0094]],
       device='mps:0', grad_fn=<SliceBackward0>)
linear_relu_stack.0.bias
torch.Size([512])
tensor([-0.0320,  0.0122], device='mps:0', grad_fn=<SliceBackward0>)
linear_relu_stack.2.weight
torch.Size([512, 512])
tensor([[ 0.0204, -0.0121,  0.0319,  ...,  0.0437,  0.0280,  0.0207],
        [ 0.0084, -0.0226,  0.0125,  ..., -0.0068, -0.0374, -0.0220]],
       device='mps:0', grad_fn=<SliceBackward0>)
linear_relu_stack.2.bias
torch.Size([512])
tensor([ 0.0342, -0.0134], device='mps:0', gra

In [5]:
# 4. use the model for 1 case of prediction

# A single random input of shape (1, 28, 28), created on the same device
# the model was moved to.
X = torch.rand(1, 28, 28, device=device)

# Forward pass: raw, unnormalised scores.
logits = model(X)

# Softmax over dim 1 normalises the scores so they sum to 1 along that dim.
pred_probab = torch.softmax(logits, dim=1)
print(f'pred_probab: { pred_probab }')
print(f'pred_probab shape: { pred_probab.shape }')

# The prediction is the index of the largest probability along dim 1.
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

pred_probab: tensor([[0.0892, 0.1094, 0.1008, 0.0982, 0.0931, 0.1009, 0.1082, 0.0976, 0.1066,
         0.0960]], device='mps:0', grad_fn=<SoftmaxBackward0>)
pred_probab shape: torch.Size([1, 10])
Predicted class: tensor([1], device='mps:0')


In [6]:
# 5. check torch.argmax function's dim parameter impact
# on the result of the function

# Create a random 2D tensor
a = torch.randn(4, 4)

print(a)

# dim=0 is the dimension that gets reduced: the result has one entry per
# column, holding the ROW index of that column's largest value.
max_indices_dim0 = torch.argmax(a, dim=0)

# dim=1 is the dimension that gets reduced: the result has one entry per
# row, holding the COLUMN index of that row's largest value.
max_indices_dim1 = torch.argmax(a, dim=1)

# keepdim=True keeps the reduced dimension with size 1, so the result has
# shape (4, 1) instead of (4,); the indices are the same as for dim=1 above.
max_indices_keepdim = torch.argmax(a, dim=1, keepdim=True)

print(f'along dimension 0 (columns), max_indices_dim0: { max_indices_dim0 }')
print(f'along dimension 1 (rows), max_indices_dim1: { max_indices_dim1 }')
print(f'max_indices_keepdim: { max_indices_keepdim }')

tensor([[ 1.6062, -1.0260, -1.7278, -0.0105],
        [-0.6658, -0.3243, -2.1588,  2.4055],
        [ 0.8687, -0.4782, -0.1693, -2.5708],
        [ 0.1019, -0.6981,  2.0004,  0.7112]])
along dimension 0 (columns), max_indices_dim0: tensor([0, 1, 3, 1])
along dimension 1 (rows), max_indices_dim1: tensor([0, 3, 0, 2])
max_indices_keepdim: tensor([[0],
        [3],
        [0],
        [2]])
