In [2]:
# Build the Neural Network

# Neural networks comprise layers/modules that perform operations on data
# The torch.nn namespace provides all the building blocks needed to build your own neural network
# Every module in PyTorch subclasses nn.Module


In [None]:
# Preliminary
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# get device for training
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else 'cpu'
print(f"Using {device} device")
# Note that available device types are only CPU, CUDA, MPS, XPU, XLA, or Meta
# and rocM just shows up as CUDA, confusingly

Using cuda device


In [None]:
# Define the neural network class

class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    With the default arguments this is a 28*28 -> 512 -> 512 -> 10 network.

    Args:
        in_features: Number of values per sample after flattening (default 28*28).
        hidden_features: Width of each of the two hidden layers (default 512).
        num_classes: Number of output logits per sample (default 10).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Flattens every dimension after the first, so each sample becomes one vector
        self.flatten = nn.Flatten()
        # Attribute name and layer order are kept stable so state_dict keys do not change
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` and return the raw logits of the final Linear layer.

        No softmax is applied here; the returned values are unnormalised scores.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
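# Explanations
# nn.Sequential is an ordered container: data is passed through its "layers" (modules) one after another
# nn.Linear is the standard fully connected layer; its arguments are the input dimension and the output dimension
# nn.ReLU is the ReLU activation function
# nn.Softmax applies softmax, to ensure the output is a probability distribution
# (it is not one of the layers in this class; it is applied to the logits when the model is used)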


In [9]:
# Instantiate the network, move it onto the selected device, then show its layer structure
model = NeuralNetwork()
model = model.to(device)

print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [None]:
# To use the model, pass it the input data.
# Create some random X data (shape 1 x 28 x 28) and store it on the device
X = torch.rand((1, 28, 28), device=device)

# "Calling" the model on the input data returns its output, i.e. the fitted values
logits = model(X)
# Use softmax to convert the logits to probabilities, because the raw fitted values
# are possibly negative.
# This is because the model has a Linear as its final layer (is it worth changing this?)
pred_probab = nn.Softmax(dim=1)(logits)
# Take the index of the largest probability as the predicted class
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted Class: {y_pred}")

Predicted Class: tensor([2], device='cuda:0')


  return F.linear(input, self.weight, self.bias)


tensor([[0.0977, 0.1051, 0.1118, 0.0941, 0.1104, 0.0967, 0.0906, 0.1020, 0.0935,
         0.0981]], device='cuda:0', grad_fn=<SoftmaxBackward0>)