In [8]:
# initialize
import torch
import os
import torch.nn as nn
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
from torch.utils.data import DataLoader
print("Initialized successfully") 


Initialized successfully


In [2]:
# get device
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


In [14]:
# Hyperparameters -- tunable training settings, kept in one place.
epochs = 5            # presumably the number of passes over the training set; the loop using it is not in this cell
batch_size = 64       # samples per batch handed to the DataLoaders
learning_rate = 1e-3  # intended optimizer step size; the optimizer is created in a later cell

In [4]:
# design model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> 512 -> 512 -> 128 -> ``num_classes``.

    Each hidden ``nn.Linear`` layer is followed by a ``ReLU``; the final layer
    has no activation, so the network outputs raw logits.

    Args:
        in_features: Number of values per sample once flattened. Defaults to
            ``28 * 28`` (one 28x28 single-channel image).
        num_classes: Number of output logits. Defaults to ``10``.

    Calling ``NeuralNetwork()`` with no arguments builds exactly the same
    module (same layer sizes and parameter names) as the original hard-coded
    version.
    """

    def __init__(self, in_features=28 * 28, num_classes=10):
        super().__init__()
        # Collapses every dimension after the batch dimension into one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, X):
        """Return the logits for batch ``X``.

        ``X`` is flattened per sample first, so any trailing shape whose
        element count equals ``in_features`` is accepted. The result has
        shape ``(batch, num_classes)``.
        """
        X = self.flatten(X)
        logits = self.linear_relu_stack(X)
        return logits
        

In [6]:
# Build the network, move it onto the selected device, and show its layer layout.
model = NeuralNetwork()
model = model.to(device)
print(model)

# For every learnable tensor, report its name, its full shape, and a preview
# of its first two entries along the leading dimension.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]} \n")

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=10, bias=True)
  )
)
Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[-1.3789e-02,  2.3320e-02, -2.3449e-02,  ...,  2.5281e-02,
         -2.6549e-02, -5.1865e-03],
        [-1.3348e-05, -2.6808e-02,  3.3510e-02,  ...,  3.3615e-02,
         -8.6144e-03,  3.4196e-02]], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([0.0154, 0.0131], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[ 0.0387,  0.0140, -0.0008,  ..., -0.0371,  0.0265,  0.

In [21]:
# Load data
# FashionMNIST is fetched into ./data when it is not already there. ToTensor()
# converts each image to a tensor. No target_transform is passed, so labels are
# left exactly as the dataset provides them (no one-hot encoding is applied).
train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Only the training batches are shuffled. Evaluation does not benefit from a
# random order, and a fixed order keeps test runs repeatable and avoids
# consuming random-number-generator state.
train = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Sanity check: report which device a raw dataset sample lives on.
print(train_data[5][0].device)
print("Done!")

cpu
Done!


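- `train_loop` puts the model in training mode once, then does 4 things for each batch:
    1. move the input and label tensors to the selected device (e.g. cuda)
    2. forward propagation: compute the predictions and the loss
    3. backward propagation: compute the gradients of the loss
    4. use the optimizer to update the model parameters (the step size is governed by the learning rate), then reset the gradients to zero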

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    For every batch: move the tensors to the notebook-level ``device``, run the
    forward pass, compute the loss, back-propagate, take one optimizer step,
    and clear the gradients. Progress is printed on every 100th batch
    (batch 0, 100, 200, ...).

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches; must expose
            ``.dataset`` so the total sample count can be reported.
        model: Module being trained; it is switched to training mode.
        loss_fn: Callable taking ``(pred, y)`` and returning a scalar loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        None. The model is updated in place and progress goes to stdout.
    """
    size = len(dataloader.dataset)
    # Running count of samples processed. Counting directly (instead of
    # batch * batch_size) removes the dependence on the notebook-global
    # batch_size, so the report is right for any loader.
    seen = 0
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # forward propagation
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # backward propagation
        loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()

        seen += len(X)
        # The report belongs inside this check: it should fire only on every
        # 100th batch, and with the sample count of that batch.
        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader