In [2]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
from torchvision import datasets
from torchvision.transforms import v2
import matplotlib as plt

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

Using device: cpu


In [4]:
# Transformation

transforms = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True)
])

In [5]:
# Load data

batch_size = 32

train_dataset = torchvision.datasets.MNIST(root='../dataset/', train=True, transform=transforms, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(root='../dataset/', train=False, transform=transforms, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)   

In [6]:
# Define CNN Model

class CNN(nn.Module):
    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.fc1 = nn.Linear(16*7*7, num_classes) # 28 -> 28 -> 14 -> 14 -> 7 (changes due to maxpool)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x) 
        
        return x 

In [14]:
# Training and validation loop

num_epochs = 5
learning_rate = 0.001
train_losses = []
test_losses = []

model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X, y in train_loader:
        X = X.to(device=device)
        y = y.to(device=device)
        optimiser.zero_grad() 
        pred_y = model(X)
        loss = criterion(pred_y, y) # mean loss per sample
        loss.backward()
        optimiser.step()
        running_loss += loss.item() * X.size(0) 
        # running_loss = total loss per batch, X.size() refers to (batch_size, channels, height, width)
    epoch_loss = running_loss/len(train_loader)
    train_losses.append(epoch_loss)

    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for X, y in test_loader:
            X = X.to(device=device)
            y = y.to(device=device)
            pred_y = model(X)
            loss = criterion(pred_y, y)
            running_loss += loss.item() * X.size(0)
    epoch_test_loss = running_loss/len(test_loader)
    test_losses.append(epoch_test_loss)

    print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}, Validation Loss: {epoch_test_loss:.4f}')

Epoch 1/5, Training Loss: 8.1240, Validation Loss: 2.6046
Epoch 2/5, Training Loss: 2.6833, Validation Loss: 1.7088
Epoch 3/5, Training Loss: 2.0293, Validation Loss: 1.7759
Epoch 4/5, Training Loss: 1.6978, Validation Loss: 1.3643
Epoch 5/5, Training Loss: 1.4369, Validation Loss: 1.2843


In [10]:
a = torch.randn(1, 3)
a

tensor([[-0.4402,  1.9464, -0.5156]])

In [13]:
torch.max(a, 2)

IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)