In [103]:
#   NOTEBOOK STRUCTURE:
# STEP 0: imports and TRAIN/TEST BEHAVIOR
# 0.1: Train step
# 0.2: Test step
# 0.3: Loop
# STEP 1: DATA
# 1.1: Paths lists, validate?
# 1.2: Transforms, dataloaders 
# STEP 2: ARCHITECTURE
# STEP 3: Training loop
# STEP 4: ANALYSIS
# STEP 5: EXPORT

def reminder():
    '''Prints a series of reminder text about global variables and their contents'''
    print("ex im_path = ONLY the base dir: /images")
    print("ex label_path_list = all category dirs eg images/ramen/")
    print("ex img_path_list = all .jpg files eg images/ramen/125235.jpg")
    print("ex class_list = ['spring_rolls', 'pulled_pork_sandwich',...]")

reminder()

ex im_path = ONLY the base dir: /images
ex label_path_list = all category dirs eg images/ramen/
ex img_path_list = all .jpg files eg images/ramen/125235.jpg
ex class_list = ['spring_rolls', 'pulled_pork_sandwich',...]


In [104]:
# STEP 0: Imports, init cuda

import numpy as np 
import pandas as pd 
import matplotlib as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torchvision import datasets, transforms

from sklearn.model_selection import train_test_split
torch.__version__

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [167]:
# 0.1: Train step
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer):
  model.train() # train mode

  train_loss, train_acc = 0, 0
  for batch, (X, y) in enumerate(dataloader):
    X, y = X.to(device), y.to(device)

    # Forward pass
    y_pred = model(X)
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()

    # Backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Accuracy
    y_pred_class = torch.argmax(y_pred, dim=1)
    train_acc += (y_pred_class == y).sum().item()/len(y_pred)

  # Compute average loss and accuracy
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc

In [168]:
# 0.2: Test step
def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module):
  model.eval() # eval mode

  test_loss, test_acc = 0, 0

  with torch.inference_mode():
    for batch, (X, y) in enumerate(dataloader):
      X, y = X.to(device), y.to(device)
      # forward pass
      test_pred_logits = model(X)
      # calculate the loss
      loss = loss_fn(test_pred_logits, y)
      test_loss += loss.item()
      # calc acc
      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_acc += ((test_pred_labels==y)).sum().item()/len(test_pred_labels)

  # Compute average loss and accuracy
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

In [169]:
# 0.3: Loop
from tqdm.auto import tqdm

def train_loop(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int):

    # create empty results dictionary
    results = {
        "train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }
    # loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer)
        test_loss, test_acc = test_step(model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn)

        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # update results dictionary
        results["train_loss"].append(train_loss.item() if isinstance(train_loss, torch.Tensor) else train_loss)
        results["train_acc"].append(train_acc.item() if isinstance(train_acc, torch.Tensor) else train_acc)
        results["test_loss"].append(test_loss.item() if isinstance(test_loss, torch.Tensor) else test_loss)
        results["test_acc"].append(test_acc.item() if isinstance(test_acc, torch.Tensor) else test_acc)

    return results

In [170]:
# STEP 1: DATA
# 1.1: Paths lists, validate

# Download latest version of data from kaggle 
import kagglehub
path = kagglehub.dataset_download("oddrationale/mnist-in-csv")
print("Path to dataset files:", path)

df = pd.read_csv(path + '/mnist_train.csv')
df

Path to dataset files: C:\Users\zecer\.cache\kagglehub\datasets\oddrationale\mnist-in-csv\versions\2


Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59997,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [171]:
# 1.2: Transforms, train-test-split, dataloaders 

# load data into numpy vectors with test_train_split
y = df['label'].values
X = df.drop(columns=['label'],axis=1).values.astype('float32')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# define transform
BATCH_SIZE = 64

# convert data and labels to torch vectors
torch_X_train = torch.from_numpy(X_train).type(torch.float32).view(-1, 1, 28, 28).to(device)
torch_y_train = torch.from_numpy(y_train).type(torch.long).to(device)

# create feature and targets tensor for test set.
torch_X_test = torch.from_numpy(X_test).type(torch.float32).view(-1, 1, 28, 28).to(device)
torch_y_test = torch.from_numpy(y_test).type(torch.long).to(device)

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = True)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

In [172]:
# STEP 2: ARCHITECTURE
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)  # 28x28 -> 26x26
        self.conv2 = nn.Conv2d(32, 64, 3, 1) # 26x26 -> 24x24
        self.dropout1 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 12 * 12, 128)  # After 2x2 pooling
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))       # 28 -> 26
        x = F.relu(self.conv2(x))       # 26 -> 24
        x = F.max_pool2d(x, 2)          # 24 -> 12
        x = self.dropout1(x)
        x = torch.flatten(x, 1)         # flatten except batch dim
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


model = CNN().to(device)
model

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [173]:
# STEP 3: Loss fn, optimizer, dummy forward pass

# reasonably good choices depend on data
optimizer = torch.optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()

# dummy forward pass
batch, label_batch = next(iter(train_loader))
img, label = batch[0].unsqueeze(dim=0), label_batch[0] 

model.eval()
with torch.inference_mode():
  pred = model(img.to(device))

print(f'outp logits: {pred},\n outp probabilities: {torch.softmax(pred, dim=1)}')
print(f'outp label: {torch.argmax(torch.softmax(pred, dim=1), dim=1)}\n actual label: {label}')

outp logits: tensor([[ 7.0822, -5.9170, -3.8916, -6.5120, -4.0127, -5.5917, -0.1559,  4.9961,
         -4.9308,  8.4320]], device='cuda:0'),
 outp probabilities: tensor([[2.0075e-01, 4.5411e-07, 3.4416e-06, 2.5046e-07, 3.0491e-06, 6.2871e-07,
         1.4427e-04, 2.4926e-02, 1.2174e-06, 7.7418e-01]], device='cuda:0')
outp label: tensor([9], device='cuda:0')
 actual label: 0


In [174]:
# STEP 3: Training loop

from timeit import default_timer as timer
start_time = timer()

# Training begins here
NUM_EPOCHS = 5
model_results = train_loop(model=model,
                        train_dataloader=train_loader,
                        test_dataloader=test_loader,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        epochs=NUM_EPOCHS)

end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.3637 | train_acc: 0.9421 | test_loss: 0.0645 | test_acc: 0.9791
Epoch: 2 | train_loss: 0.0726 | train_acc: 0.9775 | test_loss: 0.0653 | test_acc: 0.9808
Epoch: 3 | train_loss: 0.0528 | train_acc: 0.9834 | test_loss: 0.0548 | test_acc: 0.9831
Epoch: 4 | train_loss: 0.0449 | train_acc: 0.9862 | test_loss: 0.0586 | test_acc: 0.9840
Epoch: 5 | train_loss: 0.0386 | train_acc: 0.9880 | test_loss: 0.0505 | test_acc: 0.9862
Total training time: 8.016 seconds


In [175]:
# DEBUG CELL (type whatever in here)




In [None]:
# STEP 4: ANALYSIS
def plot_loss_curves(results):
  loss = results['train_loss']
  test_loss = results['test_loss']

  acc = results['train_acc']
  test_acc = results['test_acc']

  epochs = range(len(results['train_loss']))

  plt.plot(epochs, loss, label='train_loss')
  plt.plot(epochs, test_loss, label='test_loss')
  plt.title('Loss')
  plt.legend()

In [None]:
# STEP 5: EXPORT

# save and load as pytorch file (.pt) INFERENCE ONLY
pt_path = 'model.pt'
torch.save(model.state_dict(), pt_path)
model = CNN() # match correct model architecture
model.load_state_dict(torch.load(pt_path, weights_only=True))
model.eval()

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [None]:
# load and export as .onnx file INFERENCE ONLY

pt_model = CNN() # match correct model architecture
pt_model.load_state_dict(torch.load(pt_path))
pt_model.eval()
ex_input = (torch.zeros(1, 1, 28, 28)) # match correct input dimenstions
torch.onnx.export(pt_model, ex_input, 'onnx_model.onnx',
                  dtype=torch.float32, dynamo=True)

  pt_model.load_state_dict(torch.load(pt_path))


[torch.onnx] Obtain model graph for `CNN([...]` with `torch.export.export`...
[torch.onnx] Obtain model graph for `CNN([...]` with `torch.export.export`... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅


ONNXProgram(
    model=
        <
            ir_version=9,
            opset_imports={'': 18, 'pkg.onnxscript.torch_lib.common': 1},
            producer_name='pytorch',
            producer_version='2.5.1+cu121',
            domain=None,
            model_version=None,
        >
        graph(
            name=main_graph,
            inputs=(
                %"x"<FLOAT,[1,1,28,28]>
            ),
            outputs=(
                %"addmm_1"<FLOAT,[1,10]>
            ),
            initializers=(
                %"conv1.weight"<FLOAT,[32,1,3,3]>{TorchTensor(...)},
                %"conv1.bias"<FLOAT,[32]>{TorchTensor(...)},
                %"conv2.weight"<FLOAT,[64,32,3,3]>{TorchTensor(...)},
                %"conv2.bias"<FLOAT,[64]>{TorchTensor(...)},
                %"fc1.weight"<FLOAT,[128,9216]>{TorchTensor(...)},
                %"fc1.bias"<FLOAT,[128]>{TorchTensor(...)},
                %"fc2.weight"<FLOAT,[10,128]>{TorchTensor(...)},
                %"fc2.bias"<FLOAT,[10]>{