In [None]:
import torch
import pandas as pd
import numpy as np
from enum import Enum

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [2]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    # Report which CUDA runtime and which card were picked up.
    print("CUDA version:", torch.version.cuda)
    print("GPU:", torch.cuda.get_device_name())

# Bare expression so the notebook displays the selected device.
device

'cpu'

In [3]:
class NPZChessDataset(Dataset):
    """Map-style dataset over the ``X`` and ``y`` arrays of a ``.npz`` archive.

    Both arrays are read into memory once, at construction, and the archive is
    closed immediately afterwards. ``np.load`` ignores ``mmap_mode`` for
    ``.npz`` archives, so nothing is gained by keeping the archive open, and an
    open archive handle cannot be pickled -- which a ``DataLoader`` needs when
    it starts worker processes with the "spawn" method.

    Args:
        npz_path: Path to an archive containing the arrays ``"X"`` and ``"y"``.

    Attributes:
        X: Array whose first axis indexes the samples.
        y: Array of labels, indexed the same way as ``X``.
        data: Plain dict ``{"X": X, "y": y}``, kept so that existing
            ``dataset.data["X"]`` style lookups continue to work.
    """

    def __init__(self, npz_path):
        # The context manager closes the underlying zip file once both arrays
        # have been materialised.
        with np.load(npz_path) as archive:
            self.X = archive["X"]
            self.y = archive["y"]
        self.data = {"X": self.X, "y": self.y}

    def __len__(self):
        """Return the number of samples (size of the first axis of ``X``)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return sample ``idx`` as a ``(float32 tensor, int64 tensor)`` pair."""
        X = torch.tensor(self.X[idx], dtype=torch.float32)  # float32 for NNs
        y = torch.tensor(self.y[idx], dtype=torch.long)  # long for CrossEntropyLoss
        return X, y

In [4]:
# Loader settings shared by all three splits.
BATCH_SIZE = 32
num_workers = 0

# One DataLoader per split, built in train / val / test order.
# Only the training split is shuffled.
train_dataloader, val_dataloader, test_dataloader = (
    DataLoader(dataset=NPZChessDataset(npz_file),
               batch_size=BATCH_SIZE,
               num_workers=num_workers,
               shuffle=reshuffle)
    for npz_file, reshuffle in (("chess_bitboards_train.npz", True),
                                ("chess_bitboards_val.npz", False),
                                ("chess_bitboards_test.npz", False))
)

In [5]:
"""
If the average batch load time printed below increases significantly in future
(e.g. >10-20 ms per batch), the CPU/GPU is waiting on the DataLoader.
In that case, check the Dataset code or consider using num_workers > 0 for parallel loading.
"""

import time
from itertools import islice

# Upper bound on how many batches are fetched for the measurement.
N_TIMED_BATCHES = 100

loader = DataLoader(NPZChessDataset("chess_bitboards_train.npz"),
                    batch_size=256,
                    shuffle=True,
                    num_workers=0)

# perf_counter() is a monotonic, high-resolution clock intended for timing.
start = time.perf_counter()
batches_timed = 0
# islice stops after exactly N_TIMED_BATCHES fetches, so no extra batch is
# loaded (and timed) just to discover that the limit was reached.
for X, y in islice(loader, N_TIMED_BATCHES):
    batches_timed += 1
elapsed = time.perf_counter() - start

# Divide by the number of batches actually fetched: the loader may hold fewer
# than N_TIMED_BATCHES batches. max(..., 1) avoids dividing by zero when empty.
print("Avg batch load time:", elapsed / max(batches_timed, 1))


Avg batch load time: 0.0008795595169067383


In [14]:
# Sanity check: pull a single batch from the training loader and report its
# shapes, dtypes and the label values that occur in it.
Xb, yb = next(iter(train_dataloader))
classes_in_batch = yb.unique().tolist()
print("X batch shape:", Xb.shape, "dtype:", Xb.dtype)
print("y batch shape:", yb.shape, "dtype:", yb.dtype, "classes in batch:", classes_in_batch)

X batch shape: torch.Size([32, 768]) dtype: torch.float32
y batch shape: torch.Size([32]) dtype: torch.int64 classes in batch: [2, 3, 4, 5]


In [None]:
class PositionLabel(Enum):
    """Names for seven integer class ids, 0 through 6.

    NOTE(review): presumably these are the label values stored in the ``y``
    arrays of the datasets (the model in this notebook is built with 7 output
    classes) -- confirm against the preprocessing code.
    """
    # Members run from WHITE_WINNING (0) to BLACK_WINNING (6), with EQUAL (3)
    # in the middle.
    WHITE_WINNING = 0
    WHITE_DECISIVE = 1
    WHITE_BETTER = 2
    EQUAL = 3
    BLACK_BETTER = 4
    BLACK_DECISIVE = 5
    BLACK_WINNING = 6

In [27]:
class RandomTestModel(nn.Module):
    """Fully connected network: Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        input_shape: Number of input features per sample.
        hidden_units: Width of both hidden layers.
        output_shape: Number of values produced per sample.
    """

    def __init__(self,
                 input_shape: int,
                 hidden_units: int,
                 output_shape: int) -> None:
        super().__init__()
        # Built in forward order; inside the Sequential the modules are
        # registered under the indices 0..4.
        stack = [
            nn.Linear(input_shape, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_shape),
        ]
        self.layer = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        out = self.layer(x)
        return out

In [32]:
import torchinfo
from torchinfo import summary

# Create the model on the selected `device`: train_step/eval_step move every
# batch to that device, so the parameters have to live there as well.
model = RandomTestModel(input_shape=768,
                        hidden_units=100,
                        output_shape=7).to(device)  # 7 classes
# Summarise the model for one batch of 32 samples with 768 features each.
summary(model, input_size=(32, 768))

Layer (type:depth-idx)                   Output Shape              Param #
RandomTestModel                          [32, 7]                   --
├─Sequential: 1-1                        [32, 7]                   --
│    └─Linear: 2-1                       [32, 100]                 76,900
│    └─ReLU: 2-2                         [32, 100]                 --
│    └─Linear: 2-3                       [32, 100]                 10,100
│    └─ReLU: 2-4                         [32, 100]                 --
│    └─Linear: 2-5                       [32, 7]                   707
Total params: 87,707
Trainable params: 87,707
Non-trainable params: 0
Total mult-adds (M): 2.81
Input size (MB): 0.10
Forward/backward pass size (MB): 0.05
Params size (MB): 0.35
Estimated Total Size (MB): 0.50

In [None]:
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device=None) -> tuple[float, float]:
    """
    Performs one training epoch for the given model.

    Args:
        model: Network to optimise; it is put into train mode.
        dataloader: Iterable yielding ``(X, y)`` batches. Only iteration is
            required, so loaders without ``__len__`` work too.
        loss_fn: Called as ``loss_fn(model(X), y)``; must return a scalar tensor.
        optimizer: Optimizer that updates the model's parameters.
        device: Device every batch is moved to. When ``None`` (the default) the
            device of the model's first parameter is used, or the CPU if the
            model has no parameters, so batches always land where the model is.

    Returns:
        ``(loss, accuracy)``: the unweighted means of the per-batch loss and
        the per-batch accuracy, so a smaller final batch counts as much as a
        full one.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Put model in train mode
    model.train()

    total_loss, total_accuracy, num_batches = 0.0, 0.0, 0

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        # Reset gradients
        optimizer.zero_grad()

        # Forward pass; the output is expected to be (batch_size, num_classes)
        y_pred = model(X)

        # Calculate loss
        loss = loss_fn(y_pred, y)
        total_loss += loss.item()

        # Backpropagation
        loss.backward()

        # Update weights
        optimizer.step()

        # Accuracy of this batch: argmax over the class dimension (dim=1)
        # gives one predicted class per sample.
        y_pred_class = torch.argmax(y_pred, dim=1)
        total_accuracy += (y_pred_class == y).sum().item() / len(y)

        num_batches += 1

    if num_batches == 0:
        raise ValueError("train_step received a dataloader that yielded no batches")

    return total_loss / num_batches, total_accuracy / num_batches

In [None]:
def eval_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device=None) -> tuple[float, float]:
    """
    Evaluates the given model on the given dataloader without gradient updates.
    Dataloader should either be the validation or test dataloader.

    Args:
        model: Network to evaluate; it is put into eval mode.
        dataloader: Iterable yielding ``(X, y)`` batches. Only iteration is
            required, so loaders without ``__len__`` work too.
        loss_fn: Called as ``loss_fn(model(X), y)``; must return a scalar tensor.
        device: Device every batch is moved to. When ``None`` (the default) the
            device of the model's first parameter is used, or the CPU if the
            model has no parameters, so batches always land where the model is.

    Returns:
        ``(loss, accuracy)``: the unweighted means of the per-batch loss and
        the per-batch accuracy, so a smaller final batch counts as much as a
        full one.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Put model in eval mode
    model.eval()

    total_loss, total_accuracy, num_batches = 0.0, 0.0, 0

    # inference_mode disables gradient tracking for everything inside the block.
    with torch.inference_mode():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            # Forward pass; the output is expected to be (batch_size, num_classes)
            test_pred = model(X)

            # Calculate the loss
            total_loss += loss_fn(test_pred, y).item()

            # Accuracy of this batch: argmax over the class dimension (dim=1)
            test_pred_labels = torch.argmax(test_pred, dim=1)
            total_accuracy += (test_pred_labels == y).sum().item() / len(test_pred_labels)

            num_batches += 1

    if num_batches == 0:
        raise ValueError("eval_step received a dataloader that yielded no batches")

    return total_loss / num_batches, total_accuracy / num_batches

In [None]:
def run_experiment():
    """Placeholder for the experiment driver: it does nothing yet and returns None."""
    return None