 # Training Notebook for ChessNN



 This notebook trains the `ChessNN` model from `simple_nn_eval.py` using processed data from `lichess_processed.pt`.



 **Key Techniques**:

 - **Data Handling**: Load tensors, split into train/validation (80/20), use `DataLoader` for batching.

 - **Model**: Use `ChessNN` for regression (predict normalized CP scores).

 - **Training**: MSE loss, Adam optimizer, `ReduceLROnPlateau` learning-rate scheduler, and early stopping to prevent overfitting.

 - **Hardware**: GPU support if available.

 - **Logging**: Track loss, save best model.

 - **Assumptions**: Data is pre-processed (FEN -> tensors, normalized labels in [-1, 1]).



 **Requirements**: Run after processing in `process_lichess_evals.ipynb`. Install `torch`, `chess`, `tqdm`, and `matplotlib` (used for the loss plot).

In [None]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
from tqdm import tqdm
import matplotlib.pyplot as plt
from src.engine.evaluators.simple_nn_eval import ChessNN  # Import the model class

# --- Configuration -----------------------------------------------------------
# Location of the tensor file produced by the processing step.
DATA_PATH = '../../data/processed/lichess_eval/lichess_processed.pt'

# Optimisation hyperparameters.
LEARNING_RATE = 1e-3
BATCH_SIZE = 64
EPOCHS = 50  # Upper bound on epochs; early stopping may end training sooner
PATIENCE = 5  # Epochs without validation improvement tolerated before stopping

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {DEVICE}")

# Load processed data.
# map_location="cpu" deserializes every tensor onto the host first, so a file
# that was saved from a CUDA session still loads on a CPU-only machine; the
# tensors are then moved explicitly to the selected DEVICE.
data = torch.load(DATA_PATH, map_location="cpu")
inputs = data['inputs'].to(DEVICE)  # Shape: [num_samples, NUM_PLANES, 8, 8]
labels = data['labels'].to(DEVICE)  # Shape: [num_samples, 1]
print(f"Loaded {len(inputs)} samples. Inputs shape: {inputs.shape}, Labels shape: {labels.shape}")

# Wrap the tensors in a dataset and carve out a random 80/20 train/validation split.
dataset = TensorDataset(inputs, labels)
num_samples = len(dataset)
train_size = int(0.8 * num_samples)
val_size = num_samples - train_size  # Remainder, so the two sizes always sum to the total
train_dataset, val_dataset = random_split(dataset, lengths=[train_size, val_size])

# Batch iterators: training batches are reshuffled every epoch, validation order is fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize model, loss, optimizer
model = ChessNN().to(DEVICE)
criterion = nn.MSELoss()  # Suitable for regression
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Lower the learning rate when the validation loss stops improving for 3 epochs.
# NOTE: the `verbose` keyword is intentionally omitted -- current PyTorch
# releases reject it with a TypeError. The active learning rate can still be
# inspected through optimizer.param_groups[0]['lr'].
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

# Training function with early stopping
def train_model():
    """Fit the module-level ``model`` and plot its loss curves.

    Runs up to ``EPOCHS`` epochs. Each epoch performs one optimisation pass
    over ``train_loader`` followed by a gradient-free pass over
    ``val_loader``; the per-batch losses of each pass are averaged over the
    number of batches. The mean validation loss is fed to ``scheduler``.

    Whenever the validation loss improves on the best value seen so far, the
    model's ``state_dict`` is written to ``'best_chess_nn.pth'``. Training
    stops early once ``PATIENCE`` consecutive epochs bring no improvement.
    After the loop, the train/validation loss history is plotted.

    Uses the globals ``model``, ``criterion``, ``optimizer``, ``scheduler``,
    ``train_loader`` and ``val_loader``. Returns ``None``.
    """
    lowest_val_loss = float('inf')
    epochs_without_gain = 0
    train_losses = []
    val_losses = []

    for epoch_num in range(1, EPOCHS + 1):
        # ---- Optimisation pass over the training split ----
        model.train()
        running_train = 0.0
        train_bar = tqdm(train_loader, desc=f"Epoch {epoch_num}/{EPOCHS} - Training")
        for features, targets in train_bar:
            optimizer.zero_grad()
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()
            running_train += batch_loss.item()
        epoch_train_loss = running_train / len(train_loader)
        train_losses.append(epoch_train_loss)

        # ---- Gradient-free pass over the validation split ----
        model.eval()
        running_val = 0.0
        with torch.no_grad():
            val_bar = tqdm(val_loader, desc=f"Epoch {epoch_num}/{EPOCHS} - Validation")
            for features, targets in val_bar:
                running_val += criterion(model(features), targets).item()
        epoch_val_loss = running_val / len(val_loader)
        val_losses.append(epoch_val_loss)

        print(f"Epoch {epoch_num}: Train Loss = {epoch_train_loss:.4f}, Val Loss = {epoch_val_loss:.4f}")
        scheduler.step(epoch_val_loss)

        # ---- Checkpoint on improvement, otherwise count towards early stopping ----
        if epoch_val_loss < lowest_val_loss:
            lowest_val_loss = epoch_val_loss
            epochs_without_gain = 0
            torch.save(model.state_dict(), 'best_chess_nn.pth')
            print("Saved best model.")
            continue

        epochs_without_gain += 1
        if epochs_without_gain >= PATIENCE:
            print("Early stopping triggered.")
            break

    # Plot both loss histories on a shared axis for a quick overfitting check.
    plt.figure(figsize=(10, 5))
    for curve, curve_label in ((train_losses, 'Train Loss'), (val_losses, 'Val Loss')):
        plt.plot(curve, label=curve_label)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training and Validation Loss')
    plt.show()

# Run training: executes the full train/validate loop defined in train_model(),
# which checkpoints the best weights to 'best_chess_nn.pth' and shows the loss
# plot once the loop ends (after EPOCHS epochs or on early stopping).
train_model()


Using device: cpu
Loaded 99365 samples. Inputs shape: torch.Size([99365, 28, 8, 8]), Labels shape: torch.Size([99365, 1])


TypeError: ReduceLROnPlateau.__init__() got an unexpected keyword argument 'verbose'

 ### Post-Training Notes

 - **Model Saving**: The best model is saved as `best_chess_nn.pth`. Load it in `simple_nn_eval.py` using `model.load_state_dict(torch.load('best_chess_nn.pth'))`.

 - **Evaluation**: The model predicts normalized scores in [-1, 1], which are scaled back in `evaluate()` of `simple_nn_eval.py`.

 - **Improvements**: For larger datasets, consider distributed training or gradient accumulation. Monitor for overfitting via the loss plot.

 - **Testing**: After training, test with a sample board in `simple_nn_eval.py` to verify.