# Episode 124. Early Stopping (ver. 2024)

Early stopping is a technique that helps prevent overfitting and optimize model performance by monitoring validation loss during training. We can avoid unnecessary iterations and save computational resources by stopping the training process when the validation loss starts increasing. Additionally, we will explore how to save and load PyTorch networks, allowing us to store trained models and reuse them for predictions or further training.

In [None]:
# Detect whether we are running inside Google CoLab, then choose the PyTorch
# device: Apple MPS or CUDA when one is usable, otherwise the CPU.
import torch

try:
    import google.colab  # noqa: F401  (imported only to detect CoLab)
except ImportError:
    # Only a missing package means "not CoLab"; any other error should surface
    # instead of being silently swallowed by a bare except.
    COLAB = False
    print("Note: not using Google CoLab")
else:
    COLAB = True
    print("Note: using Google CoLab")

# Make use of a GPU or MPS (Apple) if one is available.  (see module 3.2)
# is_available() is used instead of is_built(): a PyTorch build can include MPS
# support on a machine that cannot actually run it.
has_mps = torch.backends.mps.is_available()
device = "mps" if has_mps else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Note: using Google CoLab
Using device: cpu


## Early Stopping

It can be difficult to determine how many epochs to cycle through to train a neural network. Overfitting will occur if you train the neural network for too many epochs, and the neural network will not perform well on new data, despite attaining a good accuracy on the training set. Overfitting occurs when a neural network is trained to the point that it begins to memorize rather than generalize.

It is important to segment the original dataset into several datasets:

- **Training Set**
- **Validation Set**
- **Holdout Set**


PyTorch does not include a built-in early stopping function. Here we define one.

We need to provide several parameters to the **EarlyStopping** object:

- **min_delta** This value should be kept small; it specifies the minimum change that should be considered an improvement. Setting it even smaller will not likely have a great deal of impact.
- **patience** How long should the training wait for the validation error to improve?
- **restore_best_weights** You should usually set this to true, as it restores the weights to the values they had when the validation loss was at its lowest.

In [None]:
import copy


class EarlyStopping:
    """Signal when training should stop because validation loss has stalled.

    Call the instance once per epoch with the model and the current validation
    loss. It remembers the best loss seen so far together with a copy of the
    model weights at that point.

    Args:
        patience: Number of consecutive non-improving calls tolerated before
            the instance reports that training should stop.
        min_delta: Minimum decrease of the loss (relative to the best loss)
            that counts as an improvement. A loss that does not decrease at
            all is never an improvement, even when ``min_delta`` is 0.
        restore_best_weights: If true, the best recorded weights are loaded
            back into the model when stopping is triggered.
    """

    def __init__(self, patience=5, min_delta=0, restore_best_weights=True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.best_model = None  # deep copy of the best state_dict seen so far
        self.best_loss = None  # best (lowest) validation loss, as a float
        self.counter = 0  # consecutive calls without improvement
        self.status = ""  # human-readable description of the last call

    def __call__(self, model, val_loss):
        """Record ``val_loss`` and return ``True`` when training should stop.

        Args:
            model: Object exposing ``state_dict()`` and ``load_state_dict()``.
            val_loss: Current validation loss; a Python number or a
                single-element tensor.

        Returns:
            ``True`` once ``patience`` consecutive calls brought no
            improvement, otherwise ``False``.
        """
        # Store a plain float so that a loss tensor (and any autograd graph
        # attached to it) is not kept alive between epochs.
        val_loss = float(val_loss)

        if self.best_loss is None:
            # First call: nothing to compare against yet.
            self.best_loss = val_loss
            self.best_model = copy.deepcopy(model.state_dict())
            return False

        improvement = self.best_loss - val_loss
        # Require a real decrease; otherwise, with min_delta == 0, a flat loss
        # would reset the counter forever and stopping could never trigger.
        if improvement > 0 and improvement >= self.min_delta:
            self.best_model = copy.deepcopy(model.state_dict())
            self.best_loss = val_loss
            self.counter = 0
            self.status = f"Improvement found, counter reset to {self.counter}"
            return False

        self.counter += 1
        self.status = f"No improvement in the last {self.counter} epochs"
        if self.counter < self.patience:
            return False

        self.status = f"Early stopping triggered after {self.counter} epochs."
        if self.restore_best_weights:
            model.load_state_dict(self.best_model)
        return True

### Example 1: Early Stopping with Classification

In [None]:
import time

import numpy as np
import pandas as pd
import torch
import tqdm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

# Set random seed for reproducibility: seed both NumPy's and PyTorch's global
# random number generators with the same fixed value.
np.random.seed(42)
torch.manual_seed(42)

def load_data():
    """Download the loan-default CSV and return standardized train/test tensors.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test, defaulted)``. The feature
        tensors are float32 and the label tensors are long, all created on the
        module-level ``device``. ``defaulted`` holds the original class labels
        found by the ``LabelEncoder``.
    """
    feature_columns = ["Employed", "Bank Balance", "Annual Salary"]

    frame = pd.read_csv(
        "https://raw.githubusercontent.com/yunssamfinance/DeepLearningInFinance/main/Default_Fin.csv",
        na_values=["NA", "?"],
    )

    encoder = LabelEncoder()
    inputs = frame[feature_columns].values
    labels = encoder.fit_transform(frame["Defaulted?"])

    # Hold out 25% of the rows; the caller uses them as the validation set.
    train_in, test_in, train_lab, test_lab = train_test_split(
        inputs, labels, test_size=0.25, random_state=42
    )

    # Fit the scaler on the training rows only, then apply it to both splits.
    standardizer = StandardScaler().fit(train_in)
    train_in = standardizer.transform(train_in)
    test_in = standardizer.transform(test_in)

    def as_tensor(array, dtype):
        # Numpy to Torch Tensor, placed on the selected device.
        return torch.tensor(array, device=device, dtype=dtype)

    return (
        as_tensor(train_in, torch.float32),
        as_tensor(test_in, torch.float32),
        as_tensor(train_lab, torch.long),
        as_tensor(test_lab, torch.long),
        encoder.classes_,
    )


x_train, x_test, y_train, y_test, defaulted = load_data()

# Create datasets
BATCH_SIZE = 16

dataset_train = TensorDataset(x_train, y_train)
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)

# Evaluation data gains nothing from shuffling.
dataset_test = TensorDataset(x_test, y_test)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

# Create model using nn.Sequential.
# The network outputs raw logits: nn.CrossEntropyLoss applies log-softmax
# itself, so a LogSoftmax output layer would be redundant.
model = nn.Sequential(
    nn.Linear(x_train.shape[1], 50),
    nn.ReLU(),
    nn.Linear(50, 25),
    nn.ReLU(),
    nn.Linear(25, len(defaulted)),
)

model = torch.compile(model, backend="aot_eager").to(device)

loss_fn = nn.CrossEntropyLoss()  # cross entropy loss

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
es = EarlyStopping()

# Train for at most 1000 epochs; the early-stopping monitor may end it sooner.
epoch = 0
done = False
while epoch < 1000 and not done:
    epoch += 1
    n_steps = len(dataloader_train)
    # Stream the batches instead of materializing the whole epoch in a list.
    pbar = tqdm.tqdm(enumerate(dataloader_train), total=n_steps)
    model.train()
    for i, (x_batch, y_batch) in pbar:
        y_batch_pred = model(x_batch.to(device))
        batch_loss = loss_fn(y_batch_pred, y_batch.to(device))
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        loss = batch_loss.item()
        if i == n_steps - 1:
            # End of epoch: score the validation set while the progress bar is
            # still open, so its final line shows the validation loss.
            model.eval()
            with torch.no_grad():  # no autograd graph needed for validation
                pred = model(x_test)
                vloss = loss_fn(pred, y_test).item()
            if es(model, vloss):
                done = True
            pbar.set_description(
                f"Epoch: {epoch}, tloss: {loss}, vloss: {vloss:>7f}, {es.status}"
            )
        else:
            pbar.set_description(f"Epoch: {epoch}, tloss {loss:}")

Epoch: 1, tloss: 0.07578587532043457, vloss: 0.084684, : 100%|██████████| 469/469 [00:18<00:00, 25.13it/s]
Epoch: 2, tloss: 0.4055657386779785, vloss: 0.084303, Improvement found, counter reset to 0: 100%|██████████| 469/469 [00:06<00:00, 77.10it/s] 
Epoch: 3, tloss: 0.07186264544725418, vloss: 0.086865, No improvement in the last 1 epochs: 100%|██████████| 469/469 [00:03<00:00, 117.81it/s]
Epoch: 4, tloss: 0.0284480731934309, vloss: 0.083243, Improvement found, counter reset to 0: 100%|██████████| 469/469 [00:04<00:00, 101.33it/s]
Epoch: 5, tloss: 0.009286901913583279, vloss: 0.087126, No improvement in the last 1 epochs: 100%|██████████| 469/469 [00:04<00:00, 110.33it/s]
Epoch: 6, tloss: 0.254237562417984, vloss: 0.080455, Improvement found, counter reset to 0: 100%|██████████| 469/469 [00:03<00:00, 123.72it/s]
Epoch: 7, tloss: 0.30666109919548035, vloss: 0.081701, No improvement in the last 1 epochs: 100%|██████████| 469/469 [00:03<00:00, 121.01it/s]
Epoch: 8, tloss: 0.1993370801210

We did not use the total number of requested epochs. The neural network training stopped once the validation set no longer improved.

In [None]:
# Re-evaluate the final model on the validation data. Inference needs no
# gradients, so the autograd graph is not built.
model.eval()
with torch.no_grad():
    pred = model(x_test)
    vloss = loss_fn(pred, y_test)
print(f"Loss = {vloss}")

Loss = 0.08045466244220734


In [None]:
from sklearn.metrics import accuracy_score

# Score the classifier: the predicted class is the index of the largest
# output for each row. Inference needs no gradients.
model.eval()
with torch.no_grad():
    pred = model(x_test)
predict_classes = pred.argmax(dim=1)
correct = accuracy_score(y_test.cpu(), predict_classes.cpu())
print(f"Accuracy: {correct}")

Accuracy: 0.974


### Example 2: Early Stopping with Regression

In [None]:
import time

import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import tqdm
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

# Read the Boston housing dataset; "NA" and "?" cells are treated as missing.
df = pd.read_csv(
    "https://raw.githubusercontent.com/yunssamfinance/DeepLearningInFinance/main/boston.csv", na_values=["NA", "?"]
)

# Pandas to Numpy
x = df[
    [
        "CRIM",
        "ZN",
        "INDUS",
        "CHAS",
        "NOX",
        "RM",
        "AGE",
        "DIS",
        "RAD",
        "TAX",
        "PTRATIO",
        "B",
        "LSTAT",
    ]
].values
y = df["MEDV"].values  # regression

# Split into training and validation sets (25% held out).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

# Standardize the features, as the other examples in this notebook do. The
# scaler is fitted on the training rows only so that no information from the
# validation rows leaks into training.
scaler = preprocessing.StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Numpy to Torch Tensor
x_train = torch.tensor(x_train, device=device, dtype=torch.float32)
y_train = torch.tensor(y_train, device=device, dtype=torch.float32)

x_test = torch.tensor(x_test, device=device, dtype=torch.float32)
y_test = torch.tensor(y_test, device=device, dtype=torch.float32)


# Create datasets
BATCH_SIZE = 16

dataset_train = TensorDataset(x_train, y_train)
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)

# Evaluation data gains nothing from shuffling.
dataset_test = TensorDataset(x_test, y_test)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)


# Create model: a small fully connected network with a single regression output.
model = nn.Sequential(
    nn.Linear(x_train.shape[1], 50),
    nn.ReLU(),
    nn.Linear(50, 25),
    nn.ReLU(),
    nn.Linear(25, 1),
)

model = torch.compile(model, backend="aot_eager").to(device)

# Define the loss function for regression
loss_fn = nn.MSELoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

es = EarlyStopping()

# Train for at most 1000 epochs; the early-stopping monitor may end it sooner.
epoch = 0
done = False
while epoch < 1000 and not done:
    epoch += 1
    n_steps = len(dataloader_train)
    # Stream the batches instead of materializing the whole epoch in a list.
    pbar = tqdm.tqdm(enumerate(dataloader_train), total=n_steps)
    model.train()
    for i, (x_batch, y_batch) in pbar:
        # flatten() turns the (batch, 1) output into (batch,) to match y_batch.
        y_batch_pred = model(x_batch).flatten()
        batch_loss = loss_fn(y_batch_pred, y_batch)
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        loss = batch_loss.item()
        if i == n_steps - 1:
            # End of epoch: score the validation set while the progress bar is
            # still open, so its final line shows the validation loss.
            model.eval()
            with torch.no_grad():  # no autograd graph needed for validation
                pred = model(x_test).flatten()
                vloss = loss_fn(pred, y_test).item()
            if es(model, vloss):
                done = True
            pbar.set_description(
                f"Epoch: {epoch}, tloss: {loss}, vloss: {vloss:>7f}, EStop:[{es.status}]"
            )
        else:
            pbar.set_description(f"Epoch: {epoch}, tloss {loss:}")

Epoch: 1, tloss: 40.7878532409668, vloss: 58.127304, EStop:[]: 100%|██████████| 24/24 [00:00<00:00, 25.56it/s]
Epoch: 2, tloss: 64.16509246826172, vloss: 51.466068, EStop:[Improvement found, counter reset to 0]: 100%|██████████| 24/24 [00:00<00:00, 125.68it/s]
Epoch: 3, tloss: 48.284423828125, vloss: 48.127918, EStop:[Improvement found, counter reset to 0]: 100%|██████████| 24/24 [00:00<00:00, 126.43it/s]
Epoch: 4, tloss: 27.726341247558594, vloss: 37.862492, EStop:[Improvement found, counter reset to 0]: 100%|██████████| 24/24 [00:00<00:00, 120.76it/s]
Epoch: 5, tloss: 48.02397155761719, vloss: 54.010014, EStop:[No improvement in the last 1 epochs]: 100%|██████████| 24/24 [00:00<00:00, 112.73it/s]
Epoch: 6, tloss: 34.65900802612305, vloss: 38.356411, EStop:[No improvement in the last 2 epochs]: 100%|██████████| 24/24 [00:00<00:00, 129.67it/s]
Epoch: 7, tloss: 100.47348022460938, vloss: 31.589128, EStop:[Improvement found, counter reset to 0]: 100%|██████████| 24/24 [00:00<00:00, 122.0

Evaluate the error.

In [None]:
from sklearn import metrics

# Measure RMSE error.  RMSE is common for regression.
# Inference needs no gradients, so the autograd graph is not built.
model.eval()
with torch.no_grad():
    pred = model(x_test)
    score = torch.sqrt(torch.nn.functional.mse_loss(pred.flatten(), y_test))
print(f"Final score (RMSE): {score}")

Final score (RMSE): 4.546853542327881


## Saving and Loading a PyTorch Neural Network

Complex neural networks will take a long time to fit/train. It is helpful to be able to save these neural networks so that you can reload them later. A reloaded neural network will not require retraining. PyTorch usually saves neural networks as [pickle](https://wiki.python.org/moin/UsingPickle) files. The following code trains a neural network to predict Boston housing prices and saves the model.

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Fix the NumPy and PyTorch seeds so repeated runs produce the same numbers.
np.random.seed(0)
torch.manual_seed(0)

# Load the Boston housing data; "NA" and "?" cells are treated as missing.
df = pd.read_csv(
    "https://raw.githubusercontent.com/yunssamfinance/DeepLearningInFinance/main/boston.csv",
    na_values=["NA", "?"],
)

# Predictor columns and the regression target column.
features = df[
    [
        "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
        "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
    ]
]
target = df["MEDV"]  # regression

# Standardize every predictor column.
scaler = StandardScaler()
scaled_features = scaler.fit(features).transform(features)

# This cell picks its own device: CUDA when available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the NumPy/pandas data into float32 tensors on that device.
features_tensor = torch.tensor(scaled_features, dtype=torch.float32, device=device)
target_tensor = torch.tensor(target.values, dtype=torch.float32, device=device)

# Pair features with targets and serve them in batches of 32.
dataset = TensorDataset(features_tensor, target_tensor)
data_loader = DataLoader(dataset, batch_size=32)

# Define the neural network using nn.Sequential
model = nn.Sequential(
    nn.Linear(features_tensor.shape[1], 50),
    nn.ReLU(),
    nn.Linear(50, 25),
    nn.ReLU(),
    nn.Linear(25, 1),
).to(device)

# Define the loss function for regression
loss_fn = nn.MSELoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Train for 1000 epochs.
model.train()
for epoch in range(1000):
    for batch_features, batch_target in data_loader:
        optimizer.zero_grad()
        # flatten() turns the (batch, 1) output into (batch,) to match the target.
        out = model(batch_features).flatten()
        loss = loss_fn(out, batch_target)
        loss.backward()
        optimizer.step()

    # Display status every 100 epochs (the loss of that epoch's final batch).
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, loss: {loss.item()}")

# Measure RMSE error.  RMSE is common for regression.
# Inference needs no gradients, so the autograd graph is not built.
model.eval()
with torch.no_grad():
    pred = model(features_tensor)
    score = torch.sqrt(torch.nn.functional.mse_loss(pred.flatten(), target_tensor))
print(f"Before save score (RMSE): {score}")

# torch.save pickles the whole module object (architecture and weights).
torch.save(model, "medv.pkl")

Epoch 0, loss: 216.08139038085938
Epoch 100, loss: 8.785181999206543
Epoch 200, loss: 4.241296291351318
Epoch 300, loss: 3.1135926246643066
Epoch 400, loss: 3.286428689956665
Epoch 500, loss: 3.1825332641601562
Epoch 600, loss: 1.73397696018219
Epoch 700, loss: 1.6065764427185059
Epoch 800, loss: 1.547377347946167
Epoch 900, loss: 1.5836694240570068
Before save score (RMSE): 1.085649847984314


The code below sets up a neural network and reads the data (for predictions), but it does not clear the model directory or fit the neural network. The code loads the weights from the previous fit. Now we reload the network and perform another prediction. The RMSE should match the previous one exactly if we saved and reloaded the neural network correctly.


In [None]:
# Reload the network from "medv.pkl" and measure its RMSE; it should match the
# score printed before saving.
# NOTE: the file holds a fully pickled module, so weights_only=False is
# required. Unpickling can execute arbitrary code - only load files you
# created yourself or otherwise trust.
loaded_model = torch.load("medv.pkl", map_location=device, weights_only=False)
loaded_model.eval()
with torch.no_grad():  # inference only
    pred = loaded_model(features_tensor)
    score = torch.sqrt(torch.nn.functional.mse_loss(pred.flatten(), target_tensor))
print(f"After load score (RMSE): {score}")

Before save score (RMSE): 1.085649847984314
