In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import torch
from torch import nn #nn contains all of PyTorch's building blocks for neural networks

## 1. Preparing and Loading Data

In [2]:
# Ground-truth parameters of the line the model is expected to recover
weight, bias = 0.7, 0.3

# Features: values from 0 (inclusive) to 1 (exclusive) in steps of 0.02, shaped as a column vector.
# Labels follow the linear relationship Y = weight * X + bias.
X = torch.arange(start=0, end=1, step=0.02).unsqueeze(1)
Y = bias + X * weight

In [3]:
# Peek at the first ten features and their corresponding labels
X[:10], Y[:10]

(tensor([[0.0000],
         [0.0200],
         [0.0400],
         [0.0600],
         [0.0800],
         [0.1000],
         [0.1200],
         [0.1400],
         [0.1600],
         [0.1800]]),
 tensor([[0.3000],
         [0.3140],
         [0.3280],
         [0.3420],
         [0.3560],
         [0.3700],
         [0.3840],
         [0.3980],
         [0.4120],
         [0.4260]]))

## Split data into training and test sets


In [4]:
# The first 80% of the samples train the model; the remaining 20% are held out for testing
train_split = int(0.8 * len(X))
Xtrain, Xtest = X[:train_split], X[train_split:]
Ytrain, Ytest = Y[:train_split], Y[train_split:]

In [5]:
# Sanity-check the sizes of the training and test splits
len(Xtrain), len(Ytrain), len(Xtest), len(Ytest)

(40, 40, 10, 10)

In [6]:
def plot_predictions(train_data=Xtrain,
                     train_labels=Ytrain,
                     test_data=Xtest,
                     test_labels=Ytest,
                     predictions=None):
  """
  Scatter-plot the training and test data and, optionally, model predictions.

  Args:
    train_data: x-values of the training points (drawn in blue).
    train_labels: y-values of the training points.
    test_data: x-values of the test points (drawn in green).
    test_labels: y-values of the test points.
    predictions: optional y-values predicted for `test_data`; when given,
      they are drawn in red at the `test_data` x-positions.

  The defaults are bound to the notebook's Xtrain/Ytrain/Xtest/Ytest at the
  moment this function is defined.
  """
  # Each entry: (x-values, y-values, colour, legend label)
  series = [
      (train_data, train_labels, "b", "Training data"),
      (test_data, test_labels, "g", "Testing data"),
  ]
  if predictions is not None:
    # Predictions were made on the test data, so they share its x-values
    series.append((test_data, predictions, "r", "Predictions"))

  plt.figure(figsize=(10, 7))
  for xs, ys, colour, name in series:
    plt.scatter(xs, ys, c=colour, s=4, label=name)

  # Show the legend
  plt.legend(prop={"size": 14})

## Building the model

In [7]:
#create a Linear Regression class model

In [8]:
class LinearRegressionModel(nn.Module):
    """Single-feature linear regression: y = weights * x + bias.

    Both parameters are one-element float64 tensors initialised with
    torch.randn and registered on the module as trainable nn.Parameter objects.
    """

    def __init__(self):
        super().__init__()
        # Registration order (weights, then bias) fixes both the state_dict key
        # order and the order in which random draws are consumed.
        self.weights = nn.Parameter(torch.randn(1, dtype=torch.float64), requires_grad=True)
        self.bias = nn.Parameter(torch.randn(1, dtype=torch.float64), requires_grad=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Define the model's computation: scale the input by `weights`, then add `bias`."""
        scaled = x * self.weights
        return scaled + self.bias

nn.Module - contains the larger building blocks (layers)

nn.Parameter - contains the smaller parameters like weights and biases (put these together to make nn.Module(s))

forward() - tells the larger blocks how to make calculations on inputs (tensors full of data) within nn.Module(s)

torch.optim - contains optimization methods on how to improve the parameters within nn.Parameter to better represent input data

## Checking the contents of the PyTorch model

In [9]:
# Set the manual seed first, since the nn.Parameter values are randomly initialized (torch.randn) at construction
torch.manual_seed(42)

# Create an instance of the model (a subclass of nn.Module that contains nn.Parameter objects)
model_0 = LinearRegressionModel()

# Check the nn.Parameter objects within the nn.Module subclass we created
list(model_0.parameters())

[Parameter containing:
 tensor([0.3367], dtype=torch.float64, requires_grad=True),
 Parameter containing:
 tensor([0.1288], dtype=torch.float64, requires_grad=True)]

In [10]:
#List named parameters
# List the parameters by name (state_dict maps each parameter name to its tensor)
model_0.state_dict()

OrderedDict([('weights', tensor([0.3367], dtype=torch.float64)),
             ('bias', tensor([0.1288], dtype=torch.float64))])

## Making predictions using torch.inference_mode()

In [11]:
# Forward pass on the test data with the still-untrained model.
# inference_mode() disables gradient tracking, which is not needed for prediction.
with torch.inference_mode():
    y_preds = model_0(Xtest)
y_preds

tensor([[0.3982],
        [0.4049],
        [0.4116],
        [0.4184],
        [0.4251],
        [0.4318],
        [0.4386],
        [0.4453],
        [0.4520],
        [0.4588]], dtype=torch.float64)

In [12]:
# Element-wise error of the untrained model's predictions on the test set
Ytest - y_preds

tensor([[0.4618],
        [0.4691],
        [0.4764],
        [0.4836],
        [0.4909],
        [0.4982],
        [0.5054],
        [0.5127],
        [0.5200],
        [0.5272]], dtype=torch.float64)

In [13]:
# Loss function: mean absolute error (MAE), which is what nn.L1Loss computes
loss_fn = nn.L1Loss()

# Optimizer: stochastic gradient descent over model_0's parameters
# (the first argument is the set of parameters to optimize; lr is the learning rate)
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.01)

## Creating an optimization loop in PyTorch.

It's now time to create a training loop (and testing loop).

The training loop involves the model going through the training data and learning the relationships between the features and labels.

The testing loop involves going through the testing data and evaluating how good the patterns are that the model learned on the training data (the model never sees the testing data during training).

## PyTorch training loop


1	**Forward pass**	-     model(x_train)

The model goes through all of the training data once, performing its forward() function calculations.

2	**Calculate the loss**	 -   loss = loss_fn(y_pred, y_train)

The model's outputs (predictions) are compared to the ground truth and evaluated to see how wrong they are.

3	**Zero gradients**	-  optimizer.zero_grad()

The gradients of the parameters managed by the optimizer are reset to zero (they accumulate by default) so they can be recalculated for the specific training step.

4	**Perform backpropagation on the loss**  -  loss.backward()

Computes the gradient of the loss with respect to every model parameter to be updated (each parameter with requires_grad=True). This is known as backpropagation, hence "backwards".

5	**Update the optimizer (gradient descent)**  -  optimizer.step()

Update the parameters with requires_grad=True with respect to the loss gradients in order to improve them.

In [14]:
torch.manual_seed(42)

epochs = 100  # an epoch is one loop through the training data

# Metrics recorded every 10th epoch (e.g. for plotting loss curves afterwards)
epoch_count = []
train_loss_values = []
test_loss_values = []

for epoch in range(epochs):
    ### Training

    # Set the model to training mode
    model_0.train()

    # 1. Forward pass on the training data
    y_pred = model_0(Xtrain)

    # 2. Calculate the loss between the predictions and the training labels
    loss = loss_fn(y_pred, Ytrain)

    # 3. Zero the gradients left over from the previous step (they accumulate by default)
    optimizer.zero_grad()

    # 4. Backpropagate the loss to compute gradients for the model's parameters
    loss.backward()

    # 5. Step the optimizer (perform gradient descent)
    optimizer.step()

    ### Testing

    # Set the model to evaluation mode. Note: eval() only switches the module's
    # train/eval flag; gradient tracking is disabled by inference_mode() below.
    model_0.eval()

    with torch.inference_mode():
        test_pred = model_0(Xtest)
        test_loss = loss_fn(test_pred, Ytest.type(torch.float))
        if epoch % 10 == 0:
            epoch_count.append(epoch)
            # .numpy() must be *called*: without the parentheses the list would
            # store the bound method object instead of the loss value.
            train_loss_values.append(loss.detach().numpy())
            test_loss_values.append(test_loss.detach().numpy())
            print(f"Epoch: {epoch} | MAE Train Loss: {loss} | MAE Test Loss: {test_loss}")

Epoch: 0 | MAE Train Loss: 0.31288135683755547 | MAE Test Loss: 0.4810651841540759
Epoch: 10 | MAE Train Loss: 0.1976713574739093 | MAE Test Loss: 0.346355184508239
Epoch: 20 | MAE Train Loss: 0.08908721056311557 | MAE Test Loss: 0.21729648486064157
Epoch: 30 | MAE Train Loss: 0.05314849742260115 | MAE Test Loss: 0.14464008519991425
Epoch: 40 | MAE Train Loss: 0.04543793101588776 | MAE Test Loss: 0.11360938544629148
Epoch: 50 | MAE Train Loss: 0.041678606478126046 | MAE Test Loss: 0.09919938569279392
Epoch: 60 | MAE Train Loss: 0.03818929484512264 | MAE Test Loss: 0.08886628595826254
Epoch: 70 | MAE Train Loss: 0.03476085499350197 | MAE Test Loss: 0.08059388621223337
Epoch: 80 | MAE Train Loss: 0.031323806993240874 | MAE Test Loss: 0.07232148646620422
Epoch: 90 | MAE Train Loss: 0.02788736528831538 | MAE Test Loss: 0.06473598671634251


In [15]:
# Inspect the parameters after the training loop has run
model_0.state_dict()

OrderedDict([('weights', tensor([0.5784], dtype=torch.float64)),
             ('bias', tensor([0.3513], dtype=torch.float64))])

In [16]:
# Ground-truth values used to generate the data, for comparison with the learned parameters
weight, bias

(0.7, 0.3)

In [17]:
# Print the model's learned parameters next to the true values that generated the data
print("The model learned the following values for weights and bias:")
print(model_0.state_dict())
print("\nAnd the original values for weights and bias are:")
print(f"weights: {weight}, bias: {bias}")

The model learned the following values for weights and bias:
OrderedDict([('weights', tensor([0.5784], dtype=torch.float64)), ('bias', tensor([0.3513], dtype=torch.float64))])

And the original values for weights and bias are:
weights: 0.7, bias: 0.3


## Saving the model

If you've trained a PyTorch model, chances are you'll want to save it and export it somewhere.

As in, you might train it on Google Colab or your local machine with a GPU but you'd like to now export it to some sort of application where others can use it.

Or maybe you'd like to save your progress on a model and come back and load it back later.

**torch.save** - Saves a serialized object to disk using Python's pickle utility. Models, tensors and various other Python objects like dictionaries can be saved using torch.save.

**torch.load** - Uses pickle's unpickling features to deserialize and load pickled Python object files (like models, tensors or dictionaries) into memory. You can also set which device to load the object to (CPU, GPU etc).

**torch.nn.Module.load_state_dict** - Loads a model's parameter dictionary (model.state_dict()) using a saved state_dict() object.

## Saving a PyTorch model's state_dict()


In [18]:
# Directory that holds saved models (created if missing; no error if it already exists).
# Path is imported in the notebook's top import cell.
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Full path of the file the model's state_dict will be saved to
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

MODEL_SAVE_PATH

WindowsPath('models/01_pytorch_workflow_model_0.pth')

In [19]:
# Display the model's repr
model_0

LinearRegressionModel()

In [20]:
#save the model state dict
# Save only the model's state_dict (its named parameters) to MODEL_SAVE_PATH
torch.save(obj=model_0.state_dict(), f=MODEL_SAVE_PATH)

## Loading a saved PyTorch model's state_dict()


In [21]:
# Instantiate a new instance of our model (this will be instantiated with random weights)
loaded_model_0 = LinearRegressionModel()

# Load the state_dict of our saved model (this will update the new instance of our model with trained weights)
loaded_model_0.load_state_dict(torch.load(f = MODEL_SAVE_PATH))

<All keys matched successfully>

In [22]:
# 1. Put the loaded model into evaluation mode
loaded_model_0.eval()

# 2. Use the inference mode context manager to make predictions
with torch.inference_mode():
    loaded_model_preds = loaded_model_0(Xtest) # perform a forward pass on the test data with the loaded model

In [26]:
# test_pred holds model_0's test-set predictions from the final epoch of the training loop
# (computed after that epoch's optimizer step); the reloaded model should reproduce them.
test_pred == loaded_model_preds 

tensor([[True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True]])

In [27]:
# The reloaded parameters should match the trained model_0's state_dict
loaded_model_0.state_dict()

OrderedDict([('weights', tensor([0.5784], dtype=torch.float64)),
             ('bias', tensor([0.3513], dtype=torch.float64))])