In [None]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Ground-truth parameters that the model is later expected to recover
weight, bias = 0.7, 0.3

# Bounds and spacing of the synthetic input values
start, end, step = 0, 1, 0.02

# Inputs: evenly spaced values reshaped into a single column
X = torch.arange(start, end, step).unsqueeze(dim=1)
# Outputs: computed directly from the known line, so the data contains no noise
y = bias + weight * X

In [None]:
# X and y each hold 50 samples, so an 80/20 split yields 40 training and 10 testing samples
train_split = 40
X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

In [None]:
# visualize both training and test data!
def plot_predictions(train_data=X_train,
                     train_labels=y_train,
                     test_data=X_test,
                     test_labels=y_test,
                     predictions=None):
    """Scatter-plot the training data, the testing data and, optionally, predictions.

    The defaults are the module-level splits as they exist when this function
    is defined (Python evaluates default arguments once, at definition time).

    Args:
        train_data: x-values of the training samples (drawn in blue).
        train_labels: y-values of the training samples.
        test_data: x-values of the testing samples (drawn in green).
        test_labels: y-values of the testing samples.
        predictions: optional y-values plotted against ``test_data`` in red;
            nothing extra is drawn when this is ``None``.
    """
    plt.figure(figsize=(10, 7))

    # Each entry is (x-values, y-values, colour, legend label)
    series = [
        (train_data, train_labels, 'b', 'Training data'),
        (test_data, test_labels, 'g', 'Testing data'),
    ]
    if predictions is not None:
        series.append((test_data, predictions, 'r', 'Predictions'))

    for xs, ys, colour, name in series:
        plt.scatter(xs, ys, c=colour, s=4, label=name)

    plt.legend(prop={'size': 14})

plot_predictions()

In [None]:
# we can now actually build the model to try and predict the green values shown above.
# we will use gradient descent and backpropagation

class LinearRegressionModel(nn.Module):
    """Single-feature linear regression computing ``weights * x + bias``.

    Both ``weights`` and ``bias`` are one-element learnable parameters that
    start from random draws (``torch.randn``) and track gradients so an
    optimizer can adjust them.
    """

    def __init__(self):
        super().__init__()

        # weights is drawn before bias, so a fixed seed always yields the same pair
        initial_weight = torch.randn(1, dtype=torch.float)
        self.weights = nn.Parameter(initial_weight, requires_grad=True)

        initial_bias = torch.randn(1, dtype=torch.float)
        self.bias = nn.Parameter(initial_bias, requires_grad=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear regression formula to the input tensor ``x``."""
        scaled = self.weights * x
        return scaled + self.bias

In [None]:
# Fix the seed so the randomly drawn starting parameters are reproducible
torch.manual_seed(42)

# Instantiate the model; its parameters are drawn at construction time
model_0 = LinearRegressionModel()

# Show the starting parameters next to the target weight and bias;
# training should move the former towards the latter
(model_0.state_dict(), weight, bias)

In [None]:
# Predict on the test inputs with the still-untrained model
with torch.inference_mode():
    y_preds = model_0(X_test)

# The red predictions are expected to sit far from the green test points at this stage
plot_predictions(predictions=y_preds)

### Steps to building a training loop
0. Loop through the data
1. Forward pass to make predictions on data - aka forward propagation
2. Calculate the loss (compare forward pass predictions to ground truth labels)
3. Optimizer zero grad
4. Loss backward - move backwards through the network to calculate the gradients of each parameter with respect to the loss - aka backpropagation
5. Optimizer step - use the optimizer to adjust the model parameters to try and improve the loss - aka gradient descent

In [None]:
# Loss function: mean absolute error (MAE / L1)
loss_fn = nn.L1Loss()

# Stochastic gradient descent over the parameters of model_0
optimizer = torch.optim.SGD(params=model_0.parameters(), # params to optimize
                            lr=0.01) # learning rate

# an epoch is one loop through the data
epochs = 201

# Values recorded every 10th epoch, used later for the loss curves
epoch_count = []
loss_values = []
test_loss_values = []

# step 0: loop through the data
for epoch in range(epochs):

    # --- training phase ---

    # set the model to training mode
    model_0.train()

    # 1. forward pass
    y_pred = model_0(X_train)

    # 2. calculate loss (predictions first, ground truth second)
    loss = loss_fn(y_pred, y_train)

    # 3. zero the gradients of the optimizer (they accumulate by default)
    optimizer.zero_grad()

    # 4. backpropagation to calculate gradients
    loss.backward()

    # 5. gradient descent: update the parameters
    optimizer.step()

    # --- evaluation phase ---

    # set the model to testing mode
    model_0.eval()

    # turn off gradient tracking while evaluating (better version of torch.no_grad())
    with torch.inference_mode():

        # 1. forward pass
        test_pred = model_0(X_test)

        # 2. calculate loss
        test_loss = loss_fn(test_pred, y_test)

    # keep track of values
    if epoch % 10 == 0:
        epoch_count.append(epoch)
        # Store a detached copy so the recorded history does not keep a
        # reference to the autograd graph of this iteration
        loss_values.append(loss.detach())
        test_loss_values.append(test_loss)
        print(f"Epoch: {epoch} | Loss: {loss} | Test Loss: {test_loss}")
        print(f"Current State: {model_0.state_dict()}")
        print()

In [None]:
# Predict on the test inputs again, now with the trained parameters
with torch.inference_mode():
    y_preds_new = model_0(X_test)

# The predictions should now sit much closer to the test points
plot_predictions(predictions=y_preds_new)

In [None]:
# Convert every recorded training loss tensor into a NumPy value for plotting
loss_values_np = [recorded.detach().numpy() for recorded in loss_values]

In [None]:
# Draw the training and test loss histories against the recorded epochs
for series, curve_label in ((loss_values_np, "Training Loss"),
                            (test_loss_values, "Test Loss")):
    plt.plot(epoch_count, series, label=curve_label)

plt.title("Training and Test Loss Curves")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

In [None]:
# saving the model
from pathlib import Path

# Directory that holds saved models; created (with parents) if it does not exist yet
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Full path of the file the parameters are written to
MODEL_NAME = 'pytorch_tutorial_model_0.pth'
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# Only the state_dict() (the learned parameters) is saved, not the whole model object
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

In [None]:
# loading the model
# Only the state_dict() was saved, so a fresh model instance is created first
# and the saved parameters are then loaded into it.

loaded_model_0 = LinearRegressionModel()
saved_state = torch.load(MODEL_SAVE_PATH)
loaded_model_0.load_state_dict(saved_state)

In [None]:
# Display the parameters of the reloaded model
loaded_model_0.state_dict()

In [None]:
# Predict on the test inputs with the reloaded model, in evaluation mode
loaded_model_0.eval()
with torch.inference_mode():
    loaded_model_preds = loaded_model_0(X_test)

# Element-wise comparison with test_pred, the test predictions left over from
# the last iteration of the training loop
torch.eq(loaded_model_preds, test_pred)

## Putting it together

In [None]:
# Target parameters for the second experiment, plus noise-free dummy data built from them
weight, bias = 0.5, 0.8

X = torch.arange(0, 1, 0.01).unsqueeze(dim=1)
y = bias + weight * X

# The first 80% of the samples are used for training, the rest for testing
split = int(0.8 * len(X))

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# create a function to plot the data
def plot_predictions(predictions=None):
    """Scatter-plot the current module-level train/test splits and optional predictions.

    Unlike a version with data parameters, this one reads ``X_train``,
    ``y_train``, ``X_test`` and ``y_test`` from module scope at call time.
    It reuses the name of the earlier ``plot_predictions`` and therefore
    replaces it from this point on.

    Args:
        predictions: optional y-values plotted against ``X_test`` in green;
            nothing extra is drawn when this is ``None``.
    """
    plt.figure(figsize=(10, 7))

    # Each entry is (x-values, y-values, colour, legend label)
    series = [
        (X_train, y_train, 'b', 'Training Data'),
        (X_test, y_test, 'r', 'Testing Data'),
    ]
    if predictions is not None:
        series.append((X_test, predictions, 'g', 'Predictions'))

    for xs, ys, colour, name in series:
        plt.scatter(xs, ys, c=colour, s=4, label=name)

    plt.legend()
    
# create the regression class
class LinearRegression(nn.Module):
    """Linear regression implemented with a single ``nn.Linear(1, 1)`` layer."""

    def __init__(self):
        super().__init__()
        # Seeding here (a global side effect) makes the layer's random
        # initialisation identical on every construction
        torch.manual_seed(0)
        self.linear_layer = nn.Linear(1, 1)

    def forward(self, x):
        """Pass ``x`` through the linear layer and return the result."""
        prediction = self.linear_layer(x)
        return prediction
    
# Build the second model
model_1 = LinearRegression()

# Plot its predictions before any training, as a starting point
with torch.inference_mode():
    y_pred = model_1(X_test)
plot_predictions(y_pred)

# Mean-absolute-error loss and SGD over model_1's parameters
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(model_1.parameters(), lr=0.01)

# History lists that train() appends to every 10th epoch
epoch_count, loss_value, test_loss_value = [], [], []

# training loop
def train(model, epochs):
    """Train ``model`` and record its losses every 10th epoch.

    Runs ``epochs + 1`` iterations (epoch numbers ``0`` through ``epochs``
    inclusive). Each iteration does one optimisation step on the training
    split followed by an evaluation on the test split.

    This function relies on module-level names: ``X_train``, ``y_train``,
    ``X_test``, ``y_test``, ``loss_fn`` and ``optimizer``, and it appends to
    the module-level lists ``epoch_count``, ``loss_value`` and
    ``test_loss_value``. Because ``optimizer`` is not a parameter, only the
    parameters that optimizer was constructed with are updated.

    Args:
        model: the module to train and evaluate.
        epochs: number of the last epoch to run.
    """
    for epoch in range(epochs + 1):

        # --- training step ---
        model.train()

        y_pred = model(X_train)
        loss = loss_fn(y_pred, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # --- evaluation step ---
        model.eval()

        with torch.inference_mode():

            test_pred = model(X_test)
            test_loss = loss_fn(test_pred, y_test)

        if epoch % 10 == 0:
            print(f"Epoch: {epoch} | Loss: {loss} | Test Loss: {test_loss}")
            print(model.state_dict())
            print()

            epoch_count.append(epoch)
            # Store a detached copy so the recorded history does not keep a
            # reference to the autograd graph of this iteration
            loss_value.append(loss.detach())
            test_loss_value.append(test_loss)

# Train the second model (epochs 0 through 200)
train(model_1, 200)

# Plot the predictions again, now with the trained parameters
with torch.inference_mode():
    y_new = model_1(X_test)
plot_predictions(y_new)

# Plot the recorded loss histories in a separate figure
plt.figure()
loss_value_np = [recorded.detach().numpy() for recorded in loss_value]
plt.plot(epoch_count, loss_value_np, c='y', label='Loss')
plt.plot(epoch_count, test_loss_value, c='b', label='Test Loss')
plt.legend()