In [110]:
# Import the standard library helpers, matplotlib and PyTorch
import os
from pathlib import Path
from pprint import pprint # pprint = pretty print, see: https://docs.python.org/3/library/pprint.html

import matplotlib.pyplot as plt
import torch
from torch import nn # nn contains all of PyTorch's building blocks for neural networks
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
# Check PyTorch version
torch.__version__

'2.0.1+cu117'

In [111]:
# Device-agnostic setup: prefer the GPU when PyTorch can see one, else use the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [112]:
# Known parameters of the line y = weight * x + bias that the model should recover
weight = 0.7
bias = 0.3

# Range of x values to sample
start = 0
end = 1_000_000
step = 0.02

# Features as a column tensor (N, 1): nn.Linear expects a trailing feature dimension,
# so a flat (N,) tensor would cause shape errors later on.
# NOTE(review): assuming the default float32 dtype, x values near 1e6 are only resolved
# to ~0.06, which is coarser than `step` — confirm this range is intended.
X = torch.arange(start, end, step).unsqueeze(-1)

# Labels generated from the known line
y = X * weight + bias

# Preview the first ten feature/label pairs
X[:10], y[:10]

(tensor([[0.0000],
         [0.0200],
         [0.0400],
         [0.0600],
         [0.0800],
         [0.1000],
         [0.1200],
         [0.1400],
         [0.1600],
         [0.1800]]),
 tensor([[0.3000],
         [0.3140],
         [0.3280],
         [0.3420],
         [0.3560],
         [0.3700],
         [0.3840],
         [0.3980],
         [0.4120],
         [0.4260]]))

In [113]:
# Use the first 80% of the samples for training and the remainder for testing
train_split = int(0.8 * len(X))
X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

# Sizes of the four resulting tensors
len(X_train), len(y_train), len(X_test), len(y_test)

(40000000, 40000000, 10000000, 10000000)

In [None]:
def plot_predictions(train_data=X_train, 
                     train_labels=y_train, 
                     test_data=X_test, 
                     test_labels=y_test, 
                     predictions=None):
    """Scatter-plot the training data, the test data and, optionally, predictions.

    The default arguments are bound once, when this cell runs, to the tensors
    named X_train / y_train / X_test / y_test at that moment.

    Args:
        train_data: x values of the training samples (plotted in blue).
        train_labels: y values of the training samples.
        test_data: x values of the test samples (plotted in green).
        test_labels: y values of the test samples.
        predictions: optional predicted y values for `test_data` (plotted in
            red); must contain the same number of points as `test_data`.
    """
    def _to_cpu(values):
        # matplotlib cannot read CUDA tensors or tensors that track gradients,
        # so detach and copy to host memory; non-tensor inputs pass through.
        if isinstance(values, torch.Tensor):
            return values.detach().cpu()
        return values

    train_data, train_labels = _to_cpu(train_data), _to_cpu(train_labels)
    test_data, test_labels = _to_cpu(test_data), _to_cpu(test_labels)

    plt.figure(figsize=(5, 3.5))
    plt.scatter(train_data, train_labels, c="b", s=4, label="Dados de treinamento")
    plt.scatter(test_data, test_labels, c="g", s=4, label="Dados de teste")
    if predictions is not None:
        # Plot the predictions in red (predictions were made on the test data)
        plt.scatter(test_data, _to_cpu(predictions), c="r", s=4, label="Predictions")

    plt.legend(prop={"size":14})

In [None]:
# Note: after a kernel restart, rerun the cell that defines plot_predictions
# before running this one, otherwise the name will not exist.
plot_predictions(train_data=X_train,
                 train_labels=y_train,
                 test_data=X_test,
                 test_labels=y_test)

In [114]:
# Linear regression expressed as an nn.Module subclass
class LinearRegressionModelV2(nn.Module):
    """Single-feature linear model built on one nn.Linear layer (1 input, 1 output)."""

    def __init__(self):
        super().__init__()
        # nn.Linear creates and registers the weight and bias parameters for us
        self.linear_layer = nn.Linear(1, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Pass the input batch `x` through the linear layer and return the result."""
        prediction = self.linear_layer(x)
        return prediction


In [115]:
# Seed PyTorch's RNG before constructing the model so the randomly initialised
# parameters are the same on every run (this isn't always needed but is used for
# demonstrative purposes; try commenting it out and seeing what happens).
torch.manual_seed(42)
model_1 = LinearRegressionModelV2()
# Display the module structure together with its initial parameters
model_1, model_1.state_dict()

(LinearRegressionModelV2(
   (linear_layer): Linear(in_features=1, out_features=1, bias=True)
 ),
 OrderedDict([('linear_layer.weight', tensor([[0.7645]])),
              ('linear_layer.bias', tensor([0.8300]))]))

In [None]:
# Report which device the model's parameters currently live on
# (the layer's weight is the first parameter the module yields)
model_1.linear_layer.weight.device

In [None]:
# Move the model to the device chosen earlier ("cuda" if available, otherwise "cpu").
# nn.Module.to() moves the parameters in place and returns the same module.
model_1 = model_1.to(device)
model_1.linear_layer.weight.device

In [None]:
# Mean absolute error between predictions and labels
loss_fn = nn.L1Loss()

# AdamW updates the parameters of model_1
optimizer = torch.optim.AdamW(model_1.parameters(), lr=1e-4)

In [None]:
torch.manual_seed(42)

# Set the number of epochs 
epochs = 1000000 
# Evaluate on the test set and report progress once every `log_every` epochs
log_every = 100

# Put data on the available device
# Without this, error will happen (not all model/data on device)
X_train = X_train.to(device)
X_test = X_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)

for epoch in range(epochs):
    ### Training
    model_1.train() # train mode is on by default after construction

    # 1. Forward pass over the full training set (one batch per epoch)
    y_pred = model_1(X_train)

    # 2. Calculate loss
    loss = loss_fn(y_pred, y_train)

    # 3. Zero grad optimizer
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Step the optimizer
    optimizer.step()

    ### Testing
    # The test loss is only reported on logging epochs, so the forward pass over
    # the whole test set is run only then instead of on every epoch.
    if epoch % log_every == 0:
        model_1.eval() # put the model in evaluation mode for testing (inference)
        with torch.inference_mode():
            # 1. Forward pass
            test_pred = model_1(X_test)

            # 2. Calculate the loss
            test_loss = loss_fn(test_pred, y_test)

        print(f"Epoch: {epoch} | Train loss: {loss} | Test loss: {test_loss}")

In [None]:
# Compare the parameters the model learned with the ones used to generate the data
# (pprint is imported in the notebook's import cell)
print("The model learned the following values for weights and bias:")
pprint(model_1.state_dict())
print("\nAnd the original values for weights and bias are:")
print(f"weights: {weight}, bias: {bias}")

In [None]:
# Turn model into evaluation mode
model_1.eval()

# Sanity-check a single input, x = 10 (the generating line gives 0.7 * 10 + 0.3 = 7.3).
# The input is created on `device` so this cell also runs on CPU-only machines.
with torch.inference_mode():
    y_preds = model_1(torch.tensor([[10]], device=device, dtype=torch.float32))
y_preds


In [None]:
# plot_predictions draws the predictions against the test inputs, so it needs one
# prediction per test sample; compute them here for the whole test set.
model_1.eval()
with torch.inference_mode():
    test_preds = model_1(X_test.to(device))

# matplotlib needs the values on the CPU, so copy the predictions back before plotting
plot_predictions(predictions=test_preds.cpu())

### Salvar modelo

In [116]:
# Directory that holds saved models; created on demand (no error if it already exists).
# Path is imported in the notebook's import cell.
MODEL_PATH = Path("../models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Full path of the checkpoint file
MODEL_NAME = "my_model_v01.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

In [None]:
# Save only the learned parameters (state_dict) of the trained model.
# This must be model_1: loaded_model_1 is not created until the loading cell below.
print(f"Salvando modelo: {MODEL_SAVE_PATH}")
torch.save(obj=model_1.state_dict(), f=MODEL_SAVE_PATH)

### Carregando modelo salvo

In [117]:
# Rebuild the architecture, then load the saved parameters into it.
# map_location lets a checkpoint written on a GPU machine load where only a CPU exists.
# weights_only=True limits unpickling to tensors and plain containers, because
# torch.load is pickle-based and should not run arbitrary code from the file.
loaded_model_1 = LinearRegressionModelV2()
loaded_model_1.load_state_dict(
    torch.load(MODEL_SAVE_PATH, map_location=device, weights_only=True)
)
loaded_model_1.to(device=device)

LinearRegressionModelV2(
  (linear_layer): Linear(in_features=1, out_features=1, bias=True)
)

In [126]:
# Mean absolute error between predictions and labels
loss_fn = nn.L1Loss()

# Adam updates the parameters of the model restored from disk, with a small learning rate
optimizer = torch.optim.Adam(loaded_model_1.parameters(), lr=1e-6)

In [127]:
torch.manual_seed(42)

# Set the number of epochs 
epochs = 1000000 
# Evaluate on the test set and report progress once every `log_every` epochs
log_every = 100

# Put data on the available device
# Without this, error will happen (not all model/data on device)
X_train = X_train.to(device)
X_test = X_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)

for epoch in range(epochs):
    ### Training
    loaded_model_1.train() # switch back to train mode after any evaluation pass

    # 1. Forward pass over the full training set (one batch per epoch)
    y_pred = loaded_model_1(X_train)

    # 2. Calculate loss
    loss = loss_fn(y_pred, y_train)

    # 3. Zero grad optimizer
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Step the optimizer
    optimizer.step()

    ### Testing
    # The test loss is only reported on logging epochs, so the forward pass over
    # the whole test set is run only then instead of on every epoch.
    if epoch % log_every == 0:
        loaded_model_1.eval() # put the model in evaluation mode for testing (inference)
        with torch.inference_mode():
            # 1. Forward pass
            test_pred = loaded_model_1(X_test)

            # 2. Calculate the loss
            test_loss = loss_fn(test_pred, y_test)

        print(f"Epoch: {epoch} | Train loss: {loss} | Test loss: {test_loss}")

Epoch: 0 | Train loss: 0.007823426276445389 | Test loss: 0.8980956077575684
Epoch: 100 | Train loss: 0.00782475620508194 | Test loss: 0.040073562413454056
Epoch: 200 | Train loss: 0.017352791503071785 | Test loss: 0.012500000186264515
Epoch: 300 | Train loss: 0.017339637503027916 | Test loss: 0.012500000186264515
Epoch: 400 | Train loss: 0.017326543107628822 | Test loss: 0.012500000186264515
Epoch: 500 | Train loss: 0.017314566299319267 | Test loss: 0.012500000186264515
Epoch: 600 | Train loss: 0.007838912308216095 | Test loss: 0.012500000186264515
Epoch: 700 | Train loss: 0.05403891205787659 | Test loss: 0.04002825543284416
Epoch: 800 | Train loss: 0.017286796122789383 | Test loss: 0.0932701826095581
Epoch: 900 | Train loss: 0.007841438986361027 | Test loss: 0.012500000186264515
Epoch: 1000 | Train loss: 0.007842247374355793 | Test loss: 0.012500000186264515
Epoch: 1100 | Train loss: 0.01724867895245552 | Test loss: 0.012500000186264515
Epoch: 1200 | Train loss: 0.007843988947570324 |

KeyboardInterrupt: 

In [130]:
# Turn model into evaluation mode
loaded_model_1.eval()

# Sanity-check a single input, x = 10 (the generating line gives 0.7 * 10 + 0.3 = 7.3).
# The input is created on `device` so this cell also runs on CPU-only machines.
with torch.inference_mode():
    y_preds = loaded_model_1(torch.tensor([[10]], device=device, dtype=torch.float32))
y_preds

tensor([[7.2984]], device='cuda:0')