In [1]:
from pprint import pprint  # pretty-printer, see: https://docs.python.org/3/library/pprint.html

import matplotlib.pyplot as plt
import torch
from torch import nn

torch.__version__

'2.4.0'

In [2]:
# Device-agnostic setup: prefer a CUDA GPU, then Apple's MPS backend, and
# fall back to the CPU when neither is available.
device = (
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

print(f"Using device: {device}")

Using device: mps


# Data

In [3]:
# Ground-truth parameters of the line the model should recover: y = weight * X + bias
weight, bias = 0.7, 0.3

# Sampling grid for the inputs: values from `start` (inclusive) to `end` (exclusive)
start, end, step = 0, 1, 0.02

# Indexing with None adds a trailing axis, giving a column of shape (N, 1)
X = torch.arange(start, end, step)[:, None]
y = X * weight + bias

# Peek at the first ten feature/label pairs
X[:10], y[:10]

(tensor([[0.0000],
         [0.0200],
         [0.0400],
         [0.0600],
         [0.0800],
         [0.1000],
         [0.1200],
         [0.1400],
         [0.1600],
         [0.1800]]),
 tensor([[0.3000],
         [0.3140],
         [0.3280],
         [0.3420],
         [0.3560],
         [0.3700],
         [0.3840],
         [0.3980],
         [0.4120],
         [0.4260]]))

In [4]:
# Seed PyTorch's global RNG so the shuffle below is reproducible
torch.manual_seed(42)

# Shuffle features and labels with one shared permutation so each row of X
# stays paired with its label in y
idx = torch.randperm(len(X))
X, y = X[idx], y[idx]

# 80/20 split: the leading rows are used for training, the rest for testing
train_split = int(len(X) * 0.8)
X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

# Model building

We'll use `nn.Linear(in_features, out_features)` to create and manage the model's weight and bias parameters for us.

Here `in_features` is the number of dimensions your input data has and `out_features` is the number of dimensions you'd like the output to have.

In [5]:
class LinearRegressionModel(nn.Module):
    """Linear regression model built from a single `nn.Linear` layer.

    The layer maps one input feature to one output feature, so the module
    holds exactly one weight and one bias.
    """

    def __init__(self):
        super().__init__()
        self.linear_layer = nn.Linear(in_features=1, out_features=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run `x` through the linear layer and return the result."""
        prediction = self.linear_layer(x)
        return prediction


# Build the model and show its (randomly initialised) parameters
model = LinearRegressionModel()
model.state_dict()

OrderedDict([('linear_layer.weight', tensor([[-0.6095]])),
             ('linear_layer.bias', tensor([-0.9899]))])

In [6]:
# Report the device that holds the model's first parameter
next(param.device for param in model.parameters())

device(type='cpu')

In [7]:
# Move the model's parameters to the selected device, then confirm where they live
model = model.to(device)
next(param.device for param in model.parameters())

device(type='mps', index=0)

# Model training

In [8]:
# Mean squared error between predictions and targets is the training objective
loss_fn = nn.MSELoss()

# Stochastic gradient descent over all of the model's parameters
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [9]:
epochs = 1000

# Put every data split on the same device as the model
X_train, y_train, X_test, y_test = (
    split.to(device) for split in (X_train, y_train, X_test, y_test)
)

for epoch in range(epochs):
    # --- Training step ---
    model.train()
    optimizer.zero_grad()            # clear gradients from the previous step
    y_pred = model(X_train)          # forward pass on the training split
    loss = loss_fn(y_pred, y_train)
    loss.backward()                  # backpropagate the training loss
    optimizer.step()                 # update the parameters

    # --- Evaluation on the held-out split, with gradient tracking disabled ---
    model.eval()
    with torch.inference_mode():
        y_pred_test = model(X_test)
        loss_test = loss_fn(y_pred_test, y_test)

    # Log progress every 100th epoch
    if not epoch % 100:
        print(f"Epoch {epoch} - Train Loss: {loss.item()} - Test Loss: {loss_test.item()}")

Epoch 0 - Train Loss: 3.7767555713653564 - Test Loss: 4.0589776039123535
Epoch 100 - Train Loss: 0.04283986613154411 - Test Loss: 0.055265165865421295
Epoch 200 - Train Loss: 0.013779409229755402 - Test Loss: 0.013494606129825115
Epoch 300 - Train Loss: 0.010380047373473644 - Test Loss: 0.009498926810920238
Epoch 400 - Train Loss: 0.00791314709931612 - Test Loss: 0.007194265723228455
Epoch 500 - Train Loss: 0.006033158861100674 - Test Loss: 0.0054817101918160915
Epoch 600 - Train Loss: 0.004599820356816053 - Test Loss: 0.004179146606475115
Epoch 700 - Train Loss: 0.0035070031881332397 - Test Loss: 0.0031862519681453705
Epoch 800 - Train Loss: 0.002673821523785591 - Test Loss: 0.002429274609312415
Epoch 900 - Train Loss: 0.0020385857205837965 - Test Loss: 0.0018521398305892944


In [10]:
# Compare the parameters the model learned with the ground-truth values that
# were used to generate the data.
# `pprint` (pretty print) comes from the import cell at the top of the notebook,
# so all of the notebook's dependencies are declared in one place.
print("The model learned the following values for weights and bias:")
pprint(model.state_dict())
print("\nAnd the original values for weights and bias are:")
print(f"weights: {weight}, bias: {bias}")

The model learned the following values for weights and bias:
OrderedDict([('linear_layer.weight', tensor([[0.5648]], device='mps:0')),
             ('linear_layer.bias', tensor([0.3682], device='mps:0'))])

And the original values for weights and bias are:
weights: 0.7, bias: 0.3


  nonzero_finite_vals = torch.masked_select(


# Model inference

In [11]:
# Put the model into evaluation mode before predicting
model.eval()

# Run the forward pass on the test split under inference mode
with torch.inference_mode():
    y_pred = model(X_test)

# Display the predictions (they stay on the model's device)
y_pred

tensor([[0.8879],
        [0.6280],
        [0.6167],
        [0.7636],
        [0.4473],
        [0.6393],
        [0.9217],
        [0.6958],
        [0.8201],
        [0.4699]], device='mps:0')

**Note:** Many data science libraries such as pandas, matplotlib and NumPy can't use data that is stored on a GPU, so you may run into errors when passing them tensors that aren't on the CPU. To fix this, call `.cpu()` on the tensor to get a copy of it on the CPU (for example, `y_pred.cpu()`).