# 02 · Linear Regression with PyTorch

In this notebook we train a linear regression model with PyTorch, using the data, model, training-loop and metric helpers from the local `lsf` library (imported from `../src`).

Topics covered:
1. Generating synthetic data
2. Defining an `nn.Module` model
3. Training loop (loss → backward → step)
4. Evaluating with MSE, MAE and R²
5. Visualising predictions vs. ground truth

In [None]:
# Make the project-local `lsf` package importable. '../src' is a relative path,
# so it is resolved against the kernel's working directory (expected: notebooks/).
import sys
sys.path.insert(0, '../src')   # index 0: this path is searched before the others

import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# Helpers from the local lsf package that the cells below call.
from lsf import (
    Config,
    LinearRegression,
    make_regression_data,
    mae,
    mse,
    r2_score,
    set_seed,
    train_one_epoch,
    evaluate,
)

# Show which PyTorch version produced the outputs of this notebook.
print(f'PyTorch {torch.__version__}')

## 1. Configuration

In [None]:
# All tunable values for this run live in a single Config object.
cfg = Config(
    n_samples=500, n_features=1, noise=0.3,   # synthetic dataset
    lr=0.05, epochs=80, batch_size=32,        # optimisation
    seed=42,                                  # reproducibility
)
print(cfg)

## 2. Seed & Device

In [None]:
# Call lsf's set_seed with the configured seed before any data or model exists.
set_seed(cfg.seed)

# Use the GPU when PyTorch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('device:', device)

## 3. Data

In [None]:
# Ask the lsf helper for the two loaders plus the ground-truth weight and bias.
# NOTE(review): cfg.test_size and cfg.n_test are not set in this notebook's
# Config(...) call — presumably Config supplies them itself; confirm.
# NOTE(review): cfg.batch_size is not forwarded to make_regression_data —
# confirm the helper obtains the batch size some other way.
train_loader, test_loader, true_w, true_b = make_regression_data(
    n_samples=cfg.n_samples, n_features=cfg.n_features, noise=cfg.noise,
    test_size=cfg.test_size, seed=cfg.seed,
)

# One print call, one line of output per item.
print(
    f'true weight: {true_w.squeeze().item():.4f}',
    f'true bias  : {true_b.item():.4f}',
    f'train batches: {len(train_loader)}  |  test samples: {cfg.n_test}',
    sep='\n',
)

In [None]:
# Visualise training data (works for n_features=1)
# Walk the loader once and keep every batch; the lists get their own names so
# that X_all / y_all only ever refer to the final flat arrays.
X_batches, y_batches = [], []
for xb, yb in train_loader:
    X_batches.append(xb)
    y_batches.append(yb)
X_all = torch.cat(X_batches).squeeze().numpy()
y_all = torch.cat(y_batches).squeeze().numpy()

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(X_all, y_all, s=10, alpha=0.5, label='train')
ax.set(xlabel='x', ylabel='y', title='Training data')
ax.legend()   # the 'train' label is only visible once a legend is drawn
fig.tight_layout()
plt.show()

## 4. Model, Loss & Optimiser

In [None]:
# Build the model and move it to the selected device.
model = LinearRegression(in_features=cfg.n_features)
model = model.to(device)

# Mean-squared-error objective, optimised with plain SGD at the configured rate.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=cfg.lr)

print(model)

# Number of scalar values across all parameter tensors of the model.
total_params = sum(map(lambda tensor: tensor.numel(), model.parameters()))
print(f'Total parameters: {total_params}')

## 5. Training Loop

In [None]:
# Value returned by train_one_epoch for each epoch, in order.
train_losses = []

# Epochs are numbered from 1 so the progress lines read "10/80", "20/80", ...
for epoch in range(1, cfg.epochs + 1):
    loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
    train_losses.append(loss)
    if epoch % 10:
        continue   # report only every 10th epoch
    print(f'Epoch {epoch:3d}/{cfg.epochs}  loss={loss:.6f}')

print('Training complete.')

In [None]:
# Loss curve. The training loop numbers epochs from 1, so plot against 1..N
# instead of the default 0-based list index; otherwise the 'Epoch' axis is
# shifted by one relative to the printed progress lines.
epoch_numbers = range(1, len(train_losses) + 1)

fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(epoch_numbers, train_losses)
ax.set(xlabel='Epoch', ylabel='MSE Loss', title='Training loss curve')
fig.tight_layout()
plt.show()

## 6. Evaluation

In [None]:
# evaluate() returns three values; the first one is not used in this notebook.
_, preds, targets = evaluate(model, test_loader, criterion, device)

# Report the three test-set metrics, one per output line.
print(
    f'Test MSE : {mse(preds, targets):.6f}',
    f'Test MAE : {mae(preds, targets):.6f}',
    f'Test R²  : {r2_score(preds, targets):.4f}',
    sep='\n',
)

In [None]:
# Parity plot: predictions vs. ground truth
# Tensor.numpy() raises for tensors that live on the GPU or require grad, and
# `device` may be CUDA in this notebook, so detach and move to the CPU first.
p = preds.detach().cpu().squeeze().numpy()
t = targets.detach().cpu().squeeze().numpy()

fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(t, p, s=15, alpha=0.6)

# Shared range for the reference line, padded slightly beyond the data.
lim = [min(t.min(), p.min()) - 0.2, max(t.max(), p.max()) + 0.2]
ax.plot(lim, lim, 'r--', linewidth=1)   # y = x: where a perfect prediction falls
ax.set(xlabel='True', ylabel='Predicted', title='Parity plot (test set)')
fig.tight_layout()
plt.show()

## 7. Learned Parameters vs. True Parameters

In [None]:
# Read the layer's weight and bias back as plain Python numbers.
# .item() needs one-element tensors, so this cell relies on n_features=1.
learned_w, learned_b = (
    param.detach().cpu().item()
    for param in (model.linear.weight, model.linear.bias)
)

# Small side-by-side table: data-generating values vs. fitted values.
print('           weight        bias')
print(f'True    :  {true_w.item():.4f}        {true_b.item():.4f}')
print(f'Learned :  {learned_w:.4f}        {learned_b:.4f}')

## Summary

We trained a linear regression model using stochastic gradient descent (`torch.optim.SGD`) and verified that the learned
parameters converge close to the true data-generating parameters.

Key takeaways:
- `nn.Module` + `nn.Linear` encapsulate model parameters.
- `loss.backward()` computes gradients via autograd.
- `optimizer.step()` updates parameters using those gradients.
- R² close to 1 means the model explains most of the variance.