### General training pipeline in PyTorch

1. Design model (# of inputs and outputs, forward pass)
2. Construct loss and optimizer
3. Build training loop
    - forward pass: compute prediction
    - backward pass: compute gradients
    - update weights

In [58]:
import torch
import torch.nn as nn

In [59]:
# Target function: f(x) = 2 * x

# Training samples, one sample per row: x.shape == (n_samples, n_features)
x = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

# Single input used to check the model before and after training
x_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = x.shape

input_size = n_features
# The output width is determined by the targets, not by the inputs; taking it
# from y keeps the model correct if the two ever differ.
output_size = y.shape[1]

model = nn.Linear(input_size, output_size)

# model prediction before training (inference only, so skip gradient tracking)
with torch.no_grad():
    print(f"Prediction before training: f(5) = {model(x_test).item():.3f}")

Prediction before training: f(5) = 0.678


In [60]:
class LinearRegression(nn.Module):
    """Linear regression model consisting of a single ``nn.Linear`` layer."""

    def __init__(self, input_dim: int, output_dim: int) -> None:
        """Create the layer mapping ``input_dim`` features to ``output_dim`` outputs."""
        super().__init__()
        # define layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the linear layer applied to ``x``."""
        return self.lin(x)
    
# Rebind `model` to an instance of the custom LinearRegression module.
model = LinearRegression(input_size, output_size)

In [61]:
# Training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # forward pass: prediction for all training samples at once
    y_pred = model(x)

    # loss: nn.MSELoss takes (input, target), i.e. prediction first, label second
    l = loss(y_pred, y)

    # gradients --> backward pass
    l.backward()  # calculate dl/dw and accumulate the result in w.grad

    # update weights
    optimizer.step()

    # Zero the gradients so they do not accumulate into the next iteration
    optimizer.zero_grad()

    if epoch % 10 == 0:
        # NOTE: w is read after the update, while the loss shown was computed
        # before it, so the two values belong to slightly different states.
        w, b = model.parameters()
        print(f"Epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l.item():.8f}")

# Inference only: no need to track gradients for the final prediction
with torch.no_grad():
    print(f"Prediction after training: f(5) = {model(x_test).item():.3f}")

Epoch 1: w = 0.523, loss = 30.30854416
Epoch 11: w = 1.783, loss = 0.78502476
Epoch 21: w = 1.985, loss = 0.02112963
Epoch 31: w = 2.017, loss = 0.00131824
Epoch 41: w = 2.022, loss = 0.00076075
Epoch 51: w = 2.022, loss = 0.00070403
Epoch 61: w = 2.021, loss = 0.00066272
Epoch 71: w = 2.021, loss = 0.00062415
Epoch 81: w = 2.020, loss = 0.00058782
Epoch 91: w = 2.020, loss = 0.00055361
Prediction after training: f(5) = 10.039


Note that the output is not perfect and will vary between runs: the model's weights are initialized randomly, and the result also depends on the optimizer you choose, its learning rate, and the random seed.

---
In fact, using `nn.Linear` directly is equivalent to the custom module below, which simply wraps a single `nn.Linear` layer.

In [None]:
class LinearRegression(nn.Module):
    """Linear regression model consisting of a single ``nn.Linear`` layer."""

    def __init__(self, input_dim: int, output_dim: int) -> None:
        """Create the layer mapping ``input_dim`` features to ``output_dim`` outputs."""
        super().__init__()
        # define layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the linear layer applied to ``x``."""
        return self.lin(x)
    
# Rebind `model` to a new, untrained LinearRegression instance.
model = LinearRegression(input_size, output_size)