# Linear Regression

This notebook shows how to train a linear regression model in PyTorch in two ways:
- from scratch, with the model, the cost function, and the gradient descent update written by hand;
- using PyTorch's built-in modules and functions.

## 1. Linear Regression from scratch

In [1]:
import numpy as np
import torch

### 1.1. Data

In [2]:
# feature matrix: one row per sample, one column per input feature
inputs = np.asarray(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

# target matrix: one row per sample, one column per output
targets = np.asarray(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

# tensor views of the numpy arrays (torch.from_numpy does not copy the data)
X = torch.from_numpy(inputs)
Y = torch.from_numpy(targets)

### 1.2. Initialize parameters

In [3]:
# X is (samples, features): m = number of samples, n = number of features
m, n = X.size()

# Y is (samples, outputs): a = number of outputs, the sample count is discarded
_, a = Y.size()

# report the three dimensions
print('number of samples: %s' % m)
print('number of features: %s' % n)
print('number of outputs: %s' % a)

number of samples: 5
number of features: 3
number of outputs: 2


In [4]:
# initialize parameters
# Draw the starting values from a locally seeded generator so that a fresh
# "Restart Kernel -> Run All" always begins from the same W and b, without
# touching the global random state used elsewhere in the notebook.
init_gen = torch.Generator().manual_seed(0)
W = torch.randn(a, n, generator=init_gen, requires_grad=True)  # weights, one row per output
b = torch.randn(a, generator=init_gen, requires_grad=True)  # bias, one entry per output

### 1.3. Define functions

#### 1.3.1. Hypothesis function / Model

In [5]:
def model(X, W, b):
    """Linear hypothesis: multiply the inputs by the transposed weights and add the bias.

    Args:
        X: input tensor; its last dimension must match the second dimension of W.
        W: 2-D weight tensor, transposed before the matrix product.
        b: bias tensor, broadcast-added to the product.

    Returns:
        The tensor ``X @ W.t() + b``.
    """
    weighted = torch.matmul(X, W.t())
    return weighted + b

#### 1.3.2. Cost function / Loss function

In [6]:
def cost_fn(Y_hat, Y):
    """Mean squared error between predictions and targets.

    Args:
        Y_hat: predicted values.
        Y: target values, subtracted element-wise from Y_hat.

    Returns:
        A scalar tensor: the sum of squared differences divided by the
        number of elements in the difference.
    """
    residual = Y_hat - Y
    squared_total = (residual * residual).sum()
    return squared_total / residual.numel()

### 1.4. Train the model using gradient descent

In [7]:
epochs = 100  # number of gradient descent iterations
lr = 1e-5  # learning rate (step size)
for i in range(epochs):
    # forward pass and cost on the full data set
    Y_hat = model(X, W, b)
    cost = cost_fn(Y_hat, Y)
    # back-propagate: fills W.grad and b.grad
    cost.backward()
    # the update itself must not be recorded by autograd
    with torch.no_grad():
        for param in (W, b):
            param.sub_(lr * param.grad)  # gradient descent step
            param.grad.zero_()  # reset, otherwise the next backward() accumulates

  Variable._execution_engine.run_backward(


### 1.5. Predict

In [8]:
# one new sample, kept 2-D (1 row, 3 features) like the training inputs
x = torch.tensor([[75., 63., 44.]])
# run it through the hand-written model with the trained parameters
y_hat = model(x, W, b)
print(y_hat)

tensor([[55.2381, 68.8495]], grad_fn=<AddBackward0>)


## 2. Linear Regression using PyTorch built-ins

In [9]:
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F

### 2.1 Data

In [10]:
# feature matrix: one row per sample, one column per input feature
inputs = np.asarray(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
        [74, 66, 43],
        [91, 87, 65],
        [88, 134, 59],
        [101, 44, 37],
        [68, 96, 71],
        [73, 66, 44],
        [92, 87, 64],
        [87, 135, 57],
        [103, 43, 36],
        [68, 97, 70],
    ],
    dtype=np.float32,
)

# target matrix: one row per sample, one column per output
targets = np.asarray(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
        [57, 69],
        [80, 102],
        [118, 132],
        [21, 38],
        [104, 118],
        [57, 69],
        [82, 100],
        [118, 134],
        [20, 38],
        [102, 120],
    ],
    dtype=np.float32,
)

# tensor views of the numpy arrays (torch.from_numpy does not copy the data)
X = torch.from_numpy(inputs)
Y = torch.from_numpy(targets)

In [11]:
# define dataset
# Wrap the input tensor X and the target tensor Y together so that the data
# loader built from this dataset yields them as matching (inputs, targets) batches.
dataset = TensorDataset(X, Y)

In [12]:
# mini-batch loader over the dataset: batches of `batch_size` samples,
# drawn in a reshuffled order on every pass
batch_size = 5
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [25]:
# Peek at a single mini-batch to see what the loader yields.
# next(iter(...)) fetches exactly one batch, replacing the loop-and-break form.
batch = next(iter(dataloader))
xs, ys = batch  # a batch is an (inputs, targets) pair

# the raw batch as yielded by the loader
print(batch)
print('\n')

# the unpacked inputs and targets
print(xs)
print(ys)
print('\n')

# their shapes: (batch size, features) and (batch size, outputs)
print(xs.shape)
print(ys.shape)

[tensor([[101.,  44.,  37.],
        [ 88., 134.,  59.],
        [ 91.,  88.,  64.],
        [102.,  43.,  37.],
        [ 92.,  87.,  64.]]), tensor([[ 21.,  38.],
        [118., 132.],
        [ 81., 101.],
        [ 22.,  37.],
        [ 82., 100.]])]


tensor([[101.,  44.,  37.],
        [ 88., 134.,  59.],
        [ 91.,  88.,  64.],
        [102.,  43.,  37.],
        [ 92.,  87.,  64.]])
tensor([[ 21.,  38.],
        [118., 132.],
        [ 81., 101.],
        [ 22.,  37.],
        [ 82., 100.]])


torch.Size([5, 3])
torch.Size([5, 2])


### 2.2 Define functions

#### 2.2.1 Hypothesis function / Model

In [26]:
# X is (samples, features): m = number of samples, n = number of features
m, n = X.size()

# Y is (samples, outputs): a = number of outputs
_, a = Y.size()

# hypothesis function: one linear layer mapping n features to a outputs
model = nn.Linear(in_features=n, out_features=a)

# show the initial weight, the initial bias, and the full parameter list
print(model.weight, model.bias, list(model.parameters()), sep='\n')

Parameter containing:
tensor([[-0.0591, -0.0670,  0.0513],
        [ 0.0832,  0.0762,  0.1309]], requires_grad=True)
Parameter containing:
tensor([ 0.2436, -0.5457], requires_grad=True)
[Parameter containing:
tensor([[-0.0591, -0.0670,  0.0513],
        [ 0.0832,  0.0762,  0.1309]], requires_grad=True), Parameter containing:
tensor([ 0.2436, -0.5457], requires_grad=True)]


#### 2.2.2 Cost function / Loss function

In [27]:
# Use PyTorch's built-in mean squared error as the cost function.
# F is torch.nn.functional; cost_fn is called as cost_fn(predictions, targets).
cost_fn = F.mse_loss

#### 2.2.3 Optimizer

In [28]:
# stochastic gradient descent over every parameter of the model
# (weight and bias), using a learning rate of 1e-5
opt = torch.optim.SGD(params=model.parameters(), lr=1e-5)

### 2.3 Train the model

In [29]:
def fit(epochs, model, cost_fn, opt, dataloader):
    """Train ``model`` with mini-batch gradient descent.

    Args:
        epochs: number of full passes over ``dataloader``.
        model: callable mapping a batch of inputs to predictions.
        cost_fn: callable ``cost_fn(predictions, targets)`` returning a scalar
            tensor that supports ``.backward()`` and ``.item()``.
        opt: optimizer exposing ``step()`` and ``zero_grad()`` for the
            parameters of ``model``.
        dataloader: iterable yielding ``(xs, ys)`` batches.

    Every 10th epoch, the cost of the last batch seen in that epoch is
    printed. Nothing is printed for an epoch in which the dataloader yielded
    no batches. Returns None.
    """
    # Discard gradients accumulated before this call; otherwise they would be
    # added to the first batch's gradients and distort the first update.
    opt.zero_grad()
    for epoch in range(epochs):
        cost = None  # stays None if the dataloader yields no batches
        for xs, ys in dataloader:
            ys_hat = model(xs)  # predict
            cost = cost_fn(ys_hat, ys)  # compute cost
            cost.backward()  # compute gradients
            opt.step()  # optimize parameters, gradient descent
            opt.zero_grad()  # reset gradients so the next batch starts clean
        # report progress every 10 epochs (cost of the epoch's last batch)
        if cost is not None and (epoch + 1) % 10 == 0:
            print('epoch {}/{}, cost: {:.4f}'.format(epoch+1, epochs, cost.item()))

In [30]:
# train for 100 epochs with the model, cost function, optimizer and loader defined above
fit(epochs=100, model=model, cost_fn=cost_fn, opt=opt, dataloader=dataloader)

epoch 10/100, cost: 345.8451
epoch 20/100, cost: 217.3510
epoch 30/100, cost: 139.1735
epoch 40/100, cost: 8.7400
epoch 50/100, cost: 141.9800
epoch 60/100, cost: 57.5990
epoch 70/100, cost: 59.6549
epoch 80/100, cost: 45.3930
epoch 90/100, cost: 25.8473
epoch 100/100, cost: 23.6790


### 2.4 Predict

In [31]:
# one new sample, kept 2-D (1 row, 3 features) like the training inputs
x = torch.tensor([[75., 63., 44.]])
# run it through the trained linear layer
y_hat = model(x)
print(y_hat)

tensor([[54.8035, 68.4162]], grad_fn=<AddmmBackward>)
