Pipeline
1. design the model (input, output, forward pass)
2. Construct the loss and optimizer 
3. Training loop   
    3.1 forward pass (compute prediction)  
    3.2 compute gradients (backwad pass)  
    3.3 update the weights  



In [None]:
"""

    - optimizer from torch.nn.optim
    - losses from torch.nn 
    - Activation functions from torch.nn.functional 


        nn.MSELoss()
        torch.optim.SGD()
    METHODS: 
        loss.backward()
        optimizer.step()    
        optimier.zero_grad() : set the gradients to zero 
"""

In [1]:
import torch
from torch import nn 

In [2]:
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)

W = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


# return Y_pred
def forward(X):
    return W * X; 


epochs = 200
lr = 0.01
loss = nn.MSELoss() # this returns a callable function 

optimizer = torch.optim.SGD([W], lr=lr) # for updating the weighs


# training loops
for epoch in range(epochs) : 

    # predict
    y_pred = forward(X)

    # calculate loss
    l = loss(Y,y_pred)

    # gradient bacward pass
    l.backward() # puts in w.grads the dl/dw

    # calculate the gradients 
    optimizer.step()

    # now empty the gradients 
    # W.grad.zero_()
    optimizer.zero_grad()

    print(f'epoch:{epoch+1}, loss:{l}, w:{W:.3f}')



print(f'prediction after training: {forward(5):.3f}' )

epoch:1, loss:30.0, w:0.300
epoch:2, loss:21.674999237060547, w:0.555
epoch:3, loss:15.660187721252441, w:0.772
epoch:4, loss:11.314486503601074, w:0.956
epoch:5, loss:8.17471694946289, w:1.113
epoch:6, loss:5.9062323570251465, w:1.246
epoch:7, loss:4.2672529220581055, w:1.359
epoch:8, loss:3.083089828491211, w:1.455
epoch:9, loss:2.227532148361206, w:1.537
epoch:10, loss:1.609391689300537, w:1.606
epoch:11, loss:1.1627856492996216, w:1.665
epoch:12, loss:0.8401124477386475, w:1.716
epoch:13, loss:0.6069811582565308, w:1.758
epoch:14, loss:0.4385439455509186, w:1.794
epoch:15, loss:0.3168478012084961, w:1.825
epoch:16, loss:0.22892260551452637, w:1.851
epoch:17, loss:0.1653965264558792, w:1.874
epoch:18, loss:0.11949898302555084, w:1.893
epoch:19, loss:0.08633805811405182, w:1.909
epoch:20, loss:0.0623791441321373, w:1.922
epoch:21, loss:0.0450688973069191, w:1.934
epoch:22, loss:0.03256231173872948, w:1.944
epoch:23, loss:0.02352631464600563, w:1.952
epoch:24, loss:0.01699772477149963

### Now let's make it with a model 
few things change:   
- we  need to define the input and output shapes  
- we need to convert teh X array to be a 2d array   
- when we predict, we need to pass a tensor as input   

In [12]:
# training examples 
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

# X = torch.tensor([1,2,3,4], dtype=torch.float32)
# Y = torch.tensor([2,4,6,8], dtype=torch.float32)

x_test = torch.tensor([5], dtype=torch.float32)

# we won't need this 
# W = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)



n_samples , n_features = X.shape
print('X.shape ', X.shape)
print('n samples :', n_samples)
print('n_features: ', n_features)

input_size = n_features
output_size = n_features

# create a linear model 
model = nn.Linear(
    input_size, # 1
    output_size #1
)


epochs = 200
lr = 0.01
loss = nn.MSELoss() # this is a callable function 
optimizer = torch.optim.SGD(model.parameters(), lr=lr) # for updating the weighs


print('prediction before training: ', model(x_test).item())
# training loops
for epoch in range(epochs) : 

    # predict
    y_pred = model(X)

    # calculate loss
    l = loss(Y,y_pred)

    # gradient bacward pass
    l.backward() # puts in w.grads the dl/dw

    # calculate the gradients 
    optimizer.step()

    # now empty the gradients 
    # W.grad.zero_()
    optimizer.zero_grad()

    print(f'epoch:{epoch+1}, loss:{l}, w:{W:.3f}')



print(f'prediction after training: {model(x_test).item():3f}' )

X.shape  torch.Size([4, 1])
n samples : 4
n_features:  1
prediction before training:  -0.31068968772888184
epoch:1, loss:30.16092300415039, w:2.000
epoch:2, loss:20.982641220092773, w:2.000
epoch:3, loss:14.613698959350586, w:2.000
epoch:4, loss:10.194101333618164, w:2.000
epoch:5, loss:7.127113342285156, w:2.000
epoch:6, loss:4.998673439025879, w:2.000
epoch:7, loss:3.5214760303497314, w:2.000
epoch:8, loss:2.4961628913879395, w:2.000
epoch:9, loss:1.7844040393829346, w:2.000
epoch:10, loss:1.2902166843414307, w:2.000
epoch:11, loss:0.9469995498657227, w:2.000
epoch:12, loss:0.7085397243499756, w:2.000
epoch:13, loss:0.5427693128585815, w:2.000
epoch:14, loss:0.42743945121765137, w:2.000
epoch:15, loss:0.3471103012561798, w:2.000
epoch:16, loss:0.29106947779655457, w:2.000
epoch:17, loss:0.2518841326236725, w:2.000
epoch:18, loss:0.2243955135345459, w:2.000
epoch:19, loss:0.20502489805221558, w:2.000
epoch:20, loss:0.1912895143032074, w:2.000
epoch:21, loss:0.18146544694900513, w:2.00

### Using classes 

In [14]:
class LinearRegression(nn.Module): 

    def __init__(self, input_dim, output_dim): 
        super(LinearRegression, self).__init__()

        # define the layer
        self.linear = nn.Linear(
            input_dim, 
            output_dim
        )


    def forward(self, X): 
        return self.linear(X)
    


input_size = 1
output_size = 1
model = LinearRegression(input_size, output_size)