In [36]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

Backprop follows three rules: 
1) calculate the loss from the inputs (forward pass)
2) calculate the gradients at each node
3) calculate the gradients of the loss with respect to the inputs witht the chain rule(backward pass)

In [31]:
# The simplest of all examples

x = torch.tensor(1.0,requires_grad=True) # inputs
y = torch.tensor(2.0,requires_grad=True) # outputs
w = torch.tensor(1.0,requires_grad=True) # weights

x.grad_zero = True
y.grad_zero = True
w.grad_zero = True

In [32]:
# forward pass: 

yhat = x * w
loss = (y - yhat)**2

# backward pass:

loss.backward()
w.grad

tensor(-2.)

In [35]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')


# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')


# Convert inputs and targets to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)



tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


The general equation for linear regression is $y = m \times x + b$ which will be the follwing in vector notation:

$$\hspace{1.5cm} X \hspace{1.1cm} \times \hspace{0.9cm} W^T \hspace{0.8cm}  + \hspace{1cm} b \hspace{1cm}$$
$$\left[ \begin{array}{cc}
73 & 67 & 43 \\
91 & 88 & 64 \\
\vdots & \vdots & \vdots \\
69 & 96 & 70
\end{array} \right]
%
\times
%
\left[ \begin{array}{cc}
w_{11} & w_{21} \\
w_{12} & w_{22} \\
w_{13} & w_{23}
\end{array} \right]
%
+
%
\left[ \begin{array}{cc}
b_{1} & b_{2} \\
b_{1} & b_{2} \\
\vdots & \vdots \\
b_{1} & b_{2} \\
\end{array} \right]$$

In [77]:
# simple regession from scratch

w = torch.rand(2,3)
b = torch.rand(5,2)

def linreg(x):
    return x @ w.T + b


# # forward pass, calculates the difference between the targets and the calculated model values yhat
# yhat = linreg(inputs)
# loss = (yhat - targets)**2

# loss

# # calculate gradients from scratch with chain rule

# # derivatives of the linear model
# dydw = x
# dydb = 1

# # derivative of the loss fuction for the weights and the intercept aka the gradients

# dlossdw = 2*(yhat - targets).T @ inputs
# dlossdb = 2*(yhat - targets).T @ torch.ones(5,3)


## training loop: 
epochs = 25000
lr = 0.00001

for i in range(epochs):
    # forward pass, calculates the difference between the targets and the calculated model values yhat
    yhat = linreg(inputs)
    loss = (yhat - targets)**2

    # calculate gradients from scratch with chain rule
    # derivatives of the linear model
    dydw = x
    dydb = 1

    # derivative of the loss fuction for the weights and the intercept aka the gradients

    dlossdw = 2*(yhat - targets).T @ inputs
    dlossdb = 2*(yhat - targets).T @ torch.ones(5,5)

    # update weights and intercept 
    w -= lr * dlossdw 
    b -= lr * dlossdb.T


print(yhat,y)

tensor([[ 57.3252,  70.3803],
        [ 82.0602, 100.5875],
        [118.6694, 132.9487],
        [ 21.0798,  37.0214],
        [101.9770, 119.1747]]) tensor(2., requires_grad=True)


In [79]:
# lets use more data

# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70], [73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70], [73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70]], dtype='float32')
# Targets (apples, oranges)
targets = np.array([[56, 70], [81, 101], [119, 133], [22, 37], [103, 119], 
                    [56, 70], [81, 101], [119, 133], [22, 37], [103, 119], 
                    [56, 70], [81, 101], [119, 133], [22, 37], [103, 119]], dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

print(inputs.shape, targets.shape)

torch.Size([15, 3]) torch.Size([15, 2])


In [96]:
# simple regession from scratch

w = torch.rand(2,3)
b = torch.rand(15,2)

def linreg(x):
    return x @ w.T + b


## training loop: 
epochs = 10000
lr = 0.000001

for i in range(epochs):
    # forward pass, calculates the difference between the targets and the calculated model values yhat
    yhat = linreg(inputs)
    loss = (yhat - targets)**2

    # calculate gradients from scratch with chain rule
    # derivatives of the linear model
    dydw = x
    dydb = 1

    # derivative of the loss fuction for the weights and the intercept aka the gradients

    dlossdw = 2*(yhat - targets).T @ inputs
    dlossdb = 2*(yhat - targets).T @ torch.ones(15,15)

    # update weights and intercept 
    w -= lr * dlossdw 
    b -= lr * dlossdb.T


print(yhat, targets)

tensor([[ 57.3906,  69.9340],
        [ 82.3561, 100.9502],
        [118.6994, 132.6333],
        [ 21.3155,  36.8726],
        [102.1955, 119.3424],
        [ 56.7626,  70.5834],
        [ 81.6821, 100.2627],
        [118.7080, 133.1086],
        [ 20.7266,  36.9454],
        [101.7264, 119.4273],
        [ 57.3634,  70.1312],
        [ 82.1432, 100.6637],
        [118.6931, 133.1965],
        [ 21.3693,  37.3857],
        [102.1225, 118.8021]]) tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [None]:
class ModelRegression(nn.Module):
    def __init__(self):
        super(ModelRegression, self).__init__()
        