In [1]:
# -*- coding: utf-8 -*-
import torch
import math

## Construct input and output data
- X: 2000 evenly spaced samples in [-π, π]
- Y: sin(X)

In [2]:
# Input grid: 2000 evenly spaced points covering [-pi, pi].
x = torch.linspace(start=-math.pi, end=math.pi, steps=2000)
x

tensor([-3.1416, -3.1384, -3.1353,  ...,  3.1353,  3.1384,  3.1416])

In [3]:
# Target values: the sine of every input sample.
y = x.sin()
y

tensor([ 8.7423e-08, -3.1430e-03, -6.2863e-03,  ...,  6.2863e-03,
         3.1430e-03, -8.7423e-08])

### Calculate elements as input to a linear model

In [4]:
# sin(x) is fitted here with a cubic polynomial, which is linear in the
# features (x, x^2, x^3); a single linear layer can therefore learn it once
# those features are precomputed.
p = torch.tensor([1, 2, 3])

# x[:, None] has shape (2000, 1) and p has shape (3,), so broadcasting raises
# every sample to each exponent and yields a (2000, 3) feature matrix.
xx = torch.pow(x[:, None], p)
xx.size()

torch.Size([2000, 3])

In [5]:
# Inspect the feature matrix: each row holds (x, x^2, x^3) for one sample.
xx

tensor([[ -3.1416,   9.8696, -31.0063],
        [ -3.1384,   9.8499, -30.9133],
        [ -3.1353,   9.8301, -30.8205],
        ...,
        [  3.1353,   9.8301,  30.8205],
        [  3.1384,   9.8499,  30.9133],
        [  3.1416,   9.8696,  31.0063]])

### Create a model that can take the elements calculated previously as input

In [6]:
# The model is an nn.Sequential: a Module that chains other Modules and feeds
# each one's output into the next.
#   * Linear maps the 3 input features to 1 output through a learnable weight
#     and bias.
#   * Flatten collapses the resulting (N, 1) tensor to shape (N,), so that the
#     predictions line up with `y`.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=3, out_features=1),
    torch.nn.Flatten(start_dim=0, end_dim=1),
)

### Choose a loss function

In [7]:
# Loss from the nn package: MSELoss with reduction='sum' adds up the squared
# errors over all samples instead of averaging them.
loss_fn = torch.nn.MSELoss(reduction="sum")

### Train the model
- Simple manual gradient descent

In [32]:
# Step size reused by the RMSprop cell further down; RMSprop rescales each
# update, so 1e-3 is appropriate there and the name keeps its value.
learning_rate = 1e-3

# Plain gradient descent needs a much smaller step. The loss is a *sum* over
# 2000 samples and the x^3 feature reaches about 31, so the largest curvature
# of the loss is at least 2 * sum(x^6) ~ 5.5e5. Gradient descent is only stable
# for steps below 2 / curvature ~ 3.6e-6; with 1e-3 the loss diverges to
# inf/nan when this cell is run on a freshly initialised model.
sgd_learning_rate = 1e-6

for t in range(2000):

    # Forward pass: calling a Module like a function runs its forward
    # computation on the given input Tensor and returns the output Tensor.
    y_pred = model(xx)

    # Compute the loss between predictions and targets (a scalar Tensor) and
    # report it on every 100th iteration.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Clear gradients left over from the previous iteration; backward()
    # accumulates into .grad rather than overwriting it.
    model.zero_grad()

    # Backward pass: compute the gradient of the loss with respect to every
    # learnable parameter of the model (Tensors with requires_grad=True).
    loss.backward()

    # Gradient-descent update. It runs under no_grad so that the in-place
    # parameter change is not itself recorded by autograd.
    with torch.no_grad():
        for param in model.parameters():
            param -= sgd_learning_rate * param.grad

99 8.817169189453125
199 8.817169189453125
299 8.817169189453125
399 8.817169189453125
499 8.817168235778809
599 8.817168235778809
699 8.817168235778809
799 8.817167282104492
899 8.817167282104492
999 8.817168235778809
1099 8.817168235778809
1199 8.817167282104492
1299 8.817168235778809
1399 8.817168235778809
1499 8.817168235778809
1599 8.817168235778809
1699 8.817168235778809
1799 8.817168235778809
1899 8.817168235778809
1999 8.817168235778809


### Train using Optimizer
- https://pytorch.org/docs/stable/optim.html#algorithms

In [9]:
# The optim package supplies ready-made update rules; RMSprop is one of many.
# Its first argument is the collection of Tensors the optimizer is allowed to
# modify, here every learnable parameter of `model`.
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):

    # Forward pass: feed the feature matrix through the model and measure how
    # far the predictions are from the targets.
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)

    # Report the loss on every 100th iteration (t = 99, 199, ...).
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Reset gradients from the previous iteration, then backpropagate to fill
    # .grad on every learnable parameter.
    model.zero_grad()
    loss.backward()

    # Let the optimizer apply its update rule to the parameters it was given.
    optimizer.step()

99 7088.3662109375
199 2921.380859375
299 1942.6575927734375
399 1689.4520263671875
499 1478.5975341796875
599 1262.191162109375
699 1056.8172607421875
799 870.1187744140625
899 703.888916015625
999 557.6370849609375
1099 430.8931579589844
1199 322.7921447753906
1299 232.63229370117188
1399 159.7252655029297
1499 103.25030517578125
1599 61.99900436401367
1699 34.46175003051758
1799 18.523012161254883
1899 11.321237564086914
1999 9.233871459960938


In [9]:
# `model` is an nn.Sequential, so indexing it with 0 returns its first layer.
linear_layer = model[0]

# A Linear layer keeps its learned values in `bias` (a single value here) and
# `weight` (shape (1, 3): one coefficient per input feature x, x^2, x^3).
bias = linear_layer.bias.item()
coef_x, coef_x2, coef_x3 = linear_layer.weight.flatten().tolist()
print(f'Result: y = {bias} + {coef_x} x + {coef_x2} x^2 + {coef_x3} x^3')

Result: y = 0.018982043489813805 + 0.8470728993415833 x + -0.003274718066677451 x^2 + -0.09195520728826523 x^3


In [9]:
# Two hand-picked inputs, expanded into the same (x, x^2, x^3) feature layout
# that was built for the training data.
x1 = torch.tensor([1.0, 2.0])
xx1 = torch.pow(x1[:, None], p)

In [13]:
# Evaluate `model` on the feature rows built from x1.
y1 = model(xx1)

In [14]:
# Display the predictions for x = 1 and x = 2.
y1

tensor([-0.0832, -3.0546], grad_fn=<ReshapeAliasBackward0>)

### Custom _nn_ Modules

In [10]:
class Polynomial3(torch.nn.Module):
    """Cubic polynomial y = a + b x + c x^2 + d x^3 with learnable coefficients."""

    def __init__(self):
        """
        Create the four scalar coefficients a, b, c and d, each initialised
        with a draw from torch.randn and stored as a torch.nn.Parameter.
        """
        super().__init__()
        # Assigning an nn.Parameter as an attribute registers it with the
        # Module, so it is returned by parameters(). Creation order is a, b, c, d.
        for coeff_name in ('a', 'b', 'c', 'd'):
            setattr(self, coeff_name, torch.nn.Parameter(torch.randn(())))

    def forward(self, x):
        """
        Evaluate the polynomial element-wise on the input Tensor `x` and
        return the resulting Tensor.
        """
        constant = self.a
        linear = self.b * x
        quadratic = self.c * x.pow(2)
        cubic = self.d * x.pow(3)
        return constant + linear + quadratic + cubic

    def string(self):
        """
        Return the current polynomial as a human-readable equation string.
        Modules are ordinary Python classes, so extra methods like this are fine.
        """
        a, b, c, d = (coeff.item() for coeff in (self.a, self.b, self.c, self.d))
        return f'y = {a} + {b} x + {c} x^2 + {d} x^3'


In [11]:
# Instantiate the custom module defined above.
model = Polynomial3()

# Loss and optimizer. model.parameters() hands SGD every torch.nn.Parameter
# that is an attribute of the module, i.e. the coefficients to be learned.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    # Forward pass on the raw inputs; the module computes the powers itself.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Report the loss on every 100th iteration (t = 99, 199, ...).
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Clear old gradients, backpropagate, then let SGD update the coefficients.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 452.2678527832031
199 317.92047119140625
299 224.44505310058594
399 159.35040283203125
499 113.9825439453125
599 82.3386001586914
699 60.25032424926758
799 44.82081985473633
899 34.035308837890625
999 26.490928649902344
1099 21.210403442382812
1199 17.512117385864258
1299 14.920485496520996
1399 13.103334426879883
1499 11.828554153442383
1599 10.933823585510254
1699 10.305523872375488
1799 9.864119529724121
1899 9.553893089294434
1999 9.33576774597168
Result: y = -0.02317672409117222 + 0.8506575226783752 x + 0.0039983708411455154 x^2 + -0.09246508777141571 x^3


## Exercise
- Data: https://www.kaggle.com/code/gantalaswetha/usa-housing-dataset-linear-regression/notebook
- X: Any columns except _Price_
- Y: _Price_