## NumPy example

In [1]:
import numpy as np
import math

First we will create the input and output data: 2000 evenly spaced points `x` in $[-\pi, \pi]$ and the targets `y = sin(x)`.

In [2]:
# Inputs: 2000 evenly spaced sample points covering [-pi, pi] (both ends included).
x = np.linspace(start=-math.pi, stop=math.pi, num=2000)
# Targets: the sine of every sample point.
y = np.sin(x)

Next we will randomly initialize the weights

In [3]:
# Draw the four polynomial coefficients, one standard-normal sample each, in
# the order a (constant), b (linear), c (quadratic), d (cubic).
a, b, c, d = (np.random.randn() for _ in range(4))

In [4]:
learning_rate = 1e-6

# x is fixed for the whole run, so its powers are loop-invariant: compute them
# once instead of re-evaluating x**2 and x**3 twice each on every iteration.
x_sq = x**2
x_cube = x**3

for t in range(2000):
    # forward pass, compute the predicted y
    # y = a + b x + c x^2 + d x^3
    y_pred = a + (b * x) + (c * x_sq) + (d * x_cube)

    # the residual feeds both the loss and the gradient, so compute it once
    residual = y_pred - y

    # compute the loss (sum of squared errors) and print it every 100th step
    loss = np.square(residual).sum()
    if t % 100 == 99:
        print(t, loss)

    # backprop to compute gradients of the loss w.r.t. a, b, c, d:
    # d(loss)/d(y_pred) = 2 * (y_pred - y), and by the chain rule each weight's
    # gradient multiplies that by d(y_pred)/d(weight) = 1, x, x^2, x^3
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cube).sum()

    # update the weights with one step of plain gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 1214.3046676345257
199 849.7868867068299
299 595.9331464666743
399 419.002418144746
499 295.5885873057413
599 209.4391905945169
699 149.2587339025383
799 107.18988591315106
899 77.7622848048099
999 57.16426005119314
1099 42.73778086765044
1199 32.627876845590656
1299 25.539060355945093
1399 20.565940975049653
1499 17.07533166660241
1599 14.624125102223253
1699 12.902041011743524
1799 11.691681443284686
1899 10.84064083546196
1999 10.242018972827665
Result: y = -0.03851693340263842 + 0.8469848262436391 x + 0.006644811936493448 x^2 + -0.09194268060265268 x^3


# PyTorch Tensors
---
A Tensor is an n-dimensional array, and PyTorch provides many functions for operating on these Tensors. Behind the scenes, Tensors can keep track of a computational graph and gradients, but they’re also useful as a generic tool for scientific computing.

In [5]:
import torch
import math

dtype = torch.float
device = torch.device("cpu")

# Create the input and output values: 2000 evenly spaced points on [-pi, pi]
# and their sine (the data itself is deterministic; only the weights are random)
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Randomly initialize the weights as 0-dimensional (scalar) tensors
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

# x never changes during training, so its powers are loop-invariant: compute
# them once instead of re-evaluating x**2 and x**3 twice each per iteration.
x_sq = x**2
x_cube = x**3

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: y_pred = a + b x + c x^2 + d x^3
    y_pred = a + (b * x) + (c * x_sq) + (d * x_cube)

    # The residual feeds both the loss and the gradient, so compute it once
    residual = y_pred - y

    # The loss value is only reported, never used by the update below, so
    # reduce it and convert it to a Python float only on the steps that print
    if t % 100 == 99:
        loss = residual.pow(2).sum().item()
        print(t, loss)

    # Backprop to compute gradients of the loss with respect to a, b, c, d:
    # d(loss)/d(y_pred) = 2 * (y_pred - y), and by the chain rule each weight's
    # gradient multiplies that by d(y_pred)/d(weight) = 1, x, x^2, x^3
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cube).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 10471.7880859375
199 6942.3837890625
299 4604.1982421875
399 3054.98583984375
499 2028.384765625
599 1347.99853515625
699 896.9992065429688
799 598.0015258789062
899 399.74151611328125
999 268.25433349609375
1099 181.03408813476562
1199 123.16554260253906
1299 84.76253509521484
1399 59.2713737487793
1499 42.346622467041016
1599 31.106372833251953
1699 23.6392822265625
1799 18.677242279052734
1899 15.378902435302734
1999 13.185656547546387
Result: y = 0.02246849052608013 + 0.7958948612213135 x + -0.0038761894684284925 x^2 + -0.08467558026313782 x^3


# PyTorch nn
---
In PyTorch, the nn package provides higher-level abstractions over raw computational graphs that are useful for building neural networks. The nn package defines a set of Modules, which are roughly equivalent to neural network layers. A Module receives input Tensors and computes output Tensors, but may also hold internal state such as Tensors containing learnable parameters. The nn package also defines a set of useful loss functions that are commonly used when training neural networks.

In [6]:
import torch
import math

# Regression data: 2000 evenly spaced inputs on [-pi, pi] and their sine as targets.
x = torch.linspace(-math.pi, math.pi, steps=2000)
y = x.sin()

# The cubic being fitted is linear in the features (x, x^2, x^3), so the fit
# can be expressed as a single linear layer applied to a feature matrix whose
# columns are those three powers of x. Build that matrix here.
p = torch.tensor((1, 2, 3))
xx = torch.pow(x.unsqueeze(-1), p)

In the above code, `x.unsqueeze(-1)` has shape (2000, 1) and `p` has shape
(3,). In this case, broadcasting semantics apply, producing a tensor
of shape (2000, 3).

Use the nn package to define our model as a sequence of layers. nn.Sequential
is a Module which contains other Modules, and applies them in sequence to
produce its output. The Linear Module computes output from input using a
linear function, and holds internal Tensors for its weight and bias.
The Flatten layer flattens the output of the linear layer to a 1D tensor,
to match the shape of `y`.

In [11]:
# Two-stage pipeline: a Linear layer maps the three features of each row of
# `xx` to one output, then Flatten merges dims 0 and 1 of the result so the
# prediction is a 1D tensor.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=3, out_features=1),
    torch.nn.Flatten(start_dim=0, end_dim=1),
)

# Loss taken from the nn package: squared error, summed (not averaged) over
# all elements because of reduction='sum'.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6

for t in range(2000):
    # Forward pass. A Module is callable: calling it on a Tensor of inputs
    # returns a Tensor of outputs.
    y_pred = model(xx)

    # The loss function takes the predicted and true values and returns the
    # loss as a Tensor; report its scalar value on every 100th step.
    loss = loss_fn(y_pred, y)
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Reset the gradients before the backward pass.
    model.zero_grad()

    # Backward pass: computes the gradient of the loss with respect to the
    # model's learnable parameters.
    loss.backward()

    # Plain gradient-descent step. The update is done under no_grad so that it
    # is not itself recorded for differentiation.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

# A Sequential can be indexed like a list; index 0 is the Linear layer.
linear_layer = model[0]

# The Linear layer exposes its learned parameters as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 817.4127197265625
199 543.5316772460938
299 362.4188232421875
399 242.6515655517578
499 163.4510955810547
599 111.0765380859375
699 76.44140625
799 53.53745651245117
899 38.39120101928711
999 28.37488555908203
1099 21.751033782958984
1199 17.37067222595215
1299 14.47387409210205
1399 12.558138847351074
1499 11.291229248046875
1599 10.453353881835938
1699 9.899251937866211
1799 9.532816886901855
1899 9.290472984313965
1999 9.13019847869873
Result: y = -0.0010115851182490587 + 0.8395671248435974 x + 0.00017451480380259454 x^2 + -0.09088757634162903 x^3
