In [1]:
import torch
import math


dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Fit the cubic y = a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi] using
# hand-written gradients and plain gradient descent (no autograd).

# Draw the initial weights from a dedicated, seeded generator so that the
# printed losses and the final coefficients are identical on every
# Restart & Run All, without altering torch's global RNG state.
SEED = 0
rng = torch.Generator(device=device).manual_seed(SEED)

# Create input and target data: an evenly spaced grid and its sine
# (deterministic; only the initial weights below are random)
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once instead
# of twice per iteration (forward pass and gradient computation).
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights (0-dim tensors)
a = torch.randn((), generator=rng, device=device, dtype=dtype)
b = torch.randn((), generator=rng, device=device, dtype=dtype)
c = torch.randn((), generator=rng, device=device, dtype=dtype)
d = torch.randn((), generator=rng, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print loss (sum of squared errors, as a Python float)
    residual = y_pred - y
    loss = residual.pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss:
    # d(loss)/d(y_pred) = 2 * residual, then chain rule through each term.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent (in place on the weight tensors)
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 4866.76806640625
199 3239.227294921875
299 2157.7294921875
399 1438.845458984375
499 960.8348999023438
599 642.8760986328125
699 431.29925537109375
799 290.4554443359375
899 196.65821838378906
999 134.16481018066406
1099 92.50853729248047
1199 64.72795867919922
1299 46.19129943847656
1399 33.81612014770508
1499 25.549640655517578
1599 20.024396896362305
1699 16.32904815673828
1799 13.855916976928711
1899 12.199682235717773
1999 11.089689254760742
Result: y = -0.024742014706134796 + 0.8163537383079529 x + 0.004268407355993986 x^2 + -0.08758567273616791 x^3
