In [1]:
import torch
import math

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Build the training data: 2000 evenly spaced points on [-pi, pi] and the sine
# of each point. The data itself is deterministic; only the weights are random.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once here
# rather than several times per iteration inside the loop.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize the four polynomial coefficients as 0-dim tensors.
# NOTE: no seed is set in this cell, so the printed losses and the fitted
# coefficients differ slightly from run to run.
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: evaluate the cubic a + b*x + c*x^2 + d*x^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # The residual feeds both the loss and every gradient; compute it once.
    residual = y_pred - y

    # Report the sum-of-squared-errors loss every 100th step. .item() copies
    # the scalar back to Python, so it is only done when the value is printed.
    # The final iteration (t == 1999) is a reporting step, so `loss` holds the
    # last-step loss once the loop finishes.
    if t % 100 == 99:
        loss = residual.pow(2).sum().item()
        print(t, loss)

    # Backprop by hand: d(loss)/d(y_pred) = 2 * residual, then the chain rule
    # gives each coefficient's gradient as the sum over all sample points.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights in place using plain gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1058.135986328125
199 705.487060546875
299 471.47698974609375
399 316.1570739746094
499 213.0406036376953
599 144.56402587890625
699 99.07814025878906
799 68.85489654541016
899 48.76660919189453
999 35.4102783203125
1099 26.5268611907959
1199 20.61612892150879
1299 16.68184471130371
1399 14.061962127685547
1499 12.316625595092773
1599 11.153350830078125
1699 10.377668380737305
1799 9.860149383544922
1899 9.514678955078125
1999 9.283957481384277
Result: y = -0.009746472351253033 + 0.837741494178772 x + 0.0016814281698316336 x^2 + -0.09062790125608444 x^3
