In [11]:
import numpy as np
import math

x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6

for t in range(2000):
    y_pred = a + b  * x + c * x ** 2 + d * x ** 3
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)
    
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 6918.668738628027
199 4638.37232482348
299 3112.864065896745
399 2091.622837456856
499 1407.4822067994623
599 948.8351228832519
699 641.1254765447841
799 434.5180937953635
899 295.6809098015015
999 202.30521078305577
1099 139.44978972142104
1199 97.10062380773913
1299 68.54100911750314
1399 49.2623467685605
1499 36.23578676186018
1599 27.424860933322613
1699 21.45914848220742
1799 17.415612102889956
1899 14.671971860617406
1999 12.808308847673064
Result: y = 0.04521409113415909 + 0.8114942883435148 x + -0.007800183086366451 x^2 + -0.08689446064068312 x^3


In [10]:
import torch
import math

dtype = torch.float
device = torch.device('cpu')
device = torch.device('cuda:0')

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 4048.65673828125
199 2760.921875
299 1885.800537109375
399 1290.41748046875
499 884.896240234375
599 608.378173828125
699 419.60955810546875
799 290.59613037109375
899 202.32089233398438
999 141.8505859375
1099 100.3797607421875
1199 71.90643310546875
1299 52.33477020263672
1399 38.866878509521484
1499 29.58891487121582
1599 23.190303802490234
1699 18.772777557373047
1799 15.719722747802734
1899 13.607540130615234
1999 12.144755363464355
Result: y = -0.05227762460708618 + 0.8277679681777954 x + 0.009018761105835438 x^2 + -0.08920925110578537 x^3


In [12]:
import torch
import math

dtype = torch.float
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.set_default_device(device)

x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), dtype=dtype, requires_grad=True)
b = torch.randn((), dtype=dtype, requires_grad=True)
c = torch.randn((), dtype=dtype, requires_grad=True)
d = torch.randn((), dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    loss.backward()
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 3264.040771484375
199 2161.35498046875
299 1432.1976318359375
399 950.037841796875
499 631.206787109375
599 420.37750244140625
699 280.9649963378906
799 188.77740478515625
899 127.81759643554688
999 87.50749206542969
1099 60.85204315185547
1199 43.22584533691406
1299 31.570411682128906
1399 23.86300277709961
1499 18.766437530517578
1599 15.396308898925781
1699 13.167710304260254
1799 11.694015502929688
1899 10.719532012939453
1999 10.075157165527344
Result: y = 0.0008098944672383368 + 0.8222707509994507 x + -0.00013971913722343743 x^2 + -0.08842732012271881 x^3


In [17]:
import torch
import math

class LegendrePolynomial3(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)
    
    @staticmethod
    def backward(ctx, grad_output) -> torch.Any:
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)
    
dtype = torch.float
device = torch.device('cpu')

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    P3 = LegendrePolynomial3.apply
    y_pred = a + b * P3(c + d * x)
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())
    loss.backward()

    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        a.grad = None
        b.gard = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3{c.item()} + {d.item()}x')

99 36.503482818603516
199 223.77720642089844
299 191.7639617919922
399 91.0580062866211
499 514.5823974609375
599 8.834718704223633
699 435.1639709472656
799 69.97366333007812
899 336.98199462890625
999 207.81956481933594
1099 122.70377349853516
1199 17.697589874267578
1299 85.27622985839844
1399 79.56981658935547
1499 21.59762191772461
1599 119.80097198486328
1699 17.818653106689453
1799 83.89440155029297
1899 77.04509735107422
1999 20.899330139160156
Result: y = 1.0061893540580513e-09 + -1.9533222913742065 * P31.4447491814006952e-10 + 0.25384876132011414x


In [3]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-6
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    model.zero_grad()
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 1154.955810546875
199 766.7158203125
299 509.9878234863281
399 340.2236328125
499 227.96485900878906
599 153.73220825195312
699 104.64480590820312
799 72.18505859375
899 50.72043228149414
999 36.526588439941406
1099 27.140609741210938
1199 20.933992385864258
1299 16.829753875732422
1399 14.115728378295898
1499 12.320963859558105
1599 11.134169578552246
1699 10.34936237335205
1799 9.8303861618042
1899 9.48719596862793
1999 9.260249137878418
Result: y = 0.0006882319576106966 + 0.8362886309623718 x + -0.00011873249604832381 x^2 + -0.09042124450206757 x^3


In [6]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

linear_layer = model[0]
print(
    f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 49044.1171875
199 27225.88671875
299 14016.3935546875
399 6212.6416015625
499 2271.318603515625
599 819.9610595703125
699 510.2360534667969
799 455.29742431640625
899 404.35089111328125
999 338.88409423828125
1099 265.1843566894531
1199 193.26278686523438
1299 131.0565643310547
1399 82.25634002685547
1499 47.456695556640625
1599 25.50249481201172
1699 14.05775260925293
1799 9.826037406921387
1899 8.906379699707031
1999 8.844865798950195
Result: y = -0.0004999393713660538 + 0.8556613922119141 x + -0.0004999525262974203 x^2 + -0.09296795725822449 x^3


In [5]:
import torch
import math

class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
    

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 56.53201675415039
199 41.45883560180664
299 31.174381256103516
399 24.1478271484375
499 19.341625213623047
599 16.050390243530273
699 13.793956756591797
799 12.24524974822998
899 11.181035995483398
999 10.448969841003418
1099 9.944812774658203
1199 9.597224235534668
1299 9.357312202453613
1399 9.191564559936523
1499 9.076927185058594
1599 8.997559547424316
1699 8.942557334899902
1799 8.904400825500488
1899 8.877908706665039
1999 8.859496116638184
Result: y = -0.006079425569623709 + 0.8537698984146118 x + 0.0010488034458830953 x^2 + -0.09290780127048492 x^3


In [9]:
import random
import torch
import math

class DynaimcNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp

        return y
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
    
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = DynaimcNet()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

1999 67.81381225585938
3999 39.554656982421875
5999 23.717920303344727
7999 15.937784194946289
9999 12.908527374267578
11999 10.678178787231445
13999 9.46798038482666
15999 8.917146682739258
17999 9.120320320129395
19999 8.961034774780273
21999 8.908239364624023
23999 8.907230377197266
25999 8.859283447265625
27999 8.837788581848145
29999 8.574172019958496
Result: y = 0.0027873206418007612 + 0.8578000068664551 x + -0.0010571506572887301 x^2 + -0.09375525265932083 x^3 + 0.00011257371079409495 x^4 ? + 0.00011257371079409495 x^5 ?
