## LEARNING PYTORCH WITH EXAMPLES
### Numpy

In [1]:
import numpy as np
import math

# Training data: 2000 evenly spaced inputs on [-pi, pi] and their sine.
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# Draw the four polynomial coefficients (in the order a, b, c, d)
# from a standard normal distribution.
a, b, c, d = (np.random.randn() for _ in range(4))

learning_rate = 1e-6

# The powers of the inputs never change, so compute them once.
x_sq = x ** 2
x_cu = x ** 3

for t in range(2000):
    # Forward pass: y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_sq + d * x_cu
    residual = y_pred - y
    loss = np.square(residual).sum()

    # Report the loss every 100th step (t = 99, 199, ...).
    if (t + 1) % 100 == 0:
        print(t, loss)

    # Backward pass: gradient of the summed squared error with respect to
    # the prediction, then chained onto each coefficient.
    grad_y_pred = 2.0 * residual

    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Plain gradient-descent step.
    a = a - learning_rate * grad_a
    b = b - learning_rate * grad_b
    c = c - learning_rate * grad_c
    d = d - learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 1167.653824996293
199 786.4662472088327
299 531.0824699728796
399 359.85805844132904
499 244.97159623198377
599 167.82516253184474
699 115.97863766805162
799 81.10533288321383
899 57.62799323817435
999 41.80825141588632
1099 31.138441043840608
1199 23.93512506756354
1299 19.06726769253079
1399 15.774330665936743
1499 13.54446274392813
1599 12.032872104759933
1699 11.007085579093824
1799 10.310210295894024
1899 9.836257591401836
1999 9.51355501595376
Result: y = -0.01961318249462664 + 0.8384756937656868 x + 0.003383600345088538 x^2 + -0.0907323306058415 x^3


### Tensor
- NumPy는 GPU를 이용해 수치 계산을 가속할 수 없다는 단점이 있다 >> Tensor를 사용한다!

In [1]:
import torch
import math

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Options shared by every tensor created in this cell.
tensor_opts = dict(device=device, dtype=dtype)

# Training data: 2000 inputs on [-pi, pi] and their sine.
x = torch.linspace(-math.pi, math.pi, 2000, **tensor_opts)
y = torch.sin(x)

# Randomly initialised 0-dim coefficient tensors (drawn in the order a, b, c, d).
a = torch.randn((), **tensor_opts)
b = torch.randn((), **tensor_opts)
c = torch.randn((), **tensor_opts)
d = torch.randn((), **tensor_opts)

learning_rate = 1e-6

for t in range(2000):
    # Forward pass: y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Summed squared error, converted to a Python float for printing.
    error = y_pred - y
    loss = error.pow(2).sum().item()
    if (t + 1) % 100 == 0:
        print(t, loss)

    # Backward pass, derived by hand.
    grad_y_pred = 2.0 * error
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # In-place gradient-descent step on each coefficient.
    a.sub_(learning_rate * grad_a)
    b.sub_(learning_rate * grad_b)
    c.sub_(learning_rate * grad_c)
    d.sub_(learning_rate * grad_d)

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 259.7029113769531
199 181.18560791015625
299 127.37487030029297
399 90.4537582397461
499 65.09239196777344
599 47.65182876586914
699 35.644832611083984
799 27.3695068359375
899 21.659923553466797
999 17.716371536254883
1099 14.989710807800293
1199 13.1025390625
1299 11.795077323913574
1399 10.888360023498535
1499 10.25895881652832
1599 9.82165241241455
1699 9.517553329467773
1799 9.305888175964355
1899 9.158442497253418
1999 9.05565071105957
Result: y = -0.01479713711887598 + 0.8503621220588684 x + 0.002552750753238797 x^2 + -0.09242306649684906 x^3


### Autograd
앞선 예제에서는 순전파와 역전파를 코드로 직접 구현했다. PyTorch의 autograd를 통해 역전파 계산을 자동화할 수 있다.

In [2]:
import torch
import math

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# requires_grad=True asks autograd to track every operation on these tensors
# so that loss.backward() can fill in their .grad attributes.
leaf_opts = dict(device=device, dtype=dtype, requires_grad=True)
a = torch.randn((), **leaf_opts)
b = torch.randn((), **leaf_opts)
c = torch.randn((), **leaf_opts)
d = torch.randn((), **leaf_opts)

learning_rate = 1e-6

for t in range(2000):
    # Forward pass: y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    loss = (y_pred - y).pow(2).sum()

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Backward pass: autograd computes d(loss)/d(coefficient) for a, b, c, d.
    loss.backward()

    # The update itself must not be tracked by autograd.
    with torch.no_grad():
        for coeff in (a, b, c, d):
            coeff -= learning_rate * coeff.grad
            # Drop the gradient so the next backward() starts from scratch.
            coeff.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1261.82958984375
199 837.378173828125
299 556.707763671875
399 371.1131896972656
499 248.38766479492188
599 167.2349090576172
699 113.57209014892578
799 78.08720397949219
899 54.622535705566406
999 39.106292724609375
1099 28.8460693359375
1199 22.06153106689453
1299 17.575115203857422
1399 14.608402252197266
1499 12.646676063537598
1599 11.349492073059082
1699 10.49169921875
1799 9.92447280883789
1899 9.549394607543945
1999 9.301365852355957
Result: y = -0.0004479656636249274 + 0.8353545069694519 x + 7.728050695732236e-05 x^2 + -0.09028837084770203 x^3


In [3]:
import torch
import math


class LegendrePolynomial3(torch.autograd.Function):
    """Custom autograd Function for the degree-3 Legendre polynomial.

    The forward pass evaluates P3(x) = (5 x^3 - 3 x) / 2 and the backward
    pass applies its analytic derivative P3'(x) = 1.5 * (5 x^2 - 1).

    Use it through ``LegendrePolynomial3.apply(tensor)``; the class is never
    instantiated, which is why both methods are static.
    """

    @staticmethod
    def forward(ctx, input):
        """Evaluate P3 element-wise on ``input``.

        Args:
            ctx: context object used to stash tensors for the backward pass.
            input: tensor the polynomial is evaluated on.

        Returns:
            Tensor holding ``0.5 * (5 * input ** 3 - 3 * input)``.
        """
        # The derivative depends on the input, so keep it for backward().
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)

    @staticmethod
    def backward(ctx, grad_output):
        """Propagate the incoming gradient through P3.

        Args:
            ctx: context object populated by ``forward``.
            grad_output: gradient of the loss with respect to the output.

        Returns:
            Gradient of the loss with respect to ``input``:
            ``grad_output * 1.5 * (5 * input ** 2 - 1)``.
        """
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)


dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Fixed starting values for the model y = a + b * P3(c + d * x); every
# coefficient is a 0-dim leaf tensor tracked by autograd.
leaf_opts = dict(device=device, dtype=dtype, requires_grad=True)
a = torch.full((), 0.0, **leaf_opts)
b = torch.full((), -1.0, **leaf_opts)
c = torch.full((), 0.0, **leaf_opts)
d = torch.full((), 0.3, **leaf_opts)

learning_rate = 5e-6

# Custom Functions are invoked through .apply; bind it to a short alias.
P3 = LegendrePolynomial3.apply

for t in range(2000):
    # Forward pass, routed through the custom autograd Function.
    y_pred = a + b * P3(c + d * x)

    loss = (y_pred - y).pow(2).sum()

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Backward pass: autograd calls LegendrePolynomial3.backward for the P3 node.
    loss.backward()

    # Gradient-descent step, excluded from autograd tracking.
    with torch.no_grad():
        for coeff in (a, b, c, d):
            coeff -= learning_rate * coeff.grad
            # Clear the gradient before the next iteration.
            coeff.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 209.95834350585938
199 144.66018676757812
299 100.70249938964844
399 71.03519439697266
499 50.97850799560547
599 37.403133392333984
699 28.206867218017578
799 21.97318458557129
899 17.7457275390625
999 14.877889633178711
1099 12.93176555633545
1199 11.610918998718262
1299 10.71425724029541
1399 10.10548210144043
1499 9.692106246948242
1599 9.411375045776367
1699 9.220745086669922
1799 9.091285705566406
1899 9.003360748291016
1999 8.943639755249023
Result: y = -5.394172664097141e-09 + -2.208526849746704 * P3(1.367587154632588e-09 + 0.2554861009120941 x)


### NN 모듈

In [10]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)  # shape (2000,)
y = torch.sin(x)

# The output y is a linear function of (x, x^2, x^3), so build that feature
# matrix up front. unsqueeze(-1) appends a size-1 dimension (squeeze would
# remove one), and broadcasting against the exponents gives three columns:
#   x.unsqueeze(-1)         -> (2000, 1)
#   x.unsqueeze(-1).pow(p)  -> (2000, 3)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# Linear maps the 3 features to 1 output; Flatten(0, 1) then collapses the
# (2000, 1) result to (2000,) so it lines up with y.
linear_layer = torch.nn.Linear(3, 1)
model = torch.nn.Sequential(
    linear_layer,
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-6

for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # loss.backward() stores each parameter's gradient in its .grad attribute.
    # Without clearing first, the gradient left over from the previous
    # iteration would leak into this iteration's update, so reset the
    # gradients before back-propagating.
    model.zero_grad()
    loss.backward()

    # no_grad disables gradient tracking, so the in-place parameter update
    # below is not recorded by autograd.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 354.50775146484375
199 243.15798950195312
299 167.8461456298828
399 116.85607147216797
499 82.29669952392578
599 58.848182678222656
699 42.92101287841797
799 32.09075927734375
899 24.718082427978516
999 19.693462371826172
1099 16.265172958374023
1199 13.92332935333252
1299 12.321823120117188
1399 11.22535514831543
1499 10.473762512207031
1599 9.95802116394043
1699 9.60371208190918
1799 9.360025405883789
1899 9.192229270935059
1999 9.07657241821289
Result: y = 0.013952391222119331 + 0.8477441072463989 x + -0.0024070199579000473 x^2 + -0.09205068647861481 x^3


### Optimize

In [11]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Feature matrix with columns (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# Linear layer over the three features, flattened to a 1-D prediction.
linear_layer = torch.nn.Linear(3, 1)
model = torch.nn.Sequential(
    linear_layer,
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-3
# The optimizer owns the update rule; here RMSprop adjusts the model parameters.
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Clear old gradients, back-propagate, then let the optimizer update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 9717.9453125
199 2855.639892578125
299 655.4539794921875
399 198.40789794921875
499 146.142333984375
599 125.30291748046875
699 102.4905776977539
799 76.74386596679688
899 51.14042663574219
999 30.328144073486328
1099 17.01519203186035
1199 10.800752639770508
1299 9.036584854125977
1399 8.823295593261719
1499 8.991766929626465
1599 8.847822189331055
1699 8.843000411987305
1799 8.853471755981445
1899 8.906072616577148
1999 8.959297180175781
Result: y = -0.0005403545219451189 + 0.8561495542526245 x + -0.000540409586392343 x^2 + -0.09392372518777847 x^3


### Custom NN 모듈

In [12]:
import torch
import math


class Polynomial3(torch.nn.Module):
    """Cubic polynomial y = a + b x + c x^2 + d x^3 with learnable coefficients."""

    def __init__(self):
        """Register the four coefficients as randomly initialised scalar Parameters."""
        super().__init__()
        # Assigning a Parameter as an attribute registers it with the Module;
        # values are drawn in the order a, b, c, d.
        for name in ("a", "b", "c", "d"):
            setattr(self, name, torch.nn.Parameter(torch.randn(())))

    def forward(self, x):
        """Evaluate the polynomial element-wise on the tensor ``x``."""
        constant = self.a
        linear = self.b * x
        quadratic = self.c * x ** 2
        cubic = self.d * x ** 3
        return constant + linear + quadratic + cubic

    def string(self):
        """Return a human-readable form of the polynomial with current coefficients."""
        a, b, c, d = (coeff.item() for coeff in (self.a, self.b, self.c, self.d))
        return f'y = {a} + {b} x + {c} x^2 + {d} x^3'

# Training data: 2000 inputs on [-pi, pi] and their sine.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()

# Summed squared error, minimised with plain SGD over the model's parameters.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)

for t in range(2000):
    # Forward pass and loss.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Clear old gradients, back-propagate, update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 521.2109375
199 356.1614990234375
299 244.5342254638672
399 168.95619201660156
499 117.7315673828125
599 82.97565460205078
699 59.36813735961914
799 43.31511688232422
899 32.38686752319336
999 24.93895721435547
1099 19.857288360595703
1199 16.386043548583984
1299 14.012168884277344
1399 12.386882781982422
1499 11.272833824157715
1599 10.508325576782227
1699 9.98311710357666
1799 9.621886253356934
1899 9.373140335083008
1999 9.20169448852539
Result: y = 0.01698845438659191 + 0.8457887768745422 x + -0.002930788788944483 x^2 + -0.09177255630493164 x^3


### 흐름 제어 및 가중치 공유

In [13]:
import random
import torch
import math


class DynamicNet(torch.nn.Module):
    """Cubic polynomial with a randomly varying number of higher-order terms.

    Demonstrates dynamic control flow and weight sharing: each forward pass
    may add x^4 and x^5 terms, and both reuse the single parameter ``e``.
    """

    def __init__(self):
        """Register the five coefficients as randomly initialised scalar Parameters."""
        super().__init__()
        # Values are drawn in the order a, b, c, d, e.
        for name in ("a", "b", "c", "d", "e"):
            setattr(self, name, torch.nn.Parameter(torch.randn(())))

    def forward(self, x):
        """Evaluate the model on ``x`` with a random number of extra terms."""
        result = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        # random.randint(4, 6) is inclusive on both ends, so the exclusive
        # upper bound is 4, 5 or 6 and the loop adds no term, the x^4 term,
        # or both the x^4 and x^5 terms. Each added term reuses parameter e.
        upper = random.randint(4, 6)
        for power in range(4, upper):
            result = result + self.e * x ** power
        return result

    def string(self):
        """Return a human-readable form; '?' marks the terms that are optional."""
        a, b, c, d, e = (
            coeff.item() for coeff in (self.a, self.b, self.c, self.d, self.e)
        )
        return f'y = {a} + {b} x + {c} x^2 + {d} x^3 + {e} x^4 ? + {e} x^5 ?'

# Training data: 2000 inputs on [-pi, pi] and their sine.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = DynamicNet()

# Summed squared error, minimised with SGD plus momentum.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)

for t in range(30000):
    # Forward pass; the model picks its number of extra terms on every call.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Report the loss every 2000th step (t = 1999, 3999, ...).
    if (t + 1) % 2000 == 0:
        print(t, loss.item())

    # Clear old gradients, back-propagate, update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

1999 1088.4119873046875
3999 500.101806640625
5999 229.4305419921875
7999 117.51200866699219
9999 59.17053985595703
11999 32.43724060058594
13999 20.130762100219727
15999 14.129984855651855
17999 11.278861999511719
19999 10.093719482421875
21999 9.378060340881348
23999 9.076822280883789
25999 8.986943244934082
27999 8.668816566467285
29999 8.874432563781738
Result: y = -0.004613111261278391 + 0.8546837568283081 x + 0.0002802866802085191 x^2 + -0.09333021193742752 x^3 + 0.00014044702402316034 x^4 ? + 0.00014044702402316034 x^5 ?
