In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# For reproducibility: fix the global PyTorch RNG seed so that randomly
# initialised values (e.g. the nn.Linear parameters created further down)
# are repeatable when the notebook is re-run from a fresh kernel.
torch.manual_seed(1)

<torch._C.Generator at 0x1bef5d2dfd0>

Linear Regression

(Hypothesis)    $y = Wx + b$

W는 weight, b는 bias

In [3]:
# Disabled experiment, kept as a bare string literal so none of it executes.
# If enabled it would build x_train from integer literals and y_train from a
# list containing a float literal, then print both dtypes for comparison.
# Because the string is the last expression of the cell, the notebook echoes
# it back as the cell output.
'''
x_train = torch.tensor([[1], [2], [3]])
y_train = torch.tensor([[1.], [2], [3]])

print(x_train.dtype)
print(y_train.dtype)
'''

'\nx_train = torch.tensor([[1], [2], [3]])\ny_train = torch.tensor([[1.], [2], [3]])\n\nprint(x_train.dtype)\nprint(y_train.dtype)\n'

In [4]:
# Legacy constructor style, kept for reference only:
#   x_train = torch.FloatTensor([[1], [2], [3]])
#   y_train = torch.FloatTensor([[1], [2], [3]])

# Training inputs and targets as 1-D float32 tensors (explicit dtype).
x_train = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)
y_train = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)

# Show the inputs followed by their shape, one per line.
print(x_train, x_train.shape, sep="\n")

tensor([1., 2., 3.])
torch.Size([3])


In [5]:
# Show the targets followed by their shape, one per line.
print(y_train, y_train.shape, sep="\n")

tensor([1., 2., 3.])
torch.Size([3])


In [6]:
# Initialise the weight and bias to zero. requires_grad=True marks them as
# tensors to be learned, so autograd tracks operations on them.
W = torch.zeros((1,), requires_grad=True)
b = torch.zeros((1,), requires_grad=True)

# Show both parameters, one per line.
print(W, b, sep="\n")

tensor([0.], requires_grad=True)
tensor([0.], requires_grad=True)


In [7]:
# Hypothesis H(x) = x * W + b, written with the functional forms of * and +.
hypothesis = torch.add(torch.mul(x_train, W), b)
print(hypothesis)

tensor([0., 0., 0.], grad_fn=<AddBackward0>)


loss 즉 cost function은 MSE:

$cost(W, b) = \frac{1}{m} \sum_{i=1}^{m}\left[H(x^{i})-y^{i}\right]^{2}$

$H(x^{i})$는 $i$번째 입력 $x^{i}$에 대한 prediction, $y^{i}$는 target

In [8]:
# Per-sample error: prediction minus target.
print(torch.sub(hypothesis, y_train))

tensor([-1., -2., -3.], grad_fn=<SubBackward0>)


In [9]:
# Per-sample squared error.
print(torch.pow(hypothesis - y_train, 2))

tensor([1., 4., 9.], grad_fn=<PowBackward0>)


In [10]:
# Mean squared error: average of the squared per-sample errors.
cost = (hypothesis - y_train).pow(2).mean()
print(cost)

tensor(4.6667, grad_fn=<MeanBackward0>)


Gradient descent

In [11]:
# SGD over the two learnable tensors W and b, with learning rate 0.01.
optimizer = optim.SGD(params=[W, b], lr=0.01)

# One manual update step:
optimizer.zero_grad()   # clear stale gradients; otherwise backward() accumulates into them
cost.backward()         # back-propagate to compute d(cost)/dW and d(cost)/db
optimizer.step()        # apply the SGD update to W and b

# Show the updated parameters, one per line.
print(W, b, sep="\n")

tensor([0.0933], requires_grad=True)
tensor([0.0400], requires_grad=True)


In [12]:
# Recompute H(x) = x * W + b with the updated W and b.
hypothesis = torch.add(torch.mul(x_train, W), b)
print(hypothesis)

tensor([0.1333, 0.2267, 0.3200], grad_fn=<AddBackward0>)


In [13]:
# Mean squared error of the recomputed hypothesis.
cost = (hypothesis - y_train).pow(2).mean()
print(cost)

tensor(3.6927, grad_fn=<MeanBackward0>)


In [14]:
# Full training code: fit H(x) = x * W + b with plain SGD.

# Data: inputs and targets as (3, 1) float32 column tensors. Uses torch.tensor
# with an explicit dtype instead of the legacy torch.FloatTensor constructor.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[4], [5], [6]], dtype=torch.float32)

# Model parameters, initialised to zero and tracked by autograd.
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Optimizer over the two learnable tensors.
optimizer = optim.SGD([W, b], lr=0.01)

nb_epochs = 1000
for epoch in range(1, nb_epochs + 1):
    # Forward pass: compute H(x).
    hypothesis = x_train * W + b
    # Mean squared error between prediction and target.
    cost = torch.mean((hypothesis - y_train) ** 2)
    # Improve H(x): clear old gradients, back-propagate, update W and b.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 100 epochs. W and b are read after the update above, while
    # cost is the value computed before that update.
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} W: {: .3f}, b: {: .3f}, Cost: {: .6f}'.format(
            epoch, nb_epochs, W.item(), b.item(), cost.item()
        ))

Epoch  100/1000 W:  1.742, b:  1.314, Cost:  0.409840
Epoch  200/1000 W:  1.583, b:  1.675, Cost:  0.253256
Epoch  300/1000 W:  1.458, b:  1.958, Cost:  0.156497
Epoch  400/1000 W:  1.360, b:  2.181, Cost:  0.096705
Epoch  500/1000 W:  1.283, b:  2.356, Cost:  0.059758
Epoch  600/1000 W:  1.223, b:  2.494, Cost:  0.036927
Epoch  700/1000 W:  1.175, b:  2.602, Cost:  0.022818
Epoch  800/1000 W:  1.138, b:  2.687, Cost:  0.014100
Epoch  900/1000 W:  1.108, b:  2.754, Cost:  0.008713
Epoch 1000/1000 W:  1.085, b:  2.807, Cost:  0.005384


high-level implementation w/ nn.Module

In [15]:
# Inputs and targets as (3, 1) float32 column tensors: one sample per row and
# one feature per column, matching the nn.Linear(1, 1) layer defined in this
# notebook. Uses torch.tensor with an explicit dtype instead of the legacy
# torch.FloatTensor constructor.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

In [16]:
class LinearRegressionModel(nn.Module):
    """Single-feature linear regression, H(x) = Wx + b, as an ``nn.Module``."""

    def __init__(self) -> None:
        super().__init__()
        # Linear layer with input size 1 and output size 1; it holds the
        # weight W and the bias b of the transform y = Wx + b.
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.linear(x)
        return prediction

In [17]:
# Instantiate the model; this creates its nn.Linear(1, 1) layer.
model = LinearRegressionModel()

In [18]:
# Show the module's text representation, which lists its sub-layers.
print(model)

LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)


In [19]:
# Disabled experiment, kept as a bare string literal so none of it executes.
# If enabled it would feed a small (3, 1) input tensor through `model` and
# print the predictions. Because the string is the last expression of the
# cell, the notebook echoes it back as the cell output.
'''
# 실험_무시해도 되는 코드
# 입력 텐서 정의
x = torch.tensor([[2.0], [3.0], [4.0]])

# 모델을 사용하여 예측값 계산
y_pred = model(x)

print(y_pred)  # 예측된 값 출력
'''

'\n# 실험_무시해도 되는 코드\n# 입력 텐서 정의\nx = torch.tensor([[2.0], [3.0], [4.0]])\n\n# 모델을 사용하여 예측값 계산\ny_pred = model(x)\n\nprint(y_pred)  # 예측된 값 출력\n'

Hypothesis

모델을 생성해서 예측값 $H(x)$ 구하기

In [20]:
# Forward pass: calling the model on x_train yields the predictions H(x).
hypothesis = model(x_train)
print(hypothesis)

tensor([[0.0739],
        [0.5891],
        [1.1044]], grad_fn=<AddmmBackward0>)


MSE로 cost 구하기

In [21]:
# Show predictions and targets one after the other for comparison.
print(hypothesis, y_train, sep="\n")

tensor([[0.0739],
        [0.5891],
        [1.1044]], grad_fn=<AddmmBackward0>)
tensor([[1.],
        [2.],
        [3.]])


In [22]:
# Mean squared error via the functional API; "mean" is mse_loss's default reduction.
cost = F.mse_loss(input=hypothesis, target=y_train, reduction="mean")
print(cost)

tensor(2.1471, grad_fn=<MseLossBackward0>)


Gradient Descent

In [23]:
# SGD over every parameter registered on the model, with learning rate 0.01.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

# One manual update step:
optimizer.zero_grad()   # clear stale gradients
cost.backward()         # back-propagate the cost
optimizer.step()        # apply the SGD update to the model parameters

In [24]:
# Full code: train LinearRegressionModel with SGD and the functional MSE loss.

# Data: inputs and targets as (3, 1) float32 column tensors. Uses torch.tensor
# with an explicit dtype instead of the legacy torch.FloatTensor constructor.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
# Fresh model instance.
model = LinearRegressionModel()
# Optimizer over the model's parameters.
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 1000
# range(nb_epochs + 1) yields epochs 0..nb_epochs, so the loop performs
# nb_epochs + 1 parameter updates and epoch 0 is included in the log.
for epoch in range(nb_epochs + 1):

    # Forward pass: compute H(x).
    prediction = model(x_train)

    # Mean squared error between prediction and target.
    cost = F.mse_loss(prediction, y_train)

    # Improve H(x): clear old gradients, back-propagate, update parameters.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 100 epochs. The parameters are read after the update above,
    # while cost is the value computed before that update.
    if epoch % 100 == 0:
        # Read the parameters by name rather than by their position in
        # model.parameters(). NOTE: this rebinds the notebook-level names
        # W and b to plain Python floats.
        W = model.linear.weight.item()
        b = model.linear.bias.item()
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs, W, b, cost.item()
        ))

Epoch    0/1000 W: -0.101, b: 0.508 Cost: 4.630286
Epoch  100/1000 W: 0.713, b: 0.653 Cost: 0.061555
Epoch  200/1000 W: 0.774, b: 0.514 Cost: 0.038037
Epoch  300/1000 W: 0.822, b: 0.404 Cost: 0.023505
Epoch  400/1000 W: 0.860, b: 0.317 Cost: 0.014525
Epoch  500/1000 W: 0.890, b: 0.250 Cost: 0.008975
Epoch  600/1000 W: 0.914, b: 0.196 Cost: 0.005546
Epoch  700/1000 W: 0.932, b: 0.154 Cost: 0.003427
Epoch  800/1000 W: 0.947, b: 0.121 Cost: 0.002118
Epoch  900/1000 W: 0.958, b: 0.095 Cost: 0.001309
Epoch 1000/1000 W: 0.967, b: 0.075 Cost: 0.000809
