In [56]:
import random
import torch
import torch.nn as nn
from datetime import datetime, timedelta

In [76]:
batch_size = 1
n_epochs = 1000
n_iter = 10000

In [48]:
class MyModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyModel, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
        
    def forward(self, x):
        y = self.linear(x)
        return y

In [49]:
def ground_truth(x):
    return 3*x[:, 0] + x[:, 1] - 2*x[:, 2]

In [50]:
def train(model, x, y, optim):
    # initialize gradients in all parameters in module.
    optim.zero_grad()
    
    # feed-forward
    y_hat = model(x)
    # get error between answer and inferenced.
    loss = ((y - y_hat)**2).sum() / x.size(0)
    
    # back-propagation
    loss.backward()
    
    # one-step of gradient descent
    optim.step()
    
    return loss.data

In [63]:
model = MyModel(3, 1)
# GPU 학습
model.cuda()
optim = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.1)

x = torch.rand(batch_size, 3)
x

In [62]:
start_time = datetime.now()

for epoch in range(n_epochs):
    avg_loss = 0
    
    model.train()
    for i in range(n_iter):
        x = torch.rand(batch_size, 3)
        y = ground_truth(x.data)
        
        loss = train(model, x, y, optim)
        
        avg_loss += loss
    avg_loss = avg_loss / n_iter
        
    # simple test sample to check the network.
    x_valid = torch.FloatTensor([[.3, .2, .1]])
    y_valid = ground_truth(x_valid.data)
    
    model.eval()
    y_hat = model(x_valid)
    
    print(avg_loss, y_valid.data[0], y_hat.data[0, 0])
    
    if avg_loss < .001:
        break;
        
end_time = datetime.now()

print(end_time - start_time)

tensor(1.3941) tensor(0.9000) tensor(0.9365)
tensor(0.8638) tensor(0.9000) tensor(0.9835)
tensor(0.5885) tensor(0.9000) tensor(1.0118)
tensor(0.4124) tensor(0.9000) tensor(1.0140)
tensor(0.2936) tensor(0.9000) tensor(1.0268)
tensor(0.2082) tensor(0.9000) tensor(1.0334)
tensor(0.1475) tensor(0.9000) tensor(1.0284)
tensor(0.1047) tensor(0.9000) tensor(1.0238)
tensor(0.0740) tensor(0.9000) tensor(1.0218)
tensor(0.0523) tensor(0.9000) tensor(1.0191)
tensor(0.0389) tensor(0.9000) tensor(1.0128)
tensor(0.0273) tensor(0.9000) tensor(1.0079)
tensor(0.0199) tensor(0.9000) tensor(0.9994)
tensor(0.0146) tensor(0.9000) tensor(0.9950)
tensor(0.0111) tensor(0.9000) tensor(0.9869)
tensor(0.0081) tensor(0.9000) tensor(0.9812)
tensor(0.0060) tensor(0.9000) tensor(0.9768)
tensor(0.0046) tensor(0.9000) tensor(0.9700)
tensor(0.0035) tensor(0.9000) tensor(0.9638)
tensor(0.0026) tensor(0.9000) tensor(0.9596)
tensor(0.0020) tensor(0.9000) tensor(0.9545)
tensor(0.0015) tensor(0.9000) tensor(0.9499)
tensor(0.0

## GPU로 학습

In [74]:
x = torch.rand(4, 3).cuda()
print(x)
y = ground_truth(x.data)
print(y)

loss = train(model, x, y, optim)
print(loss)

tensor([[0.3183, 0.1278, 0.1655],
        [0.7874, 0.6380, 0.9990],
        [0.8796, 0.5975, 0.6009],
        [0.3585, 0.3965, 0.2459]], device='cuda:0')
tensor([0.7515, 1.0022, 2.0345, 0.9800], device='cuda:0')
tensor(1.1303, device='cuda:0')


In [77]:
start_time = datetime.now()

for epoch in range(n_epochs):
    avg_loss = 0
    model.train()
    for i in range(n_iter):
        x = torch.rand(batch_size, 3).cuda()
        y = ground_truth(x.data)
        
        loss = train(model, x, y, optim)
        
        avg_loss += loss
    avg_loss = avg_loss / n_iter / batch_size
        
    # simple test sample to check the network.
    x_valid = torch.cuda.FloatTensor([[.3, .2, .1]])
    y_valid = ground_truth(x_valid.data)
    
    model.eval()
    y_hat = model(x_valid).cuda()
    
    print(avg_loss, y_valid.data[0], y_hat.data[0, 0])
    
    if avg_loss < .001:
        break;
        
end_time = datetime.now()

print(end_time - start_time)

tensor(0.5410, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0318, device='cuda:0')
tensor(0.4031, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0374, device='cuda:0')
tensor(0.2784, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0381, device='cuda:0')
tensor(0.1904, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0356, device='cuda:0')
tensor(0.1371, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0441, device='cuda:0')
tensor(0.0975, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0299, device='cuda:0')
tensor(0.0692, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0361, device='cuda:0')
tensor(0.0512, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0280, device='cuda:0')
tensor(0.0366, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0187, device='cuda:0')
tensor(0.0272, device='cuda:0') tensor(0.9000, device='cuda:0') tensor(1.0133, device='cuda:0')
tensor(0.0196, device='cuda:0') tensor(0

batch 1 : 0:04:28.399974