# Neural Network

In [1]:
import torch

### manual

In [2]:
batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10

In [3]:
x = torch.randn(batch_n, input_data)
y = torch.randn(batch_n, output_data)
print(x.shape)
print(y.shape)

torch.Size([100, 1000])
torch.Size([100, 10])


In [4]:
w1 = torch.randn(input_data, hidden_layer)
w2 = torch.randn(hidden_layer, output_data)
print(w1.shape)
print(w2.shape)

torch.Size([1000, 100])
torch.Size([100, 10])


In [5]:
epoch_n = 1000
learning_rate = 1e-6
for i in range(epoch_n):
    h1 = x.mm(w1) # (100, 100) = (100, 1000) * (1000, 100)
    h1 = h1.clamp(min=0) # ReLu
    y_pred = h1.mm(w2) # (100, 10) = (100, 100) * (100, 10)
    loss = (y_pred - y).pow(2).sum() # SE
    if i % 100 == 0:
        print('epoch: %s, loss: %s, y_true: %s, y_pred: %s' % (str(i), str(loss), str(y[0][0]), str(y_pred[0][0])))
    grad_y_pred = 2 * (y_pred - y)
    # grad_w2
    grda_w2 = h1.t().mm(grad_y_pred)
    grad_h = grad_y_pred.clone()
    grad_h = grad_h.mm(w2.t())
    grad_h.clamp(min=0)
    # grad_w1    
    grad_w1 = x.t().mm(grad_h)
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grda_w2

epoch: 0, loss: tensor(50865004.), y_true: tensor(1.6471), y_pred: tensor(615.6661)
epoch: 100, loss: tensor(43341.0586), y_true: tensor(1.6471), y_pred: tensor(-1.2249)
epoch: 200, loss: tensor(8607.0127), y_true: tensor(1.6471), y_pred: tensor(0.4496)
epoch: 300, loss: tensor(2932.5850), y_true: tensor(1.6471), y_pred: tensor(1.1609)
epoch: 400, loss: tensor(1229.3286), y_true: tensor(1.6471), y_pred: tensor(1.4198)
epoch: 500, loss: tensor(628.3430), y_true: tensor(1.6471), y_pred: tensor(1.4608)
epoch: 600, loss: tensor(380.9638), y_true: tensor(1.6471), y_pred: tensor(1.4663)
epoch: 700, loss: tensor(259.8894), y_true: tensor(1.6471), y_pred: tensor(1.4684)
epoch: 800, loss: tensor(192.2761), y_true: tensor(1.6471), y_pred: tensor(1.4757)
epoch: 900, loss: tensor(149.2819), y_true: tensor(1.6471), y_pred: tensor(1.4839)


### aotograd

In [6]:
from torch.autograd import Variable

In [7]:
batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10

In [8]:
x = Variable(torch.randn(batch_n, input_data), requires_grad=False)
y = Variable(torch.randn(batch_n, output_data), requires_grad=False)
print(x.shape)
print(y.shape)

torch.Size([100, 1000])
torch.Size([100, 10])


In [9]:
w1 = Variable(torch.randn(input_data, hidden_layer), requires_grad=True)
w2 = Variable(torch.randn(hidden_layer, output_data), requires_grad=True)
print(w1.shape)
print(w2.shape)

torch.Size([1000, 100])
torch.Size([100, 10])


In [10]:
epoch_n = 1000
learning_rate = 1e-6
for i in range(epoch_n):
    h1 = x.mm(w1) # (100, 100) = (100, 1000) * (1000, 100) 
    h1 = h1.clamp(min=0) # ReLu
    y_pred = h1.mm(w2) # (100, 10) = (100, 100) * (100, 10)
    loss = (y_pred - y).pow(2).sum() # SE
    if i % 100 == 0:
        print('epoch: %s, loss: %s, y_true: %s, y_pred: %s' % (str(i), str(loss), str(y[0][0]), str(y_pred[0][0])))
    loss.backward()
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data
    w1.grad.data.zero_()
    w2.grad.data.zero_()

epoch: 0, loss: tensor(40722936., grad_fn=<SumBackward0>), y_true: tensor(-0.8257), y_pred: tensor(96.6597, grad_fn=<SelectBackward>)
epoch: 100, loss: tensor(41216.3125, grad_fn=<SumBackward0>), y_true: tensor(-0.8257), y_pred: tensor(-1.9019, grad_fn=<SelectBackward>)
epoch: 200, loss: tensor(9559.8496, grad_fn=<SumBackward0>), y_true: tensor(-0.8257), y_pred: tensor(-0.5099, grad_fn=<SelectBackward>)
epoch: 300, loss: tensor(3520.5991, grad_fn=<SumBackward0>), y_true: tensor(-0.8257), y_pred: tensor(-0.0955, grad_fn=<SelectBackward>)
epoch: 400, loss: tensor(1603.3752, grad_fn=<SumBackward0>), y_true: tensor(-0.8257), y_pred: tensor(-0.0625, grad_fn=<SelectBackward>)
epoch: 500, loss: tensor(851.7950, grad_fn=<SumBackward0>), y_true: tensor(-0.8257), y_pred: tensor(-0.1455, grad_fn=<SelectBackward>)
epoch: 600, loss: tensor(513.6158, grad_fn=<SumBackward0>), y_true: tensor(-0.8257), y_pred: tensor(-0.2597, grad_fn=<SelectBackward>)
epoch: 700, loss: tensor(343.4406, grad_fn=<SumBack

### model

In [11]:
class Model(torch.nn.Module):
    
    def __init__(self):
        super(Model, self).__init__()
        
    def forward(self, input_data, w1, w2):
        x = torch.mm(input_data, w1) # h1 = input * w1
        x = torch.clamp(x, min=0) # ReLu
        x = torch.mm(x, w2) # y = h1 * w2
        return x
    
    def backward(self):
        pass

model = Model()
print(model)

Model()


In [12]:
batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10

In [13]:
x = Variable(torch.randn(batch_n, input_data), requires_grad=False)
y = Variable(torch.randn(batch_n, output_data), requires_grad=False)
print(x.shape)
print(y.shape)

torch.Size([100, 1000])
torch.Size([100, 10])


In [14]:
w1 = Variable(torch.randn(input_data, hidden_layer), requires_grad=True)
w2 = Variable(torch.randn(hidden_layer, output_data), requires_grad=True)
print(w1.shape)
print(w2.shape)

torch.Size([1000, 100])
torch.Size([100, 10])


In [15]:
epoch_n = 1000
learning_rate = 1e-6
for i in range(epoch_n):
    h1 = x.mm(w1) # h1 = x * w1
    h1 = h1.clamp(min=0) # ReLu
    y_pred = model(x, w1, w2) # y = x * w1 *w2
    loss = (y_pred - y).pow(2).sum() # SE
    if i % 100 == 0:
        print('epoch: %s, loss: %s, y_true: %s, y_pred: %s' % (str(i), str(loss), str(y[0][0]), str(y_pred[0][0])))
    loss.backward()
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data
    w1.grad.data.zero_()
    w2.grad.data.zero_()

epoch: 0, loss: tensor(42468268., grad_fn=<SumBackward0>), y_true: tensor(1.0570), y_pred: tensor(79.8385, grad_fn=<SelectBackward>)
epoch: 100, loss: tensor(55786.7578, grad_fn=<SumBackward0>), y_true: tensor(1.0570), y_pred: tensor(-10.0055, grad_fn=<SelectBackward>)
epoch: 200, loss: tensor(15443.3662, grad_fn=<SumBackward0>), y_true: tensor(1.0570), y_pred: tensor(-5.3197, grad_fn=<SelectBackward>)
epoch: 300, loss: tensor(7095.4106, grad_fn=<SumBackward0>), y_true: tensor(1.0570), y_pred: tensor(-3.0542, grad_fn=<SelectBackward>)
epoch: 400, loss: tensor(3965.4519, grad_fn=<SumBackward0>), y_true: tensor(1.0570), y_pred: tensor(-1.8260, grad_fn=<SelectBackward>)
epoch: 500, loss: tensor(2469.5662, grad_fn=<SumBackward0>), y_true: tensor(1.0570), y_pred: tensor(-1.0806, grad_fn=<SelectBackward>)
epoch: 600, loss: tensor(1659.4929, grad_fn=<SumBackward0>), y_true: tensor(1.0570), y_pred: tensor(-0.5815, grad_fn=<SelectBackward>)
epoch: 700, loss: tensor(1173.9072, grad_fn=<SumBackwa

### nn

In [16]:
models = torch.nn.Sequential(
    torch.nn.Linear(input_data, hidden_layer), # w1
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_layer, output_data) # w2
)
print(models)

Sequential(
  (0): Linear(in_features=1000, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)


In [17]:
batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10

In [18]:
x = Variable(torch.randn(batch_n, input_data), requires_grad=False)
y = Variable(torch.randn(batch_n, output_data), requires_grad=False)
print(x.shape)
print(y.shape)

torch.Size([100, 1000])
torch.Size([100, 10])


In [19]:
epoch_n = 10000
learning_rate = 1e-4
loss_fn = torch.nn.MSELoss()
for i in range(epoch_n):
    y_pred = models(x)
    loss = loss_fn(y_pred, y)
    if i % 1000 == 0:
        print('epoch: %s, loss: %s, y_true: %s, y_pred: %s' % (str(i), str(loss), str(y[0][0]), str(y_pred[0][0])))
    loss.backward()
    for j in models.parameters():
        j.data -= learning_rate * j.grad.data
    models.zero_grad()

epoch: 0, loss: tensor(1.0343, grad_fn=<MseLossBackward>), y_true: tensor(-0.6250), y_pred: tensor(-0.0283, grad_fn=<SelectBackward>)
epoch: 1000, loss: tensor(0.9531, grad_fn=<MseLossBackward>), y_true: tensor(-0.6250), y_pred: tensor(-0.0290, grad_fn=<SelectBackward>)
epoch: 2000, loss: tensor(0.8838, grad_fn=<MseLossBackward>), y_true: tensor(-0.6250), y_pred: tensor(-0.0289, grad_fn=<SelectBackward>)
epoch: 3000, loss: tensor(0.8231, grad_fn=<MseLossBackward>), y_true: tensor(-0.6250), y_pred: tensor(-0.0308, grad_fn=<SelectBackward>)
epoch: 4000, loss: tensor(0.7695, grad_fn=<MseLossBackward>), y_true: tensor(-0.6250), y_pred: tensor(-0.0364, grad_fn=<SelectBackward>)
epoch: 5000, loss: tensor(0.7215, grad_fn=<MseLossBackward>), y_true: tensor(-0.6250), y_pred: tensor(-0.0423, grad_fn=<SelectBackward>)
epoch: 6000, loss: tensor(0.6783, grad_fn=<MseLossBackward>), y_true: tensor(-0.6250), y_pred: tensor(-0.0502, grad_fn=<SelectBackward>)
epoch: 7000, loss: tensor(0.6386, grad_fn=<M

### optim

In [20]:
models = torch.nn.Sequential(
    torch.nn.Linear(input_data, hidden_layer), # w1
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_layer, output_data) # w2
)
print(models)

Sequential(
  (0): Linear(in_features=1000, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)


In [21]:
batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10

In [22]:
x = Variable(torch.randn(batch_n, input_data), requires_grad=False)
y = Variable(torch.randn(batch_n, output_data), requires_grad=False)
print(x.shape)
print(y.shape)

torch.Size([100, 1000])
torch.Size([100, 10])


In [23]:
epoch_n = 10000
learning_rate = 1e-4
loss_fn = torch.nn.MSELoss()
optimzer = torch.optim.Adam(models.parameters(), lr=learning_rate)
for i in range(epoch_n):
    y_pred = models(x)
    loss = loss_fn(y_pred, y)
    if i % 1000 == 0:
        print('epoch: %s, loss: %s, y_true: %s, y_pred: %s' % (str(i), str(loss), str(y[0][0]), str(y_pred[0][0])))
    optimzer.zero_grad()
    loss.backward()
    optimzer.step()

epoch: 0, loss: tensor(1.0492, grad_fn=<MseLossBackward>), y_true: tensor(0.1126), y_pred: tensor(0.0124, grad_fn=<SelectBackward>)
epoch: 1000, loss: tensor(1.8917e-14, grad_fn=<MseLossBackward>), y_true: tensor(0.1126), y_pred: tensor(0.1126, grad_fn=<SelectBackward>)
epoch: 2000, loss: tensor(3.1021e-15, grad_fn=<MseLossBackward>), y_true: tensor(0.1126), y_pred: tensor(0.1126, grad_fn=<SelectBackward>)
epoch: 3000, loss: tensor(3.9565e-15, grad_fn=<MseLossBackward>), y_true: tensor(0.1126), y_pred: tensor(0.1126, grad_fn=<SelectBackward>)
epoch: 4000, loss: tensor(4.5731e-15, grad_fn=<MseLossBackward>), y_true: tensor(0.1126), y_pred: tensor(0.1126, grad_fn=<SelectBackward>)
epoch: 5000, loss: tensor(5.9083e-15, grad_fn=<MseLossBackward>), y_true: tensor(0.1126), y_pred: tensor(0.1126, grad_fn=<SelectBackward>)
epoch: 6000, loss: tensor(9.2542e-15, grad_fn=<MseLossBackward>), y_true: tensor(0.1126), y_pred: tensor(0.1126, grad_fn=<SelectBackward>)
epoch: 7000, loss: tensor(1.4840e-