In [1]:
import torch
from torch import Tensor
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F

In [2]:
poly_degree = 4

W_target = torch.ones(poly_degree, 1) * 5
b_target = torch.ones(1) * 5

def f(x):
    """Approximated function."""
    return x.mm(W_target) + b_target[0]

def make_features(x):
    """Builds features i.e. a matrix with columns [x, x^2, x^3, x^4]."""
    x = x.unsqueeze(1)
    return torch.cat([x ** i for i in range(1, poly_degree+1)], 1)
  
def next_batch(batch_size=32):
    """Builds a batch i.e. (x, f(x)) pair."""
    random = torch.randn(batch_size)
    x = make_features(random)
    y = f(x)
    return Variable(x, requires_grad=False), Variable(y, requires_grad=False)

In [3]:
learning_rate = 0.001

pred = torch.nn.Linear(W_target.size(0), 1)
optimizer = optim.SGD(pred.parameters(), lr=learning_rate)

In [4]:
log_interval = 500

for i in range(5000):
    # Get data
    x, y_hat = next_batch()

    # Reset gradients
    pred.zero_grad()

    # Forward pass
    y = pred(x)
    loss = F.smooth_l1_loss(y, y_hat)

    # Backward pass
    loss.backward()

    # Apply gradients
    
    optimizer.step()
#     for param in pred.parameters():
#       param.data.add_(-learning_rate * param.grad.data)

    if i % log_interval == 0:
      print('step: {}, loss: {}'.format(i, loss.data[0]))
    
print(list(pred.parameters()))

step: 0, loss: 33.25333786010742
step: 500, loss: 11.235184669494629
step: 1000, loss: 8.635750770568848
step: 1500, loss: 10.732982635498047
step: 2000, loss: 13.996150016784668
step: 2500, loss: 8.966940879821777
step: 3000, loss: 6.052102088928223
step: 3500, loss: 8.52223014831543
step: 4000, loss: 6.985746383666992
step: 4500, loss: 3.861328125
[Parameter containing:
 1.8577  2.3841  5.0106  5.4465
[torch.FloatTensor of size 1x4]
, Parameter containing:
 3.6810
[torch.FloatTensor of size 1]
]
