In [0]:
# Install Pytorch.
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision

In [0]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline


PyTorch: optim
--------------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

This implementation uses the nn package from PyTorch to build the network.

Rather than manually updating the weights of the model as we have been doing,
we use the optim package to define an Optimizer that will update the weights
for us. The optim package defines many optimization algorithms that are commonly
used for deep learning, including SGD+momentum, RMSProp, Adam, etc.



In [3]:
import torch
from torch.autograd import Variable

# N: batch size, D_in: input dim, H: hidden dim, D_out: output dim
N, D_in, H, D_out = 64, 1000, 100, 10

# Seed the global RNG so that the random data and the initial weights -- and
# therefore the printed loss values -- are the same on every run.
torch.manual_seed(0)

# Create random Tensors to hold inputs and outputs, and wrap them in Variables.
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
# size_average=False makes the loss the sum (not the mean) of the squared
# errors, i.e. the squared Euclidean distance between y_pred and y.
loss_fn = torch.nn.MSELoss(size_average=False)

# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use Adam. The optim package contains many
# other optimization algorithms. The first argument to the Adam constructor
# tells the optimizer which Variables it should update.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    # Convert the loss to a plain Python number. PyTorch releases that provide
    # `.item()` return the loss as a 0-dim tensor, which cannot be indexed with
    # [0]; older releases have no `.item()` and keep the value in a
    # one-element tensor, so fall back to indexing there.
    loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
    print(t, loss_value)

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model). This is because, by default, gradients are
    # accumulated in buffers (i.e., not overwritten) whenever .backward() is
    # called. Check out the docs of torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters.
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters.
    optimizer.step()

0 719.44873046875
1 702.1841430664062
2 685.3877563476562
3 669.0961303710938
4 653.2897338867188
5 637.9321899414062
6 623.0003662109375
7 608.5007934570312
8 594.4061889648438
9 580.6451416015625
10 567.2606811523438
11 554.2449951171875
12 541.5911254882812
13 529.2683715820312
14 517.3071899414062
15 505.6618957519531
16 494.27703857421875
17 483.2029724121094
18 472.4294738769531
19 461.9127502441406
20 451.63348388671875
21 441.5830383300781
22 431.77392578125
23 422.2545471191406
24 412.9529724121094
25 403.86932373046875
26 395.0513610839844
27 386.45147705078125
28 378.0884094238281
29 369.9294738769531
30 361.96783447265625
31 354.2317199707031
32 346.67254638671875
33 339.2497863769531
34 331.9783020019531
35 324.8374938964844
36 317.8170471191406
37 310.9396667480469
38 304.2132263183594
39 297.6184387207031
40 291.1590881347656
41 284.8349609375
42 278.6349182128906
43 272.568115234375
44 266.60968017578125
45 260.7722473144531
46 255.0743865966797
47 249.47116088867188
48