In [0]:
# Install Pytorch.
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision

In [0]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


PyTorch: Variables and autograd
-------------------------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation computes the forward pass using operations on PyTorch
Variables, and uses PyTorch autograd to compute gradients.

A PyTorch Variable is a wrapper around a PyTorch Tensor, and represents a node
in a computational graph. If x is a Variable then x.data is a Tensor giving its
value, and x.grad is another Variable holding the gradient of some scalar value
with respect to x.

PyTorch Variables have the same API as PyTorch tensors: (almost) any operation
you can do on a Tensor you can also do on a Variable; the difference is that
autograd allows you to automatically compute gradients.



In [3]:
import torch
from torch.autograd import Variable

# Train a two-layer fully-connected ReLU network (no biases) on random data,
# using autograd for the backward pass and manual gradient descent updates.

# Run on the GPU when CUDA is available and fall back to the CPU otherwise, so
# this cell also works on runtimes where the CPU build of PyTorch was installed.
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

# N: batch size, D_in: input dim, H: hidden dim, D_out: output dim
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs, and wrap them in Variables.
# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Variables during the backward pass.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Create random Tensors for weights, and wrap them in Variables.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Variables during the backward pass.
w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

learning_rate = 1e-6

for t in range(500):
    # Forward pass: compute predicted y using operations on Variables; these
    # are exactly the same operations we used to compute the forward pass using
    # Tensors, but we do not need to keep references to intermediate values since
    # we are not implementing the backward pass by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute the loss (sum of squared errors) using operations on Variables.
    loss = (y_pred - y).pow(2).sum()

    # Pull the loss out as a plain Python number for printing. Indexing with
    # loss.data[0] only works while the loss has shape (1,); it fails once the
    # loss is a 0-dim tensor, so prefer item() when the object provides it and
    # keep loss.data[0] as the fallback for releases without item().
    loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
    print(t, loss_value)

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Variables with requires_grad=True.
    # After this call w1.grad and w2.grad will be Variables holding the gradient
    # of the loss with respect to w1 and w2 respectively.
    loss.backward()

    # Update weights using gradient descent; w1.data and w2.data are Tensors,
    # w1.grad and w2.grad are Variables and w1.grad.data and w2.grad.data are
    # Tensors. Operating on .data keeps the update itself out of the graph.
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    # Manually zero the gradients after updating weights; backward()
    # accumulates into .grad, so stale values would leak into the next step.
    w1.grad.data.zero_()
    w2.grad.data.zero_()

0 38236308.0
1 39659528.0
2 47680172.0
3 51015928.0
4 39794716.0
5 21004940.0
6 8223178.5
7 3276394.5
8 1720325.75
9 1172659.25
10 911285.125
11 745672.4375
12 623506.625
13 527283.0
14 449427.9375
15 385316.625
16 332234.875
17 287976.0
18 250752.34375
19 219290.765625
20 192510.96875
21 169614.0625
22 149932.84375
23 132939.9375
24 118211.84375
25 105399.3984375
26 94225.4609375
27 84467.1953125
28 75906.4375
29 68362.375
30 61687.0703125
31 55771.546875
32 50509.44921875
33 45821.5
34 41633.74609375
35 37885.76171875
36 34523.6875
37 31502.4375
38 28781.84375
39 26327.982421875
40 24110.580078125
41 22103.658203125
42 20283.392578125
43 18632.65234375
44 17132.677734375
45 15768.724609375
46 14525.7138671875
47 13391.8232421875
48 12356.19921875
49 11406.3017578125
50 10537.2841796875
51 9741.0849609375
52 9010.8505859375
53 8340.783203125
54 7725.1083984375
55 7159.2705078125
56 6638.5849609375
57 6158.91943359375
58 5716.9169921875
59 5309.36279296875
60 4933.15576171875
61 4585.6