In [0]:
# Install PyTorch: build the wheel's platform tag, choose a GPU or CPU build,
# then pip-install that specific wheel together with torchvision.
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Join the three values returned by the wheel helpers into '<impl><ver>-<abi>';
# this string is substituted into the wheel filename in the pip command below.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

# Use the 'cu80' wheel directory when nvidia-smi exists at this path, otherwise
# fall back to 'cpu'. (Presumably 'cu80' denotes the CUDA 8.0 build — confirm.)
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

# IPython expands {accelerator} and {platform} from the Python variables above
# before handing the line to the shell. torch is pinned to 0.3.0.post4.
# NOTE(review): torchvision is not version-pinned and the wheel is fetched over
# plain http — confirm both are intentional.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision

In [0]:
# Ask IPython to render matplotlib figures inline in the cell output.
# NOTE(review): no plotting call is visible in this part of the notebook —
# confirm the magic is still needed.
%matplotlib inline


PyTorch: Custom nn Modules
--------------------------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

This implementation defines the model as a custom Module subclass. Whenever you
want a model more complex than a simple sequence of existing Modules, you will
need to define it this way.



In [0]:
import torch
from torch.autograd import Variable

class TwoLayerNet(torch.nn.Module):
    """
    Fully-connected network with one hidden layer: a linear map, a ReLU
    (expressed as a clamp at zero), and a second linear map.
    """

    def __init__(self, D_in, H, D_out):
        """
        Create the two nn.Linear layers (D_in -> H, then H -> D_out) and keep
        them as attributes of the module.
        """
        super().__init__()
        # Creation order is kept (linear1 first) so weight initialisation
        # consumes the random number generator in the same sequence.
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        Map the input x to a prediction by applying the first linear layer,
        clamping negative activations to zero, and applying the second
        linear layer.
        """
        hidden = self.linear1(x)
        activated = hidden.clamp(min=0)
        return self.linear2(activated)

In [4]:
# Problem sizes.
N = 64        # batch size
D_in = 1000   # input dimension
H = 100       # hidden dimension
D_out = 10    # output dimension

# Random inputs and targets, each wrapped in a Variable. The targets are
# explicitly marked as not requiring gradients.
x = Variable(torch.randn(N, D_in))
y = Variable(
    torch.randn(N, D_out),
    requires_grad=False,
)

# Instantiate the network defined above.
model = TwoLayerNet(D_in, H, D_out)

# Loss and optimizer. MSELoss is built with size_average=False, and SGD is
# handed model.parameters(), i.e. the learnable parameters of the two
# nn.Linear members of the model.
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-4,
)

for t in range(500):
    # Forward: run the inputs through the model to get predictions.
    y_pred = model(x)

    # Evaluate the loss against the targets and report it for this step.
    loss = criterion(y_pred, y)
    print(t, loss.data[0])

    # Backward + update: clear old gradients, backpropagate the loss, then
    # let the optimizer adjust the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 668.7462768554688
1 620.6288452148438
2 579.4578857421875
3 543.2051391601562
4 511.0862731933594
5 481.90667724609375
6 455.17791748046875
7 430.6435241699219
8 407.832275390625
9 386.4626770019531
10 366.2969055175781
11 347.1841735839844
12 329.091552734375
13 312.06097412109375
14 295.9700012207031
15 280.63836669921875
16 265.93328857421875
17 251.87913513183594
18 238.3888397216797
19 225.45223999023438
20 213.02613830566406
21 201.12417602539062
22 189.77418518066406
23 178.8725128173828
24 168.47445678710938
25 158.55284118652344
26 149.1542510986328
27 140.24270629882812
28 131.7775421142578
29 123.7637710571289
30 116.18302154541016
31 109.02812957763672
32 102.27731323242188
33 95.91557312011719
34 89.92681884765625
35 84.28341674804688
36 78.97205352783203
37 73.98590087890625
38 69.30966186523438
39 64.91145324707031
40 60.79029846191406
41 56.932579040527344
42 53.30897903442383
43 49.91728973388672
44 46.74325942993164
45 43.77842712402344
46 41.00311279296875
47 38.40