In [1]:
import torch
import torch.nn as nn

In [2]:
n_in, n_h, n_out, batch_size = 10, 5, 1, 10
x = torch.randn(batch_size, n_in)
y = torch.tensor([[1.0], [0.0], [0.0], 
[1.0], [1.0], [1.0], [0.0], [0.0], [1.0], [1.0]])
# Create a model
model = nn.Sequential(nn.Linear(n_in, n_h),
   nn.ReLU(),
   nn.Linear(n_h, n_out),
   nn.Sigmoid())
# Construct the loss function
criterion = torch.nn.MSELoss()
# Construct the optimizer (Stochastic Gradient Descent in this case)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

In [3]:
# Gradient Descent
for epoch in range(50):
   # Forward pass: Compute predicted y by passing x to the model
   y_pred = model(x)

   # Compute and print loss
   loss = criterion(y_pred, y)
   print('epoch: ', epoch,' loss: ', loss.item())

   # Zero gradients, perform a backward pass, and update the weights.
   optimizer.zero_grad()

   # perform a backward pass (backpropagation)
   loss.backward()

   # Update the parameters
   optimizer.step()

epoch:  0  loss:  0.25177857279777527
epoch:  1  loss:  0.2514360249042511
epoch:  2  loss:  0.2510942220687866
epoch:  3  loss:  0.25075313448905945
epoch:  4  loss:  0.2504127323627472
epoch:  5  loss:  0.25007307529449463
epoch:  6  loss:  0.24973413348197937
epoch:  7  loss:  0.24939589202404022
epoch:  8  loss:  0.24905835092067719
epoch:  9  loss:  0.24872155487537384
epoch:  10  loss:  0.24838541448116302
epoch:  11  loss:  0.24804998934268951
epoch:  12  loss:  0.2477157860994339
epoch:  13  loss:  0.24739190936088562
epoch:  14  loss:  0.24706850945949554
epoch:  15  loss:  0.24674533307552338
epoch:  16  loss:  0.24642284214496613
epoch:  17  loss:  0.24610111117362976
epoch:  18  loss:  0.24578005075454712
epoch:  19  loss:  0.24545972049236298
epoch:  20  loss:  0.24514012038707733
epoch:  21  loss:  0.2448211908340454
epoch:  22  loss:  0.244503453373909
epoch:  23  loss:  0.24418681859970093
epoch:  24  loss:  0.24387004971504211
epoch:  25  loss:  0.24355393648147583
epo

In [4]:
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5) # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1   = nn.Linear(16*5*5, 120) # an affine operation: y = Wx + b
        self.fc2   = nn.Linear(120, 84)
        self.fc3   = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv2(x)), 2) # If the size is a square you can only specify a single number
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
net


Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [5]:
from torch.autograd import Variable
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's .weight

input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(input.shape)
print(out.shape)

10
torch.Size([6, 1, 5, 5])
torch.Size([1, 1, 32, 32])
torch.Size([1, 10])


In [6]:
net.zero_grad()
out.backward(torch.rand(1, 10), retain_graph=True)

output = net(input)
print(output)
target = Variable(torch.Tensor([1,2,3,4,5,6,7,8,9,10]))  # a dummy target, for example
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor([[-0.0678, -0.0346, -0.0489,  0.0146,  0.0358,  0.0892,  0.0144, -0.0107,
         -0.0270,  0.0840]], grad_fn=<AddmmBackward>)


  return F.mse_loss(input, target, reduction=self.reduction)


tensor(38.2825, grad_fn=<MseLossBackward>)


In [9]:
print(loss.grad_fn)
net.zero_grad()
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

<MseLossBackward object at 0x0000020D38199128>
conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])


RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [8]:
import torch.optim as optim
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr = 0.01)

# in your training loop:
optimizer.zero_grad() # zero the gradient buffers
output = net(input)
print(output)
loss = criterion(output, target)

loss.backward()
optimizer.step() # Does the update

tensor([[-0.0678, -0.0346, -0.0489,  0.0146,  0.0358,  0.0892,  0.0144, -0.0107,
         -0.0270,  0.0840]], grad_fn=<AddmmBackward>)
