In [2]:
import torch
import torch.nn as nn

In [3]:
# Seed the global RNG first so the random inputs and the initial layer weights
# are identical on every Restart & Run All.
torch.manual_seed(0)

# Layer widths and the number of samples in the toy batch
n_in, n_h, n_out, batch_size = 10, 5, 1, 10

# Random input batch plus a hand-written column of binary targets (one per sample)
x = torch.randn(batch_size, n_in)
y = torch.tensor([[1.0], [0.0], [0.0], [1.0], [1.0],
                  [1.0], [0.0], [0.0], [1.0], [1.0]])
# The targets are hard-coded, so fail loudly if the sizes above are changed
# without updating them.
assert y.shape == (batch_size, n_out), "y must have one row per sample"

# Create a model: linear -> ReLU -> linear -> sigmoid
model = nn.Sequential(
    nn.Linear(n_in, n_h),
    nn.ReLU(),
    nn.Linear(n_h, n_out),
    nn.Sigmoid(),
)
# Construct the loss function (mean squared error)
criterion = nn.MSELoss()
# Construct the optimizer (Stochastic Gradient Descent in this case)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [4]:
# Gradient Descent: 50 full-batch updates
for epoch in range(50):
    # Start every iteration with cleared gradient buffers
    optimizer.zero_grad()

    # Forward pass, then score the predictions against the targets
    y_pred = model(x)
    loss = criterion(y_pred, y)
    print('epoch: ', epoch,' loss: ', loss.item())

    # Backpropagate the loss and apply one optimizer update
    loss.backward()
    optimizer.step()

epoch:  0  loss:  0.2524194121360779
epoch:  1  loss:  0.25228434801101685
epoch:  2  loss:  0.25214970111846924
epoch:  3  loss:  0.25201550126075745
epoch:  4  loss:  0.2518816590309143
epoch:  5  loss:  0.2517482340335846
epoch:  6  loss:  0.2516152262687683
epoch:  7  loss:  0.25148260593414307
epoch:  8  loss:  0.25135037302970886
epoch:  9  loss:  0.2512185275554657
epoch:  10  loss:  0.25108709931373596
epoch:  11  loss:  0.2509560286998749
epoch:  12  loss:  0.25082531571388245
epoch:  13  loss:  0.25069499015808105
epoch:  14  loss:  0.2505650818347931
epoch:  15  loss:  0.2504355013370514
epoch:  16  loss:  0.25030630826950073
epoch:  17  loss:  0.25017744302749634
epoch:  18  loss:  0.250048965215683
epoch:  19  loss:  0.24992084503173828
epoch:  20  loss:  0.24979308247566223
epoch:  21  loss:  0.24966567754745483
epoch:  22  loss:  0.2495386153459549
epoch:  23  loss:  0.24941186606884003
epoch:  24  loss:  0.2492854744195938
epoch:  25  loss:  0.24915944039821625
epoch:  

In [9]:
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Small convolutional network: two conv/pool stages, then three linear layers.

    Takes single-channel image batches and produces 10 scores per sample.
    The first linear layer expects 16*5*5 flattened features, which is what a
    32x32 input yields after the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        # Convolutions: 1 -> 6 channels, then 6 -> 16 channels, both with 5x5 kernels
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Affine layers (y = Wx + b) narrowing 400 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 output scores for each sample in the batch ``x``."""
        # Stage 1: convolution -> ReLU -> max pooling over a (2, 2) window
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # Stage 2: same pattern; a single number is enough for a square window
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)
        # Collapse everything except the batch dimension into one feature axis
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        count = 1
        for extent in x.shape[1:]:
            count *= extent
        return count

# Build the network; the bare name on the last line makes the notebook
# display its layer summary as the cell output.
net = Net()
net


Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [22]:
from torch.autograd import Variable
# Collect the learnable tensors (a weight and a bias for each of the five layers)
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

# One random single-channel 32x32 image. Plain tensors take part in autograd
# directly, so the deprecated Variable wrapper is not needed here.
# NOTE: the name shadows the built-in input(), but later cells refer to it.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(input.shape)
print(out.shape)

10
torch.Size([6, 1, 5, 5])
torch.Size([1, 1, 32, 32])
torch.Size([1, 10])


In [40]:
# Clear stale gradients, then backpropagate a random upstream gradient through
# the earlier forward pass; retain_graph=True keeps that graph usable afterwards.
net.zero_grad()
out.backward(torch.rand(1, 10), retain_graph=True)

output = net(input)
print(output)
# A dummy target, for example. It is reshaped to (1, 10) so that it matches
# `output` exactly: a 1-D target of length 10 only works through broadcasting,
# which nn.MSELoss warns about. The loss value itself is unchanged.
target = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]).view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor([[-0.0797, -0.0086,  0.1087,  0.0398, -0.0712, -0.1737, -0.0143, -0.1063,
         -0.1455, -0.0326]], grad_fn=<AddmmBackward>)
tensor(39.2281, grad_fn=<MseLossBackward>)


In [48]:
# Show the autograd node that produced the loss
print(loss.grad_fn)
# Reset the gradient buffers of every parameter so the "before" print is all zeros
net.zero_grad()
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)
# Backpropagate the loss; this fills in the .grad of the network's parameters
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

<MseLossBackward object at 0x000001F7D7FD6F98>
conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0609,  0.1111, -0.0187,  0.1179,  0.1608, -0.1728])


In [50]:
import torch.optim as optim
# Build an SGD optimizer over every parameter of the network
optimizer = optim.SGD(net.parameters(), lr=0.01)

# One update, written the way it would appear inside a training loop:
# forward pass first, then clear the gradient buffers before backpropagating.
output = net(input)
print(output)
optimizer.zero_grad()
loss = criterion(output, target)

loss.backward()
optimizer.step()  # applies the parameter update

tensor([[-0.1111, -0.0172,  0.1096,  0.0920, -0.0292, -0.0740,  0.0320, -0.0200,
         -0.0838,  0.0637]], grad_fn=<AddmmBackward>)
