# Using `torch.nn` to construct neural networks

A typical training procedure for a neural network is as follows:
1. Define the neural network that has some learnable parameters (or weights)
2. Iterate over a dataset of inputs
3. Process input through the network
4. Compute the loss (how far the output is from being correct)
5. Propagate gradients back into the network's parameters
6. Update the weights of the network, typically using a simple update rule: `weight = weight - learning_rate * gradient`

## Define the network

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
  """Small convolutional network for single-channel 32x32 inputs.

  Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
  three fully connected layers that emit 10 values per sample.
  """

  def __init__(self):
    super().__init__()
    # Convolutional stage: 1 -> 6 -> 16 channels, 5x5 square kernels.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    # Fully connected stage; each layer is an affine map y = Wx + b.
    # 16 * 5 * 5 is the flattened size of the second pooling output.
    self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, input):
    """Run the network on a batch.

    Args:
      input: Tensor of shape (N, 1, 32, 32), where N is the batch size.

    Returns:
      Tensor of shape (N, 10); no activation is applied to the last layer.
    """
    # C1: 5x5 convolution + ReLU -> (N, 6, 28, 28)
    activations = F.relu(self.conv1(input))
    # S2: 2x2 max pooling, no learnable parameters -> (N, 6, 14, 14)
    activations = F.max_pool2d(activations, kernel_size=2)
    # C3: 5x5 convolution + ReLU -> (N, 16, 10, 10)
    activations = F.relu(self.conv2(activations))
    # S4: 2x2 max pooling, no learnable parameters -> (N, 16, 5, 5)
    activations = F.max_pool2d(activations, kernel_size=2)
    # Flatten everything except the batch dimension -> (N, 400)
    activations = activations.flatten(start_dim=1)
    # F5: fully connected + ReLU, (N, 400) -> (N, 120)
    activations = F.relu(self.fc1(activations))
    # F6: fully connected + ReLU, (N, 120) -> (N, 84)
    activations = F.relu(self.fc2(activations))
    # Output layer: plain affine map, (N, 84) -> (N, 10)
    return self.fc3(activations)

# Build the network and print its registered layers.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
# Collect the learnable tensors returned by net.parameters() and inspect them.
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's .weight: (out_channels, in_channels, kernel_h, kernel_w)

10
torch.Size([6, 1, 5, 5])


In [5]:
# Push one random single-channel 32x32 sample (batch size 1) through the network.
# NOTE(review): `input` shadows the Python builtin; the name is left unchanged
# because other cells may refer to it.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.1475, -0.0576,  0.0431, -0.0798, -0.0567,  0.0784, -0.0793, -0.0639,
          0.0687, -0.1004]], grad_fn=<AddmmBackward0>)


In [6]:
# Reset the gradients stored on the network's parameters, then backpropagate
# from `out` using a random (1, 10) tensor as the gradient passed to backward().
net.zero_grad()
out.backward(torch.randn(1, 10))