In [None]:
%matplotlib inline


Neural Networks
===============

Neural networks can be constructed using the ``torch.nn`` package.

Now that you have had a glimpse of ``autograd``: ``nn`` depends on
``autograd`` to define models and differentiate them.
An ``nn.Module`` contains layers, and a method ``forward(input)`` that
returns the ``output``.

------------------

Let’s define this network:
it has two linear layers, each followed by a sigmoid activation.


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Two fully connected layers (2 -> 2 -> 2), each followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        # Both layers map 2 input features to 2 output features (with bias).
        self.fc1 = nn.Linear(2, 2)
        self.fc2 = nn.Linear(2, 2)

    def forward(self, x):
        """Return ``sigmoid(fc2(sigmoid(fc1(x))))`` for the input tensor ``x``."""
        hidden = self.fc1(x).sigmoid()
        return self.fc2(hidden).sigmoid()


# Instantiate the network and print its layer structure.
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=2, out_features=2, bias=True)
  (fc2): Linear(in_features=2, out_features=2, bias=True)
)


Printing the parameters (weights and biases) of the network

In [None]:
# Gather every learnable parameter of the network into a list and show them.
params = [*net.parameters()]
print(params)


[Parameter containing:
tensor([[ 0.3649, -0.3526],
        [-0.5054,  0.0098]], requires_grad=True), Parameter containing:
tensor([ 0.0160, -0.6365], requires_grad=True), Parameter containing:
tensor([[-0.2593, -0.1377],
        [ 0.2917,  0.1411]], requires_grad=True), Parameter containing:
tensor([0.2468, 0.5889], requires_grad=True)]


Setting the weights and biases to fixed initial values

In [None]:
# Overwrite the randomly initialised weights and biases with fixed values so
# the results no longer depend on the random initialisation.
#
# The values are written in place with ``copy_`` instead of assigning brand-new
# ``nn.Parameter`` objects: the existing Parameter objects stay attached to the
# module, so references captured earlier (e.g. a list built from
# ``net.parameters()``) keep pointing at the live tensors.
# ``torch.no_grad()`` is required here so that the in-place writes to leaf
# tensors that require grad are not tracked by autograd.
with torch.no_grad():
    net.fc1.weight.copy_(torch.tensor([[0.15, 0.2], [0.25, 0.3]]))
    net.fc1.bias.copy_(torch.tensor([0.35, 0.35]))
    net.fc2.weight.copy_(torch.tensor([[0.4, 0.45], [0.5, 0.55]]))
    net.fc2.bias.copy_(torch.tensor([0.6, 0.6]))

# Show the resulting parameters, first layer then second layer.
print(net.fc1.weight)
print(net.fc1.bias)
print(net.fc2.weight)
print(net.fc2.bias)

Parameter containing:
tensor([[0.1500, 0.2000],
        [0.2500, 0.3000]], requires_grad=True)
Parameter containing:
tensor([0.3500, 0.3500], requires_grad=True)
Parameter containing:
tensor([[0.4000, 0.4500],
        [0.5000, 0.5500]], requires_grad=True)
Parameter containing:
tensor([0.6000, 0.6000], requires_grad=True)


Network Forward Pass 

In [None]:
# Loss function and the single training example (input and desired output).
# NOTE: ``input`` shadows the Python builtin of the same name; the name is kept
# because the optimizer cell further down reads it.
criterion = nn.MSELoss()
input = torch.tensor([0.05, 0.1])
target = torch.tensor([0.01, 0.99])

# Forward pass, then score the prediction against the target.
output = net(input)
loss = criterion(output, target)

print(loss)
print(output)

tensor(0.2984, grad_fn=<MseLossBackward>)
tensor([0.7514, 0.7729], grad_fn=<SigmoidBackward>)


In [None]:
# Walk backwards through the autograd graph, starting at the loss.
# The node names below are those shown in the recorded output of this cell.
print(loss.grad_fn)  # backward node of the MSE loss (MseLossBackward)
print(loss.grad_fn.next_functions[0][0])  # backward node of the final sigmoid (SigmoidBackward)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # node feeding that sigmoid, i.e. the fc2 linear layer (AddBackward0)

<MseLossBackward object at 0x7fb986a58ba8>
<SigmoidBackward object at 0x7fb986a58d30>
<AddBackward0 object at 0x7fb986a58ba8>


Backward Pass and Gradients w.r.t weights

In [None]:
# Reset the gradients of all parameters so that the values printed after
# ``backward()`` come from this single backward pass only. Depending on the
# PyTorch version, the reset gradients are zero tensors or ``None``.
net.zero_grad()

print('fc2.weight.grad before backward')
print(net.fc2.weight.grad)

# Backpropagate the loss; this fills ``.grad`` on the network's parameters.
loss.backward()

print('fc2.weight.grad after backward')
print(net.fc2.weight.grad)
print('fc1.weight.grad after backward')
print(net.fc1.weight.grad)

conv1.bias.grad before backward
tensor([[0., 0.],
        [0., 0.]])
conv1.bias.grad after backward
tensor([[ 0.0822,  0.0827],
        [-0.0226, -0.0227]])
tensor([[0.0004, 0.0009],
        [0.0005, 0.0010]])


Optimizing the weights using the computed gradients

In [None]:
import torch.optim as optim

# Create a plain SGD optimizer over all of the network's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# One optimisation step (this sequence is what the body of a training loop would repeat):
optimizer.zero_grad()   # clear gradients left over from the earlier backward pass
output = net(input) # FORWARD: recompute the prediction with the current parameters
loss = criterion(output, target) # LOSS: compare the prediction with the target
loss.backward() # BACKWARD: compute gradients of the loss w.r.t. the parameters
optimizer.step()    # UPDATE: apply the SGD update to the parameters

.. Note::

      Observe how gradient buffers had to be manually set to zero using
      ``optimizer.zero_grad()``. This is because gradients are accumulated
      as explained in the `Backprop`_ section.

