In [1]:
import torch 
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [29]:

class Network(nn.Module):
    """Minimal CNN classifier: conv -> batch norm -> ReLU -> max pool -> linear.

    Sized for single-channel 28x28 images. The 2x2 convolution (no padding)
    yields three 27x27 feature maps, the 2x2/stride-2 pooling reduces them to
    13x13, and the flattened 3 * 13 * 13 = 507 features feed a linear layer
    that emits 10 scores per image.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 input channel -> 3 feature maps.
        self.conv = nn.Conv2d(1, 3, kernel_size=2, stride=1, padding=0)
        self.bn = nn.BatchNorm2d(3)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # Classifier head over the flattened feature maps.
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(3 * 13 * 13, 10)

    def forward(self, input):
        """Map a batch of shape (N, 1, 28, 28) to scores of shape (N, 10)."""
        # (N, 1, 28, 28) -> (N, 3, 27, 27) after conv/bn/relu -> (N, 3, 13, 13) after pooling.
        feature_maps = self.maxpool(self.relu(self.bn(self.conv(input))))
        # (N, 3, 13, 13) -> (N, 507) -> (N, 10)
        return self.fc(self.flatten(feature_maps))

# Build the model with freshly initialised weights; it stays on the default (CPU) device.
cnn_network = Network()
# To run on a GPU instead, uncomment the lines below (input batches must then be moved too):
# if torch.cuda.is_available():
#     cnn_network = cnn_network.cuda()
    

In [30]:
# Show every learnable parameter keyed by its dotted name (e.g. 'conv.weight', 'fc.weight').
dict(cnn_network.named_parameters())

{'conv.weight': Parameter containing:
 tensor([[[[ 0.3816, -0.4627],
           [ 0.3423,  0.2251]]],
 
 
         [[[ 0.1137, -0.4494],
           [ 0.1674,  0.3572]]],
 
 
         [[[-0.0532,  0.1100],
           [ 0.4255, -0.3246]]]], requires_grad=True),
 'conv.bias': Parameter containing:
 tensor([-0.0719, -0.0274,  0.0188], requires_grad=True),
 'bn.weight': Parameter containing:
 tensor([1., 1., 1.], requires_grad=True),
 'bn.bias': Parameter containing:
 tensor([0., 0., 0.], requires_grad=True),
 'fc.weight': Parameter containing:
 tensor([[ 0.0431,  0.0269, -0.0071,  ...,  0.0012,  0.0417, -0.0121],
         [-0.0199,  0.0111,  0.0313,  ...,  0.0073, -0.0351,  0.0155],
         [-0.0189, -0.0205,  0.0046,  ...,  0.0208,  0.0336,  0.0335],
         ...,
         [ 0.0323, -0.0056, -0.0165,  ..., -0.0177, -0.0255,  0.0142],
         [-0.0108,  0.0280, -0.0343,  ...,  0.0280, -0.0361,  0.0182],
         [-0.0312,  0.0435,  0.0265,  ..., -0.0089,  0.0308, -0.0417]],
        requi

In [32]:
# The linear layer's weight is (out_features, in_features): 10 outputs x 3*13*13 = 507 inputs.
cnn_network.fc.weight.size()

torch.Size([10, 507])

In [17]:
def _load_split(train):
    """Return one FashionMNIST split (train if ``train`` is True, else test).

    The data is stored under 'FashionMNIST/', downloaded when missing, and
    each image is converted with ``ToTensor``.
    """
    return torchvision.datasets.FashionMNIST(
        root='FashionMNIST/',
        train=train,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()]),
    )


train_set = _load_split(True)
test_set = _load_split(False)

# Serve both splits in batches of 100; no shuffle argument is passed.
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=100)
    for split in (train_set, test_set)
)

In [19]:
# Pull only the first batch from the training loader to probe the network with.
images, labels = next(iter(train_loader))

In [22]:
# Images come as (batch, channels, height, width); labels hold one entry per image.
images.shape, labels.shape

(torch.Size([100, 1, 28, 28]), torch.Size([100]))

In [33]:
# Call the module itself rather than .forward(): nn.Module.__call__ dispatches to
# forward() and also runs any registered hooks, which a direct .forward() call skips.
A_out = cnn_network(images)

In [34]:
# One row of 10 scores per image in the batch.
A_out.shape

torch.Size([100, 10])

In [35]:
# One label per row of A_out; these are the targets passed to the loss.
labels.shape

torch.Size([100])

In [36]:
# cross_entropy takes the raw network outputs directly (no softmax applied beforehand)
# together with the labels, and reduces the batch to a single scalar loss.
loss = F.cross_entropy(A_out, labels)

In [37]:
# Display the scalar loss; its grad_fn shows it is attached to the autograd graph.
loss

tensor(2.4624, grad_fn=<NllLossBackward0>)

In [38]:
def traverse(node, visited=None):
    """Print the type of every node reachable from ``node``, depth-first.

    Each node is printed once, when it is first reached, even if several
    paths lead to it. Children are read from ``node.next_functions`` (an
    iterable of ``(child, index)`` pairs); nodes without that attribute are
    treated as leaves, and ``None`` children are skipped.

    Args:
        node: Root of the graph to walk, e.g. ``loss.grad_fn``. ``None`` is
            accepted and ignored.
        visited: Optional set of nodes to treat as already seen. It is
            updated in place. When omitted, a fresh set is created for this
            call, so separate top-level calls do not share state.

    Returns:
        None. Output is written with ``print``.
    """
    # A mutable default (``visited=set()``) would be created once and shared by
    # every call, making a second traversal of the same graph print nothing.
    if visited is None:
        visited = set()
    if node is None or node in visited:
        return
    visited.add(node)
    print(type(node))
    if hasattr(node, 'next_functions'):
        for f, _ in node.next_functions:
            traverse(f, visited)

# Walk the autograd graph backwards from the loss and print each node's type.
traverse(loss.grad_fn)

<class 'NllLossBackward0'>
<class 'LogSoftmaxBackward0'>
<class 'AddmmBackward0'>
<class 'AccumulateGrad'>
<class 'ViewBackward0'>
<class 'MaxPool2DWithIndicesBackward0'>
<class 'ReluBackward0'>
<class 'NativeBatchNormBackward0'>
<class 'ConvolutionBackward0'>
<class 'AccumulateGrad'>
<class 'AccumulateGrad'>
<class 'AccumulateGrad'>
<class 'AccumulateGrad'>
<class 'TBackward0'>
<class 'AccumulateGrad'>


In [40]:
# List each parameter's name followed by its current .grad, one item per line.
for name, param in cnn_network.named_parameters():
    print(name, param.grad, sep='\n')

conv.weight
None
conv.bias
None
bn.weight
None
bn.bias
None
fc.weight
None
fc.bias
None


As you can see, none of the parameters have gradients yet. This is because we haven't run the backward pass.

In [41]:
# Backpropagate from the scalar loss; this populates .grad on the network's parameters.
loss.backward()

In [44]:
# For each parameter, print its name, its gradient, and the gradient's shape.
for name, param in cnn_network.named_parameters():
    print(name, param.grad, sep='\n')
    print(param.grad.shape)

conv.weight
tensor([[[[ 0.0022, -0.0591],
          [-0.0333, -0.0745]]],


        [[[-0.3702, -0.3140],
          [-0.1772, -0.1936]]],


        [[[ 0.0539, -0.0747],
          [-0.0773, -0.1359]]]])
torch.Size([3, 1, 2, 2])
conv.bias
tensor([ 5.5879e-09,  4.1351e-07, -1.1735e-07])
torch.Size([3])
bn.weight
tensor([0.1121, 0.1651, 0.0776])
torch.Size([3])
bn.bias
tensor([0.0711, 0.0261, 0.0548])
torch.Size([3])
fc.weight
tensor([[ 0.0000,  0.0000, -0.0172,  ..., -0.1634,  0.0375,  0.0432],
        [ 0.0000,  0.0000,  0.0008,  ...,  0.0029,  0.0228,  0.0193],
        [ 0.0000,  0.0000, -0.0121,  ..., -0.0481, -0.1238, -0.0419],
        ...,
        [ 0.0000,  0.0000,  0.0014,  ...,  0.0321,  0.0235,  0.0142],
        [ 0.0000,  0.0000,  0.0023,  ...,  0.0607,  0.0272,  0.0226],
        [ 0.0000,  0.0000,  0.0098,  ...,  0.1240,  0.0708,  0.0184]])
torch.Size([10, 507])
fc.bias
tensor([-0.0167, -0.0613, -0.0206, -0.0038, -0.0113, -0.0044, -0.0153, -0.0236,
         0.0815,  0.0755])
t