# <font>Training a PyTorch CNN - Calculate Loss,Gradient & Update Weights<font>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x2c5481d7080>

## <font>Preparing<font>

In [2]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [3]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
        
    def forward(self, t):
        #(1) input Layer
        t = t
        
        #(2) hidden conv Layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        #(3) hidden conv Layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=1, stride=2)
        
        #(4) hidden Liner Layer
        t = t.reshape(-1,12*4*4)
        t = self.fc1(t)
        t = F.relu(t)
        
        #(5) hidden Linear Layer
        t = self.fc2(t)
        t = F.relu(t)
        
        #(6) output Layer
        t = self.out(t)
        #t = F.softmax(t, dim=1)
        
        return t

In [4]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [5]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)

In [6]:
batch = next(iter(train_loader))
images, labels = batch

In [7]:
net = Network()

## <font>Calculating the Loss<font>

In [8]:
preds = net(images)
loss = F.cross_entropy(preds, labels)
loss.item()

2.3017594814300537

## <font>Calculating tht Gradients<font>

In [9]:
print(net.conv1.weight.grad)

None


In [10]:
loss.backward()

In [11]:
net.conv1.weight.grad.shape

torch.Size([6, 1, 5, 5])

In [12]:
#print(net.conv1.weight.grad)

## <font>Updating the Weights

In [13]:
optimizer = optim.Adam(net.parameters(), lr=0.01)

In [14]:
loss.item()

2.3017594814300537

In [15]:
get_num_correct(preds, labels)

9

In [16]:
optimizer.step()

In [17]:
preds = net(images)
loss = F.cross_entropy(preds, labels)

In [18]:
loss.item()

2.282038927078247

In [19]:
get_num_correct(preds, labels)

17

## <font>CNN Training Loop - Neural Network Praogramming

In [None]:
net = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(net.parameters(), lr=0.01)

In [69]:
for epoch in range(5):

    total_loss = 0
    total_correct = 0

    for batch in train_loader:
        images, labels = batch

        preds = net(images)
        loss = F.cross_entropy(preds, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print("epoch:",epoch,"total_correct:",total_correct,"loss:",total_loss)
    print("correct rate",(total_correct / len(train_set)))

epoch: 0 total_correct: 52587 loss: 198.12155584990978
correct rate 0.87645
epoch: 1 total_correct: 52632 loss: 195.82708063721657
correct rate 0.8772
epoch: 2 total_correct: 52752 loss: 192.6270736604929
correct rate 0.8792
epoch: 3 total_correct: 52884 loss: 189.61911921203136
correct rate 0.8814
epoch: 4 total_correct: 53043 loss: 184.71415317058563
correct rate 0.88405


In [21]:
total_correct / len(train_set)

0.8736666666666667

In [22]:
len(train_set)

60000

In [23]:
len(train_set.targets)

60000

## <font>Getting predictions for the entire training set

In [72]:
def get_all_preds(model, loader):
    all_preds = torch.tensor([])
    for batch in loader:
        images, labels = batch
        
        preds = model(images)
        all_preds = torch.cat(
            (all_preds, preds)
            ,dim=0
        )
    return all_preds

In [50]:
prediction_loader = torch.utils.data.DataLoader(train_set,batch_size=10000)

In [51]:
#images, labels = next(iter(prediction_loader))
#ret = net(images)

In [52]:
#ret.shape

In [53]:
train_preds = get_all_preds(net, prediction_loader)

In [54]:
train_preds.shape

torch.Size([60000, 10])

In [55]:
print(train_preds.requires_grad)

True


In [56]:
train_preds.grad



In [57]:
train_preds.grad_fn

<CatBackward at 0x2c54feec518>

In [73]:
with torch.no_grad():
    prediction_loader = torch.utils.data.DataLoader(train_set, batch_size=10000)
    train_preds = get_all_preds(net, prediction_loader)

In [74]:
train_preds.shape

torch.Size([60000, 10])

In [75]:
print(train_preds.requires_grad)

False


In [76]:
train_preds.grad

In [77]:
train_preds.grad_fn

In [78]:
preds_correct = get_num_correct(train_preds, train_set.targets)

In [79]:
print('total correct:', preds_correct)
print('accuracy:', preds_correct / len(train_set))

total correct: 53069
accuracy: 0.8844833333333333


## <font>Building a confusion matrix

In [80]:
train_set.targets

tensor([9, 0, 0,  ..., 3, 0, 5])

In [81]:
train_preds.argmax(dim=1)

tensor([9, 0, 0,  ..., 3, 0, 5])

In [82]:
stacked = torch.stack(
    (
        train_set.targets
        ,train_preds.argmax(dim=1)
    )
    ,dim=1
)

In [83]:
stacked.shape

torch.Size([60000, 2])

In [84]:
stacked

tensor([[9, 9],
        [0, 0],
        [0, 0],
        ...,
        [3, 3],
        [0, 0],
        [5, 5]])

In [85]:
stacked[0].tolist()

[9, 9]

In [91]:
cmt = torch.zeros(10,10, dtype=torch.int64)

In [92]:
cmt

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [93]:
for p in stacked:
    tl,pl = p.tolist()
    cmt[tl,pl] = cmt[tl,pl] + 1

In [94]:
cmt

tensor([[5621,    2,   55,  116,   12,    3,  162,    0,   29,    0],
        [  15, 5906,    5,   49,    9,    0,   15,    0,    1,    0],
        [ 103,    2, 4786,   85,  614,    0,  369,    1,   40,    0],
        [ 174,  114,   18, 5526,   86,    0,   77,    0,    5,    0],
        [  10,    6,  317,  361, 4841,    0,  400,    0,   65,    0],
        [   3,    0,    0,    0,    0, 5846,    0,  135,    8,    8],
        [1586,    3,  518,  176,  537,    1, 3119,    0,   60,    0],
        [   0,    0,    0,    1,    0,   42,    0, 5839,   10,  108],
        [  17,    5,    9,   18,   12,    4,   54,    8, 5870,    3],
        [   0,    1,    0,    0,    0,   53,    0,  229,    2, 5715]])