### ETL - Extract, Transform, Load data

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fcd28891650>

In [2]:
train_set = torchvision.datasets.FashionMNIST(
    root='../data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [18]:
import torch.nn as nn

class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        # 1 input 
        t = t
        
        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        # (4) hidden linear layer
        t = t.reshape(-1, 12 * 4 * 4)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)
        
        # (6) output layer
        t = self.out(t)
        #t = F.softmax(t, dim=1)
        return t

In [19]:
network = Network()
train_loader = torch.utils.data.DataLoader(train_set
    ,batch_size=100
    ,shuffle=True
)

In [20]:
batch = next(iter(train_loader))

In [21]:
images, labels = batch

#### calculating the lossm

In [22]:
preds = network(images)

In [23]:
loss = F.cross_entropy(preds, labels) # Calculating the loss

In [24]:
loss.item()

2.3108553886413574

In [25]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [26]:
get_num_correct(preds, labels)

10

#### calculating the gradients

In [27]:
network.conv1.weight.grad

In [28]:
loss.backward()

In [29]:
network.conv1.weight.grad.shape

torch.Size([6, 1, 5, 5])

#### updating the weights

In [30]:
optimizer = optim.Adam(network.parameters(), lr=0.01)
optimizer.step() # Updating the weights

In [None]:
# When the step() function is called, the optimizer updates the weights using the gradients that are stored
# in the network's parameters. This means that we should expect our loss to be reduced if we pass the same 
# batch through the network again. Checking this, we can see that this is indeed the case:

In [31]:
preds = network(images)

In [32]:
loss.item()

2.3108553886413574

In [33]:
loss = F.cross_entropy(preds, labels)
loss.item()

2.2920010089874268

In [34]:
get_num_correct(preds, labels)

13

#### summary of the above using a single batch

In [35]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(train_loader)) # Get Batch
images, labels = batch

preds = network(images) # Pass Batch
loss = F.cross_entropy(preds, labels) # Calculate Loss

loss.backward() # Calculate Gradients
optimizer.step() # Update Weights

print('loss1:', loss.item())
preds = network(images)
loss = F.cross_entropy(preds, labels)
print('loss2:', loss.item())

loss1: 2.293233633041382
loss2: 2.2485525608062744


### training for an epoch - for loop

In [36]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

total_loss = 0
total_correct = 0

for batch in train_loader: # Get Batch
    images, labels = batch 

    preds = network(images) # Pass Batch
    loss = F.cross_entropy(preds, labels) # Calculate Loss

    optimizer.zero_grad() # this is used because the weights would be accumulated if not reset to zero
    loss.backward() # Calculate Gradients
    optimizer.step() # Update Weights

    total_loss += loss.item()
    total_correct += get_num_correct(preds, labels)

print(
    "epoch:", 0, 
    "total_correct:", total_correct, 
    "loss:", total_loss
)

epoch: 0 total_correct: 47258 loss: 338.834072843194


In [37]:
total_correct / len(train_set)

0.7876333333333333

#### Training With Multiple Epochs

In [38]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(10):

    total_loss = 0
    total_correct = 0

    for batch in train_loader: # Get Batch
        images, labels = batch 

        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels) # Calculate Loss

        optimizer.zero_grad()
        loss.backward() # Calculate Gradients
        optimizer.step() # Update Weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print(
        "epoch", epoch, 
        "total_correct:", total_correct, 
        "loss:", total_loss
    )

epoch 0 total_correct: 51447 loss: 234.48041023314
epoch 1 total_correct: 52281 loss: 209.84101347625256
epoch 2 total_correct: 52598 loss: 198.36338838934898
epoch 3 total_correct: 52907 loss: 192.3293892070651
epoch 4 total_correct: 53132 loss: 185.98707918822765
epoch 5 total_correct: 53142 loss: 186.43825101852417
epoch 6 total_correct: 53253 loss: 182.1532327234745
epoch 7 total_correct: 53450 loss: 175.79061660915613
epoch 8 total_correct: 53438 loss: 177.7160139232874
epoch 9 total_correct: 53480 loss: 176.5473354831338
