### ETL - Extract, Transform, Load data

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict

In [2]:
train_set = torchvision.datasets.FashionMNIST(
    root='../data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [3]:
from collections import OrderedDict
from collections import namedtuple
from itertools import product

In [4]:
import torch.nn as nn

class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        # 1 input 
        t = t
        
        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        # (4) hidden linear layer
        t = t.reshape(-1, 12 * 4 * 4)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer 
        t = self.fc2(t)
        t = F.relu(t)
        
        # (6) output layer
        t = self.out(t)
        #t = F.softmax(t, dim=1)
        return t

In [5]:
class RunBuilder():
    @staticmethod
    def get_runs(params):

        Run = namedtuple('Run', params.keys())
        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))

        return runs

In [13]:
class RunManager():
    
    def __init__(self):
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None
        
        self.network = None
        self.loader = None
        self.tb = None
        
    def begin_run(self, run, network, loader):
        self.run_start_time = time.time()
        self.run_params = run
        self.run_count += 1
        
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')
        
        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)
        
        self.tb.add_image('image', grid)
        self.tb.add_graph(self.network, images)
           
    def end_run(self):
        self.tb.close()
        self.epoch_count = 0
        
    def begin_epoch(self):
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
                                
    def end_epoch(self):

        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)
            
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results['loss'] = loss
        results["accuracy"] = accuracy
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration
        for k,v in self.run_params._asdict().items(): 
            results[k] = v
            self.run_data.append(results)

        df = pd.DataFrame.from_dict(self.run_data, orient='columns')
        clear_output(wait=True)
        display(df)
        
    def track_loss(self, loss, batch):
        self.epoch_loss += loss.item() * batch[0].shape[0]

    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)
        
    def _get_num_correct(self, preds, labels):
        return preds.argmax(dim=1).eq(labels).sum().item()
    
    def save(self, fileName):

        pd.DataFrame.from_dict(
            self.run_data, orient='columns'
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [14]:
params = OrderedDict(
    lr = [.01]
    ,batch_size = [1000, 2000]
)

m = RunManager()
for run in RunBuilder.get_runs(params):
    
    network = Network()
    loader = DataLoader(train_set, batch_size=run.batch_size)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(5):
        m.begin_epoch()
        for batch in loader: # Get Batch
            
            images, labels = batch 
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss

            optimizer.zero_grad() # this is used because the weights would be accumulated if not reset to zero
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights
            
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)
            
        m.end_epoch()
    m.end_run()
m.save('result')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size
0,1,1,1.036083,0.597333,13.109646,13.808707,0.01,1000
1,1,2,0.559073,0.78285,13.755945,27.667713,0.01,1000
2,1,3,0.46345,0.828983,13.461522,41.212512,0.01,1000
3,1,4,0.402669,0.8511,12.988818,54.279294,0.01,1000
4,1,5,0.369549,0.863133,14.063079,68.418623,0.01,1000
5,2,1,1.288689,0.5069,12.866314,14.301904,0.01,2000
6,2,2,0.688726,0.728217,12.37883,26.751441,0.01,2000
7,2,3,0.573809,0.771433,12.109697,38.94045,0.01,2000
8,2,4,0.51817,0.7991,12.270801,51.285186,0.01,2000
9,2,5,0.470654,0.82445,12.369275,63.729012,0.01,2000


#### explaination

In [8]:
params = OrderedDict(
    lr = [.01, .001]
    ,batch_size = [1000, 10000]
)

In [20]:
for run in RunBuilder.get_runs(params):
    comment = f'-{run}'
    print(comment)

-Run(lr=0.01, batch_size=1000)
-Run(lr=0.01, batch_size=10000)
-Run(lr=0.001, batch_size=1000)
-Run(lr=0.001, batch_size=10000)


#### The above is the same as below step by step

In [9]:
params.keys()

odict_keys(['lr', 'batch_size'])

In [10]:
params.values()

odict_values([[0.01, 0.001], [1000, 10000]])

In [17]:
Run = namedtuple('Run', params.keys()) # set the parameters ordered
runs = []
for v in product(*params.values()):
    runs.append(Run(*v)) # add the values for the parameters

In [18]:
runs

[Run(lr=0.01, batch_size=1000),
 Run(lr=0.01, batch_size=10000),
 Run(lr=0.001, batch_size=1000),
 Run(lr=0.001, batch_size=10000)]

#### Previous

In [4]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [None]:
# The best part about TensorBoard is its out-of-the-box capability of tracking 
# our hyperparameters over time and across runs.

#### TensorBoard Histograms And Scalars

In [7]:
parameters = dict(
    lr = [.01, .001]
    ,batch_size = [100, 1000]
    ,shuffle = [True, False]
)

param_values = [v for v in parameters.values()]
param_values

[[0.01, 0.001], [100, 1000], [True, False]]

In [9]:
from itertools import product

In [11]:
# example of a complete loop
network = Network()

for lr, batch_size, shuffle in product(*param_values): 
    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)

    tb = SummaryWriter()
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    for epoch in range(3):

        total_loss = 0
        total_correct = 0

        for batch in train_loader: # Get Batch

            images, labels = batch 
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss

            optimizer.zero_grad() # this is used because the weights would be accumulated if not reset to zero
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            total_loss += loss.item()* images.shape[0] # 1
            total_correct += get_num_correct(preds, labels)

        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        for name, weight in network.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad', weight.grad, epoch)

        print(
            "epoch", epoch, 
            "total_correct:", total_correct, 
            "loss:", total_loss
        )

tb.close()

epoch 0 total_correct: 46196 loss: 35514.61341679096
epoch 1 total_correct: 51020 loss: 24499.457749724388
epoch 2 total_correct: 51877 loss: 22003.507828712463
epoch 0 total_correct: 52089 loss: 21535.288953781128
epoch 1 total_correct: 52514 loss: 20343.014292418957
epoch 2 total_correct: 52736 loss: 19872.757725417614
epoch 0 total_correct: 53688 loss: 16914.28190469742
epoch 1 total_correct: 54034 loss: 15656.790286302567
epoch 2 total_correct: 54240 loss: 15146.981626749039
epoch 0 total_correct: 54234 loss: 15078.522741794586
epoch 1 total_correct: 54499 loss: 14334.07486975193
epoch 2 total_correct: 54618 loss: 14164.20367360115
epoch 0 total_correct: 54966 loss: 12930.421817302704
epoch 1 total_correct: 55124 loss: 12544.62364912033
epoch 2 total_correct: 55178 loss: 12317.50683709979
epoch 0 total_correct: 55223 loss: 12145.948259532452
epoch 1 total_correct: 55334 loss: 11961.580868065357
epoch 2 total_correct: 55398 loss: 11788.014801591635
epoch 0 total_correct: 55422 loss:

In [None]:
# 1 For every batch, we are using the batch_size variable to update the total_loss value. 
# 1 We are scaling up the average loss value of the samples in the batch by the batch_size value. 
# 1 However, as we have just discussed, sometimes the last batch will contain fewer samples. 
# 1 Thus, scaling by the predefined batch_size value is inaccurate.