#### Torch CNN - CIFAR10

In [44]:
import PIL
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import os, sys

from pathlib import Path
# Make the directory two levels above the current working directory importable.
# Path('.ipynb') is only a placeholder name: resolve() anchors it at the working
# directory, so parents[0] is the cwd and parents[2] is its grandparent.
sys.path.append(str(Path('.ipynb').resolve().parents[2]))

# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is a notebook-level global that evaluate_model() and modeling() read.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

def mini_batching(BATCH_SIZE, DOWNLOAD = False, SUBSET = 0,
                  DATA_ROOT = '/home/tung5534/cnn_cifar/data'):
    """Build the CIFAR-10 training and test data loaders.

    Args:
        BATCH_SIZE: Number of samples per mini-batch for both loaders.
        DOWNLOAD: Forwarded to ``torchvision.datasets.CIFAR10(download=...)``.
        SUBSET: When non-zero, keep only the first ``SUBSET`` samples of each
            split. The count is clamped to the size of each split, so a value
            larger than the (smaller) test split no longer produces
            out-of-range indices.
        DATA_ROOT: Directory holding (or receiving) the dataset. Defaults to
            the location the notebook was originally written against.

    Returns:
        ``(train_loader, test_loader)``. The training loader shuffles and
        applies random flip/affine augmentation; the test loader does neither.
    """
    # Both splits share the same per-channel normalisation (mean 0.5, std 0.5).
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    train_transform = transforms.Compose(
        [transforms.RandomHorizontalFlip(p=0.5),
         transforms.RandomAffine(degrees=(-5, 5), translate=(0.1, 0.1), scale=(0.9, 1.1)),
         transforms.ToTensor(),
         normalize])

    test_transform = transforms.Compose(
        [transforms.ToTensor(),
         normalize])

    # Load dataset
    train_dataset = torchvision.datasets.CIFAR10(root=DATA_ROOT,
                                                 train=True,
                                                 download=DOWNLOAD,
                                                 transform=train_transform,
                                                 )
    test_dataset = torchvision.datasets.CIFAR10(root=DATA_ROOT,
                                                train=False,
                                                download=DOWNLOAD,
                                                transform=test_transform,
                                                )
    if SUBSET != 0:
        # Clamp per split: the two splits have different sizes, and indices
        # past the end of a split would only fail later, during iteration.
        n_train = min(SUBSET, len(train_dataset))
        n_test = min(SUBSET, len(test_dataset))
        train_set = torch.utils.data.Subset(train_dataset, list(range(n_train)))
        test_set = torch.utils.data.Subset(test_dataset, list(range(n_test)))
        print(f"Using a subset of {SUBSET} samples for training and testing.")
        if n_train != SUBSET or n_test != SUBSET:
            print(f"Subset clamped to the split sizes: {n_train} train / {n_test} test samples.")
    else:
        train_set, test_set = train_dataset, test_dataset
        print("Using the full dataset for training and testing.")

    # Create data loaders
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

    return train_loader, test_loader

cuda


In [45]:
def save_performance(save_path, method,
                     train_losses, test_losses, train_errs,
                     test_errs, train_accs, test_acc, run_times,
                     n_step, lr, batch_size):
    """Write the metrics of one training run to a JSON file.

    The file is ``<save_path>/<method>_<n_step>_<lr tag>_<batch_size>.json``,
    where the lr tag is ``str(lr)`` with the decimal point removed
    (e.g. ``0.133`` -> ``"0133"``). The same tag, not the numeric value, is
    what gets stored under the ``'lr'`` key.

    Args:
        save_path: Output directory; created (with parents) if missing.
        method: Optimiser label used as the file-name prefix.
        train_losses, test_losses, train_errs, test_errs, train_accs,
        test_acc, run_times: Per-epoch metric sequences, stored as given.
        n_step: Number of descent steps per batch, stored as given.
        lr: Learning rate; converted to the tag described above.
        batch_size: Mini-batch size, stored as given.
    """
    import os
    import json

    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(save_path, exist_ok=True)

    lr = str(lr).replace('.', '')

    performance = {
        'train_losses': train_losses,
        'test_losses': test_losses,
        'train_errs': train_errs,
        'test_errs': test_errs,
        'train_accs': train_accs,
        'test_acc': test_acc,
        'run_time': run_times,
        'n_step': n_step,
        'lr':lr,
        'batch_size':batch_size,
    }
    out_file = os.path.join(save_path, f'{method}_{n_step}_{lr}_{batch_size}.json')
    with open(out_file, 'w') as f:
        json.dump(performance, f, indent=4)
    

In [46]:
def evaluate_model(model, criterion, test_loader):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Module to evaluate; switched to eval mode by this call.
        criterion: Loss callable applied to ``(outputs, labels)``. Unless its
            ``reduction`` attribute is ``'sum'``, it is assumed to return the
            mean loss over the batch.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        ``(test_loss, test_err, test_acc)``: the average loss per sample, and
        the error and accuracy rates in percent.

    Note:
        Batches are moved to the notebook-level global ``device``.
    """
    model.eval()
    sums_over_batch = getattr(criterion, 'reduction', 'mean') == 'sum'
    n_correct, n_wrong, loss_sum, total_test = 0, 0, 0.0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            batch_n = labels.size(0)
            total_test += batch_n
            n_correct += (predicted == labels).sum().item()
            n_wrong += (predicted != labels).sum().item()
            batch_loss = criterion(outputs, labels).item()
            # A mean-reduced loss must be weighted by the batch size before
            # dividing by the sample count; otherwise the result shrinks by
            # roughly 1/batch_size and is not comparable across batch sizes.
            loss_sum += batch_loss if sums_over_batch else batch_loss * batch_n

    test_loss = loss_sum / total_test
    test_err = 100 * n_wrong / total_test
    test_acc = 100 * n_correct / total_test

    return test_loss, test_err, test_acc

In [47]:
import os, json

def _step_size_tag(record):
    """Return the stored step-size tag of one score record.

    Older records appear to keep the encoded step size under ``'n_step'`` as a
    string; records written by ``save_performance`` keep an integer there and
    put the encoded step size under ``'lr'``.
    """
    tag = record['n_step']
    if not isinstance(tag, str):
        tag = record['lr']
    return tag


def _decode_step_size(tag):
    """Turn a step-size tag such as ``"0133"`` back into a float (``0.133``)."""
    if isinstance(tag, str) and '.' not in tag:
        # The tag is the number with its decimal point removed; the point is
        # re-inserted after the first character.
        return float(tag[0] + '.' + tag[1:])
    return float(tag)


def viz_scores(scores_path, threshold):
    """Plot total runtime and epoch count against step size for saved runs.

    Walks ``scores_path`` recursively, reads every ``*.json`` score file, and
    draws one scatter point per file on twin y-axes (runtime in blue, epochs
    in red).

    Args:
        scores_path: Directory containing the JSON score files.
        threshold: Train-accuracy threshold; only used in the plot title.

    Returns:
        ``(train_loss_dict, train_acc_dict)``: per-run ``'train_losses'`` and
        ``'train_accs'`` lists keyed by the run's stored step-size tag. Runs
        sharing a tag overwrite one another.
    """
    runtimes, epochs, lrs = [], [], []
    train_loss_dict = {}
    train_acc_dict = {}

    for root, _dirs, files in os.walk(scores_path):
        # Sorted so the point order and dict insertion order are reproducible.
        for name in sorted(files):
            if not name.endswith(".json"):
                continue
            with open(os.path.join(root, name), 'r') as file:
                data = json.load(file)

            tag = _step_size_tag(data)
            lrs.append(_decode_step_size(tag))
            runtimes.append(sum(data['run_time']))
            epochs.append(len(data['run_time']))

            train_loss_dict[tag] = data['train_losses']
            train_acc_dict[tag] = data['train_accs']

    import matplotlib.pyplot as plt
    fig, ax1 = plt.subplots(figsize=(10, 6))
    ax2 = ax1.twinx()

    ax1.plot(lrs, runtimes, label='Runtime', marker='o', linestyle='', color='blue', alpha=.5)
    ax2.plot(lrs, epochs, label='Epochs', marker='o', linestyle='', color='red', alpha=.5)

    ax1.set_ylabel('Runtime (s)')
    ax2.set_ylabel('Epochs')

    ax1.set_xlabel('Stepsize')

    # Merge both axes' handles into a single legend.
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc='best')

    plt.title(f'Step size evaluation to get {threshold}% train acc')
    plt.grid(True)
    plt.show()
    return train_loss_dict, train_acc_dict

In [48]:
from pathlib import Path
# Display the directory the other cells append to sys.path: the grandparent of the
# working directory ('.ipynb' is just a placeholder name resolved against the cwd).
str(Path('.ipynb').resolve().parents[2])

'/home/tung5534/cnn_cifar'

In [52]:
import os, sys
from pathlib import Path

# Add the directory two levels above the cwd to the import path again (same entry as
# in the first cell) — presumably where the `models` and `optim` packages live.
sys.path.append(str(Path('.ipynb').resolve().parents[2]))

from models import SimpleCNN
from optim.sgd_sngl import OneStepSGD
from optim.sgd_mult_v2 import ManyStepSGD
import time 

def modeling(train_loader, test_loader, n_step=2, n_epochs=15, lr=0.133, momentum=0, threshold=90):
    """Train a fresh ``SimpleCNN`` and record per-epoch metrics.

    Uses ``OneStepSGD`` when ``n_step == 1`` and ``ManyStepSGD`` (driven through
    a closure) otherwise. Training stops early once the epoch's training
    accuracy reaches ``threshold``.

    Args:
        train_loader: Iterable of ``(images, labels)`` training batches.
        test_loader: Iterable of ``(images, labels)`` batches, evaluated after
            every epoch via ``evaluate_model``.
        n_step: Number of descent steps per batch passed to ``ManyStepSGD``;
            ``1`` selects ``OneStepSGD``.
        n_epochs: Maximum number of epochs.
        lr: Learning rate passed to the optimiser.
        momentum: Momentum passed to the optimiser.
        threshold: Training accuracy (percent) that triggers early stopping.

    Returns:
        ``(train_losses, test_losses, train_errs, test_errs, train_accs,
        test_accs, run_times)``, each a list with one entry per completed
        epoch. ``run_times`` covers the training pass only, not evaluation.

    Note:
        Training accuracy is measured on the forward pass made before the
        parameter update of each batch. In the multi-step branch the recorded
        loss is whatever ``optimizer.step(closure)`` returns.
    """
    model = SimpleCNN().to(device)
    criterion = nn.CrossEntropyLoss()

    if n_step == 1:
        optimizer = OneStepSGD(model.parameters(), lr=lr, momentum=momentum)
    else:
        optimizer = ManyStepSGD(model.parameters(), lr=lr, momentum=momentum, n_step=n_step)

    train_losses, test_losses, train_errs, test_errs, train_accs, test_accs, run_times = [], [], [], [], [], [], []

    def closure():
        # Re-evaluates the loss on the current batch. `images` and `labels` are
        # the loop variables of the enclosing function, looked up at call time.
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        return loss

    for epoch in range(n_epochs):
        model.train()
        total_train, _train_err, _train_acc, running_loss = 0, 0, 0, 0.0
        _start = time.time()

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            batch_n = labels.size(0)
            optimizer.zero_grad()

            if n_step == 1:
                # Forward
                outputs = model(images)
                loss = criterion(outputs, labels)
                # Backward
                loss.backward()
                optimizer.step()
            else:
                # This forward pass only feeds the accuracy bookkeeping, so it
                # does not need an autograd graph.
                with torch.no_grad():
                    outputs = model(images)
                # The optimiser drives forward/backward itself via the closure.
                loss = optimizer.step(closure)

            # The criterion is mean-reduced: weight by the batch size so that
            # dividing by the sample count below gives a per-sample average
            # that is comparable across batch sizes.
            running_loss += loss.item() * batch_n
            # Training accuracy
            predicted = outputs.argmax(dim=1)
            total_train += batch_n
            _train_err += (predicted != labels).sum().item()
            _train_acc += (predicted == labels).sum().item()

        epoch_train_loss = running_loss / total_train
        epoch_train_acc = 100 * _train_acc / total_train
        epoch_train_err = 100 * _train_err / total_train
        run_time = time.time() - _start

        test_loss, test_err, test_acc = evaluate_model(model, criterion, test_loader)

        train_losses.append(epoch_train_loss)
        train_errs.append(epoch_train_err)
        train_accs.append(epoch_train_acc)
        test_losses.append(test_loss)
        test_errs.append(test_err)
        test_accs.append(test_acc)
        run_times.append(run_time)

        # Report the same loss value that is stored in train_losses.
        print(f'E [{epoch+1}/{n_epochs}]. train_loss_acc: {epoch_train_loss:.4f}, {epoch_train_acc:.2f}%, '
              f'test_acc: {test_acc:.2f}%, run_time: {run_time}')
        if epoch_train_acc >= threshold:
            print(f"Early stopping at epoch {epoch+1} with train acc {epoch_train_acc:.2f}%")
            break
    return train_losses, test_losses, train_errs, test_errs, train_accs, test_accs, run_times

In [61]:
from plot import metrics_plot
# Batch-size sweep: train once per batch size with a fixed step count and
# learning rate, and save each run's metrics as JSON.
n_epochs = 30
n_step = 10
lr = 0.133
threshold = 85
# The closing bracket used to sit inside the trailing comment, which left the
# list unterminated (SyntaxError). The larger sizes remain disabled.
BATCH_SIZES = [128, 256, 512]  # 1024, 2048, 4096
# Results directory follows the E<max epochs>T<threshold> naming (scores/E30T85 here).
save_path = f'scores/E{n_epochs}T{threshold}'
method = 'SGD'
for batch_size in BATCH_SIZES:
    train_loader, test_loader = mini_batching(batch_size)
    (train_losses, test_losses, train_errs, test_errs,
     train_accs, test_accs, run_times) = modeling(
        train_loader=train_loader,
        test_loader=test_loader,
        n_step=n_step,
        n_epochs=n_epochs,
        lr=lr,
        threshold=threshold,
    )
    save_performance(save_path, method,
                     train_losses, test_losses, train_errs,
                     test_errs, train_accs, test_accs, run_times, n_step, lr, batch_size
                     )
#     actual_nepochs = len(train_losses)
#     metrics_plot(actual_nepochs, train_losses, test_losses, train_accs, test_accs, train_errs, test_errs)

Using the full dataset for training and testing.
E [1/30]. train_loss_acc: 0.0826, 48.13%, test_acc: 61.54%, run_time: 13.919124841690063
E [2/30]. train_loss_acc: 0.0114, 62.51%, test_acc: 67.68%, run_time: 13.920006275177002
E [3/30]. train_loss_acc: 0.0092, 68.31%, test_acc: 71.11%, run_time: 13.922752141952515
E [4/30]. train_loss_acc: 0.0082, 71.39%, test_acc: 73.35%, run_time: 13.916241645812988
E [5/30]. train_loss_acc: 0.0075, 74.07%, test_acc: 74.70%, run_time: 13.931274890899658
E [6/30]. train_loss_acc: 0.0070, 75.88%, test_acc: 76.68%, run_time: 13.943676233291626
E [7/30]. train_loss_acc: 0.0063, 77.50%, test_acc: 76.17%, run_time: 13.931981086730957
E [8/30]. train_loss_acc: 0.0059, 78.78%, test_acc: 79.08%, run_time: 13.915026426315308
E [9/30]. train_loss_acc: 0.0055, 79.96%, test_acc: 78.23%, run_time: 13.91064977645874
E [10/30]. train_loss_acc: 0.0051, 80.94%, test_acc: 78.80%, run_time: 13.939643383026123
E [11/30]. train_loss_acc: 0.0049, 82.09%, test_acc: 79.80%, 

KeyboardInterrupt: 

In [None]:
# Score directories are named E<max epochs>T<train-acc threshold>; the threshold
# passed to viz_scores only feeds the plot title, so it must match the directory.
scores_path_1 = "scores/E20T60"
train_loss_dict_1, train_acc_dict_1 = viz_scores(scores_path_1, threshold=60)
scores_path_2 = "scores/E30T90"
# Was threshold=80, which titled the T90 results as an 80% evaluation.
train_loss_dict_2, train_acc_dict_2 = viz_scores(scores_path_2, threshold=90)

In [8]:
pwd

'/home/tung5534/cnn_cifar/W08/optimal_batch_size'

## Compare Single vs Multi Descent Steps SGD

In [None]:
import json
import matplotlib.pyplot as plt

# Score files of the single-step run and of the 3-, 5- and 10-step runs.
sngl_sgd = "scores/E30T90/SGD_lr_0133_sngl.json"
mult_05_sgd = "scores/E30T90/SGD_lr_0133_mult_05.json"
mult_03_sgd = "scores/E30T90/SGD_lr_0133_mult_03.json"
mult_10_sgd = "scores/E30T90/SGD_lr_0133_mult_10.json"

# Load the four score files in the order they are plotted.
loaded_scores = []
for score_file in (sngl_sgd, mult_03_sgd, mult_05_sgd, mult_10_sgd):
    with open(score_file, 'r') as f:
        loaded_scores.append(json.load(f))
sngl_scores, mult_03_scores, mult_05_scores, mult_10_scores = loaded_scores

print(sngl_scores.keys())

# One training-error curve per run; only the single-step run is cut to 30 epochs.
curves = [
    ('Single SGD', sngl_scores['train_errs'][:30]),
    ('Multi (03) SGD', mult_03_scores['train_errs']),
    ('Multi (05) SGD', mult_05_scores['train_errs']),
    ('Multi (10) SGD', mult_10_scores['train_errs']),
]

plt.figure(figsize=(10,6))
for curve_label, errs in curves:
    # Epochs are numbered from 1 on the x-axis.
    plt.plot(range(1, len(errs)+1), errs, linestyle='-', label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Training Error Rate (%)')
plt.title('Performance Comparison of Single vs Multi - Descent Steps SGD')
plt.legend()
plt.show()