<a href="https://colab.research.google.com/github/uicids560/Efficient-Deep-Training/blob/main/Cifar100_ResNet18_CRAIG_Epoch_%3D_20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ResNet18 - CRAIG - CIFAR100
The ResNet18 model architecture is taken from the CORDS example notebook: https://github.com/decile-team/cords/blob/844f897ea4ed7e2f9c1453888022c281bb2091be/examples/SL/image_classification/python_notebooks/CORDS_SL_CIFAR10_Custom_Train.ipynb

## Cloning CORDS repository

In [None]:
!git clone https://github.com/decile-team/cords.git
%cd cords/
%ls

fatal: destination path 'cords' already exists and is not an empty directory.
/content/cords
[0m[01;34mbenchmarks[0m/   [01;34mcords[0m/  [01;34mexamples[0m/    [01;34mrequirements[0m/  [01;34mtests[0m/        train_ssl.py
CITATION.CFF  [01;34mdata[0m/   LICENSE.txt  [01;34mresults[0m/       train_hpo.py
[01;34mconfigs[0m/      [01;34mdocs[0m/   README.md    setup.py       train_sl.py


## Install prerequisite libraries of CORDS

In [None]:
!pip install dotmap
!pip install apricot-select
!pip install ray[default]
!pip install ray[tune]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Import necessary libraries

In [None]:
import time
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from cords.utils.data.datasets.SL import gen_dataset
from torch.utils.data import Subset
from cords.utils.config_utils import load_config_data
import os.path as osp
from cords.utils.data.data_utils import WeightedSubset
from ray import tune

## Loading the CIFAR100 dataset

In [None]:
# Load CIFAR-100 through the CORDS dataset helper. The four returned values are
# unpacked into the names the rest of the notebook uses for the train,
# validation and test splits and for the class count.
(trainset,
 validset,
 testset,
 num_cls) = gen_dataset('data/', 'cifar100', None, isnumpy=False)

Files already downloaded and verified
Files already downloaded and verified


## Create dataloaders

In [None]:
# Batch sizes for the train / validation / test loaders.
trn_batch_size, val_batch_size, tst_batch_size = 20, 20, 1200

# All three loaders iterate in dataset order (shuffle=False) and use pinned
# host memory.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=trn_batch_size,
    shuffle=False,
    pin_memory=True,
)

valloader = torch.utils.data.DataLoader(
    validset,
    batch_size=val_batch_size,
    shuffle=False,
    pin_memory=True,
)

testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=tst_batch_size,
    shuffle=False,
    pin_memory=True,
)

## Defining Model

In [None]:
from cords.utils.models import ResNet18

# Number of output classes; passed to the model so the value lives in one place
# instead of being repeated as a literal.
numclasses = 100
# Device argument: use the GPU when one is available, otherwise fall back to the
# CPU instead of failing in `.to('cuda')`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ResNet18(numclasses)
model = model.to(device)

## Defining Loss Functions

In [None]:
# Mean-reduced cross-entropy: yields one scalar per batch (used for evaluation).
criterion = nn.CrossEntropyLoss(reduction='mean')
# Unreduced cross-entropy: yields one loss value per sample, which the training
# loop combines with the per-sample weights coming from the subset loader.
criterion_nored = nn.CrossEntropyLoss(reduction='none')

## Cumulative time calculation

In [None]:
def generate_cumulative_timing(mod_timing):
    """Return the running total of the given durations, expressed in hours.

    Args:
        mod_timing: one-dimensional sequence of durations in seconds (the
            notebook passes the per-epoch wall-clock times).

    Returns:
        A float NumPy array of the same length whose i-th entry is the sum of
        the first i + 1 durations divided by 3600.
    """
    seconds = np.asarray(mod_timing, dtype=np.float64)
    return np.cumsum(seconds) / 3600

## Defining Optimizers and schedulers

In [None]:
# Plain SGD with momentum and L2 weight decay over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=1e-2,
                                  momentum=0.9,
                                  weight_decay=5e-4,
                                  nesterov=False)

# T_max is the number of scheduler steps over which the cosine schedule anneals
# the learning rate. The training loop steps the scheduler once per epoch, so
# T_max is the number of training epochs (20 in this notebook; keep it in sync
# with `num_epochs`). A larger value would leave the learning rate almost
# unchanged for the whole run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                       T_max=20)

## Get logger object for logging

In [None]:
def __get_logger(results_dir):
  """Create (or reconfigure) the logger that writes to stdout and to a log file.

  Args:
    results_dir: directory that receives ``results.log``; created if missing.

  Returns:
    The ``logging.Logger`` named after this module, carrying exactly one
    stream handler (stdout, INFO) and one file handler (DEBUG). Propagation to
    the root logger is disabled.
  """
  # Imported locally so the function works regardless of which notebook cell
  # has already been executed.
  import logging
  import os
  import sys

  os.makedirs(results_dir, exist_ok=True)
  # setup logger
  plain_formatter = logging.Formatter("[%(asctime)s] %(name)s %(levelname)s: %(message)s",
                                      datefmt="%m/%d %H:%M:%S")
  logger = logging.getLogger(__name__)
  logger.setLevel(logging.INFO)
  # getLogger() returns the same object on every call. Drop and close handlers
  # left over from earlier calls, otherwise each re-run of the cell would
  # duplicate every log line and leak an open file handle.
  for old_handler in list(logger.handlers):
    logger.removeHandler(old_handler)
    old_handler.close()
  s_handler = logging.StreamHandler(stream=sys.stdout)
  s_handler.setFormatter(plain_formatter)
  s_handler.setLevel(logging.INFO)
  logger.addHandler(s_handler)
  f_handler = logging.FileHandler(os.path.join(results_dir, "results.log"))
  f_handler.setFormatter(plain_formatter)
  f_handler.setLevel(logging.DEBUG)
  logger.addHandler(f_handler)
  # Keep messages out of the root logger so they are not echoed a second time.
  logger.propagate = False
  return logger

## Instantiating the logger used to record results

In [None]:
import logging
import os
import os.path as osp
import sys

# Results logging directory: 'results' with any leading '~' expanded, then
# resolved to an absolute path.
results_dir = osp.expanduser('results')
results_dir = osp.abspath(results_dir)
# Logger writing to stdout and to results.log inside that directory.
logger = __get_logger(results_dir)

## Instantiating CRAIG subset selection dataloaders

In [None]:
from cords.utils.data.dataloader.SL.adaptive import CRAIGDataLoader
from dotmap import DotMap

selection_strategy = 'CRAIG'
# Arguments handed to the CRAIG subset-selection loader. The class count,
# device and batch size reuse the values defined in the earlier cells so they
# cannot drift apart from the model and the base train loader.
# NOTE(review): the meaning of eta / fraction / select_every / kappa is defined
# by CORDS, not by this notebook; confirm against the CORDS documentation.
dss_args = dict(model=model,
                loss=criterion_nored,
                eta=0.1,
                num_classes=numclasses,
                num_epochs=20,  # keep in sync with num_epochs used by the training loop
                device=device,
                fraction=0.1,
                select_every=20,
                kappa=0,
                linear_layer=False,
                selection_type='SL',
                optimizer=optimizer,
                if_convex=False)
# DotMap exposes the dictionary entries as attributes.
dss_args = DotMap(dss_args)

dataloader = CRAIGDataLoader(trainloader, valloader, dss_args, logger,
                                  batch_size=trn_batch_size,
                                  shuffle=False,
                                  pin_memory=False)

[11/16 20:24:35] __main__ INFO: CRAIG dataloader initialized. 


## Arguments for training and evaluation

In [None]:
# Training arguments
num_epochs = 20

# Results-logging arguments: the training loop evaluates and logs whenever the
# epoch number is a multiple of `print_every`, reporting the metrics named in
# `print_args`.
print_every = 1
print_args = ["val_loss", "val_acc", "tst_loss", "tst_acc", "time"]

# Metric histories filled in by the training loop (each is its own list).
trn_losses, val_losses, tst_losses, subtrn_losses = [], [], [], []
trn_acc, val_acc, tst_acc, subtrn_acc = [], [], [], []
# Wall-clock duration of each training epoch.
timing = []

## Training loop with evaluation

In [None]:
def _evaluate(loader):
    """Run `model` over `loader` without gradients and total up loss and hits.

    Args:
        loader: iterable yielding (inputs, targets) batches.

    Returns:
        (loss_sum, correct, total): the sum of the per-batch `criterion` values,
        the number of correct top-1 predictions and the number of samples seen.
    """
    loss_sum, correct, total = 0, 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device, non_blocking=True)
            outputs = model(inputs)
            loss_sum += criterion(outputs, targets).item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    return loss_sum, correct, total


# Maps each reportable key in `print_args` to its log label and the history
# list whose latest entry is printed. The lists are the ones mutated below.
metric_report = {
    "val_loss": ("Validation Loss: ", val_losses),
    "val_acc": ("Validation Accuracy: ", val_acc),
    "tst_loss": ("Test Loss: ", tst_losses),
    "tst_acc": ("Test Accuracy: ", tst_acc),
    "trn_loss": ("Training Loss: ", trn_losses),
    "trn_acc": ("Training Accuracy: ", trn_acc),
    "subtrn_loss": ("Subset Loss: ", subtrn_losses),
    "subtrn_acc": ("Subset Accuracy: ", subtrn_acc),
    "time": ("Timing: ", timing),
}

#Training Loop
for epoch in range(num_epochs):
    subtrn_loss = 0
    subtrn_correct = 0
    subtrn_total = 0
    model.train()
    start_time = time.time()
    # The subset loader yields per-sample weights alongside inputs and targets.
    for inputs, targets, weights in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device, non_blocking=True)
        weights = weights.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        losses = criterion_nored(outputs, targets)
        # Weighted average of the per-sample losses (weights normalised to sum to 1).
        loss = torch.dot(losses, weights / weights.sum())
        loss.backward()
        subtrn_loss += loss.item()
        optimizer.step()
        _, predicted = outputs.max(1)
        subtrn_total += targets.size(0)
        subtrn_correct += predicted.eq(targets).sum().item()
    # Only the training pass is timed; evaluation below is excluded.
    epoch_time = time.time() - start_time
    scheduler.step()
    timing.append(epoch_time)

    # Skip evaluation and logging unless this is a reporting epoch.
    if (epoch + 1) % print_every != 0:
        continue

    #Evaluation Loop
    trn_loss = val_loss = tst_loss = 0
    model.eval()

    if ("trn_loss" in print_args) or ("trn_acc" in print_args):
        trn_loss, trn_correct, trn_total = _evaluate(trainloader)
        trn_losses.append(trn_loss)
        if "trn_acc" in print_args:
            trn_acc.append(trn_correct / trn_total)

    if ("val_loss" in print_args) or ("val_acc" in print_args):
        val_loss, val_correct, val_total = _evaluate(valloader)
        val_losses.append(val_loss)
        if "val_acc" in print_args:
            val_acc.append(val_correct / val_total)

    if ("tst_loss" in print_args) or ("tst_acc" in print_args):
        tst_loss, tst_correct, tst_total = _evaluate(testloader)
        tst_losses.append(tst_loss)
        if "tst_acc" in print_args:
            tst_acc.append(tst_correct / tst_total)

    if "subtrn_acc" in print_args:
        subtrn_acc.append(subtrn_correct / subtrn_total)

    # The report below is keyed on "subtrn_loss"; record under that key so that
    # requesting it no longer indexes an empty list. The older spelling
    # "subtrn_losses" is still accepted for recording.
    if ("subtrn_loss" in print_args) or ("subtrn_losses" in print_args):
        subtrn_losses.append(subtrn_loss)

    #Results Printing
    print_str = "Epoch: " + str(epoch + 1)
    for arg in print_args:
        if arg in metric_report:
            label, history = metric_report[arg]
            print_str += " , " + label + str(history[-1])

    logger.info(print_str)

[11/16 20:24:49] __main__ INFO: Epoch: 1 , Validation Loss: 1085.5097072124481 , Validation Accuracy: 0.0478 , Test Loss: 39.365739822387695 , Test Accuracy: 0.0473 , Timing: 8.330830812454224
[11/16 20:25:00] __main__ INFO: Epoch: 2 , Validation Loss: 1037.0730621814728 , Validation Accuracy: 0.0652 , Test Loss: 37.51666879653931 , Test Accuracy: 0.0637 , Timing: 5.453387260437012
[11/16 20:25:11] __main__ INFO: Epoch: 3 , Validation Loss: 1007.6160254478455 , Validation Accuracy: 0.0764 , Test Loss: 36.371790409088135 , Test Accuracy: 0.0804 , Timing: 5.452266693115234
[11/16 20:25:22] __main__ INFO: Epoch: 4 , Validation Loss: 996.350784778595 , Validation Accuracy: 0.0924 , Test Loss: 35.975873947143555 , Test Accuracy: 0.0908 , Timing: 5.45169997215271
[11/16 20:25:33] __main__ INFO: Epoch: 5 , Validation Loss: 980.8410398960114 , Validation Accuracy: 0.1012 , Test Loss: 35.451478004455566 , Test Accuracy: 0.1031 , Timing: 5.428737163543701
[11/16 20:25:44] __main__ INFO: Epoch: 6

## Results Summary

In [None]:
# Final summary. The loss values printed here are the ones left behind by the
# last iteration of the training loop.
print("CORDS Selection Method: {0:s}".format(selection_strategy))
print("Final SubsetTrn: {0:f}".format(subtrn_loss))
if "val_loss" in print_args:
    if "val_acc" in print_args:
        print("Validation Loss:", val_loss, "/ Validation Accuracy:", val_acc[-1])
    else:
        print("Validation Loss:", val_loss)

if "tst_loss" in print_args:
    if "tst_acc" in print_args:
        print("Test Loss:", tst_loss, "/ Test Accuracy:", tst_acc[-1])
    else:
        print("Test Data Loss:", tst_loss)

if "time" in print_args:
    # Per-epoch training times in seconds; print the string instead of
    # building it and throwing it away.
    time_str = "Time, " + "".join(" , " + str(t) for t in timing)
    print(time_str)

timing_array = np.array(timing)
# generate_cumulative_timing divides by 3600, so the total is in hours.
cum_timing = list(generate_cumulative_timing(timing_array))
# No epochs timed (e.g. num_epochs == 0) means a total of zero rather than an IndexError.
total_hours = cum_timing[-1] if cum_timing else 0.0
print("Total time taken by", selection_strategy, "=", total_hours)

CORDS Selection Method: CRAIG
Final SubsetTrn: 443.521102
Validation Loss: 1027.9132301807404 / Validation Accuracy: 0.1614
Test Loss: 38.59984874725342 / Test Accuracy: 0.1711
Total time taken by CRAIG = 0.03126797238985697
