# <a id='toc1_'></a>[Evaluate Classification Models](#toc0_)

**Table of contents**<a id='toc0_'></a>    
- [Evaluate Classification Models](#toc1_)    
  - [Setup for evaluating the classifier](#toc1_1_)    
  - [Prepare Dataloader](#toc1_2_)    
    - [Standard ImageNet Validation Dataset](#toc1_2_1_)    
  - [Prepare Model](#toc1_3_)    
  - [Evaluate on ImageNet Validation Dataset](#toc1_4_)    
    - [Evaluate by class](#toc1_4_1_)    
    - [Save the results](#toc1_4_2_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

In [1]:
import json
import os

import torch
from torch import nn
from torchvision.models import resnet18, mobilenet_v2

from tools import data, utils

## <a id='toc1_1_'></a>[Setup for evaluating the classifier](#toc0_)

In [2]:
# Make cuDNN behave reproducibly for this evaluation run: request deterministic
# kernels and switch off the per-shape algorithm autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Run configuration for this notebook.
# Keys read directly by the notebook's own cells: "data_path", "device",
# "batch_size", "workers", "label_smoothing" and "print_freq".
# The whole dict is also passed to utils.init_distributed_mode,
# utils.getValSampler and utils.model_setup, and is written as-is into the
# results JSON by the last cell.
# NOTE(review): the remaining keys look like training-script options carried
# along for those helpers — confirm in tools/utils which ones are actually read.
setup = {
    "data_path": "./data/imagewoof",
    "device": "cuda",
    "batch_size": 32,  # re-assigned to 256 by a later cell before any loader is built
    "epochs": 90,
    "workers": 16,  # re-assigned (to the same value) by a later cell
    "opt": "sgd",
    "lr": 0.1,
    "momentum": 0.9,
    "weight_decay": 0.0001,
    "label_smoothing": 0.0,  # forwarded to nn.CrossEntropyLoss
    "mixup_alpha": 0.0,
    "cutmix_alpha": 0.0,
    "lr_scheduler": "steplr",
    "lr_warmup_epochs": 0,
    "lr_warmup_method": "constant",
    "lr_warmup_decay": 0.01,
    "lr_step_size": 30,
    "lr_gamma": 0.1,
    "lr_min": 0.0,
    "print_freq": 10,  # forwarded to utils.evaluate / utils.evaluateByClass
    "output_dir": ".",
    "resume": "",
    "start_epoch": 0,
    "cache_dataset": False,
    "sync_bn": False,
    "test_only": False,
    "ra_magnitude": 9,
    "augmix_severity": 3,
    "random_erase": 0.0,
    "amp": False,
    "world_size": 1,
    "dist_url": "env://",
    "model_ema": False,
    "model_ema_steps": 32,
    "model_ema_decay": 0.99998,
    "use_deterministic_algorithms": False,
    "interpolation": "bilinear",
    "val_resize_size": 256,
    "val_crop_size": 224,
    "train_crop_size": 224,
    "ra_sampler": False,
    "ra_reps": 3,
    "backend": "PIL",
    "use_v2": False,
}

In [4]:
# Device object built from the "device" entry of `setup`; the model is moved to
# it after loading and it is passed to the evaluation helpers.
device = torch.device(setup['device'])

In [5]:
# Evaluation-time overrides of the values declared in `setup`
# (batch size for the loaders, number of DataLoader worker processes).
setup.update(batch_size=256, workers=16)

In [6]:
# Pass the configuration through the project's distributed-mode initialiser and
# keep the dict it returns; every later cell (sampler, model setup, results
# JSON) works with this returned `setup`. The recorded output of this cell
# reports a non-distributed run.
# NOTE(review): the checkpoint-loading cell reads a "distributed" key from the
# setup stored in the checkpoint — presumably this helper is what adds that
# key; confirm in tools/utils.
setup = utils.init_distributed_mode(setup)

Not using distributed mode


## <a id='toc1_2_'></a>[Prepare Dataloader](#toc0_)

### <a id='toc1_2_1_'></a>[Standard ImageNet Validation Dataset](#toc0_)

In [7]:
# Load validation set: build the cached copy under the name "imagenet10_val",
# then read it back from the same cache location.
# os.path.normpath removes the leading "./" without touching the rest of the
# path. The previous str.replace("./", "") removed *every* "./" occurrence and
# would, for example, turn "../data" into ".data".
data.cacheValData(
    os.path.join(os.path.normpath(setup["data_path"]), "val"),
    "imagenet10_val",
    save_path=setup["data_path"],
)

val_dataset = data.loadData("imagenet10_val", cache_path=setup["data_path"])

# The classifier loaded later has a 10-way output, so stop here with a clear
# message if the dataset does not have exactly 10 classes.
assert len(val_dataset.classes) == 10, (
    f"expected 10 classes, found {len(val_dataset.classes)}"
)

imagenet10_val.pt dataset saved to ./data/imagewoof
imagenet10_val.pt dataset loaded from ./data/imagewoof


In [8]:
# Label lookups in both directions, plus the class count that the per-class
# evaluation passes to utils.evaluateByClass.
class_to_idx = val_dataset.class_to_idx
num_classes = len(class_to_idx)
idx_to_class = dict(zip(class_to_idx.values(), class_to_idx.keys()))

In [9]:
# Sampler for the full-validation DataLoader, built by the project helper from
# the dataset and the run configuration.
# NOTE(review): presumably a sequential sampler here (distributed variant when
# distributed mode is on) — confirm in tools/utils.
val_sampler = utils.getValSampler(val_dataset, setup)

In [10]:
# Loader over the complete validation set. Iteration order comes from
# `val_sampler`; shuffling stays off so it does not conflict with the sampler.
data_loader_test = torch.utils.data.DataLoader(
    dataset=val_dataset,
    sampler=val_sampler,
    shuffle=False,
    batch_size=setup["batch_size"],
    num_workers=setup["workers"],
    pin_memory=True,
)

In [11]:
# Split the validation set into subsets and build one loader per subset; the
# per-class evaluation iterates `val_loaders.items()` as (class name, loader).
# os.path.normpath removes the leading "./" only. The previous
# str.replace("./", "") removed every "./" occurrence and would corrupt a path
# such as "../data" into ".data".
val_subsets = data.getSubsets(
    val_dataset,
    os.path.join(os.path.normpath(setup["data_path"]), "val"),
)
val_loaders = data.getSubsetLoader(
    val_subsets,
    batch_size=setup["batch_size"],
    num_workers=setup["workers"],
    pin_memory=True,
)

## <a id='toc1_3_'></a>[Prepare Model](#toc0_)

In [12]:
# Load the trained model weights
# The output size follows the dataset's class count instead of a second
# hard-coded 10, so the model head and the label maps cannot drift apart.
model = resnet18(weights=None, num_classes=num_classes)  # resnet18, mobilenet_v2
eval_model_path = "output/imagewoof/model_200.pth"  # Path to the model for evaluation

# Deserialize onto the CPU so the checkpoint opens regardless of the device it
# was saved from; the weights are moved to `device` together with the model.
# NOTE: torch.load unpickles the file — only open checkpoints from a trusted source.
checkpoint = torch.load(eval_model_path, map_location="cpu")

if "setup" in checkpoint:
    # Checkpoint written with its run configuration: weights are under "model".
    state_dict = checkpoint["model"]
    # .get() tolerates checkpoints whose saved setup has no "distributed" entry.
    if checkpoint["setup"].get("distributed", False):
        # Weights saved from a distributed run carry a "module." key prefix;
        # strip it in place so the keys match the bare model.
        torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(
            state_dict, "module."
        )
else:
    # Anything else is treated as a bare state_dict.
    state_dict = checkpoint

model.load_state_dict(state_dict)
model.to(device)
model.eval()
print("Model loaded")

Model loaded


In [13]:
# Let the project helper finish preparing the model from the run configuration.
# It returns three objects: the model to evaluate, an EMA model that the
# evaluation cells treat as optional (they test its truthiness before using
# it), and `model_without_ddp`, which none of the later cells shown here use.
# NOTE(review): presumably the DDP wrapper / EMA copy are only created when
# enabled in `setup` — confirm in tools/utils.
model, model_ema, model_without_ddp = utils.model_setup(model, setup)

## <a id='toc1_4_'></a>[Evaluate on ImageNet Validation Dataset](#toc0_)

In [14]:
# Loss passed to utils.evaluate alongside the accuracy metrics; the label
# smoothing factor comes from `setup` (declared as 0.0 in the config cell).
criterion = nn.CrossEntropyLoss(label_smoothing=setup['label_smoothing'])

In [15]:
# Evaluate on the full validation set. When an EMA model is available it is
# the one evaluated and the log lines get the "EMA" suffix; otherwise the
# plain model is evaluated and no suffix argument is passed.
metrics = utils.evaluate(
    model_ema if model_ema else model,
    criterion,
    data_loader_test,
    device=device,
    print_freq=setup["print_freq"],
    **({"log_suffix": "EMA"} if model_ema else {}),
)

Test:   [ 0/16]  eta: 0:15:06  loss: 0.6389 (0.6389)  acc1: 80.8594 (80.8594)  acc5: 98.4375 (98.4375)  time: 56.6710  data: 55.7033  max mem: 1847
Test:   [10/16]  eta: 0:00:31  loss: 0.6389 (0.6664)  acc1: 83.9844 (82.2798)  acc5: 98.8281 (98.1889)  time: 5.2129  data: 5.0641  max mem: 1855
Test:  Total time: 0:00:59
Test:  Acc@1 83.431 Acc@5 98.091


In [16]:
# Keep a (shallow) copy of the full-validation metrics under its own name; the
# summary written by the last cell stores `real_val`, so it is unaffected if
# `metrics` is rebound later.
real_val = metrics.copy()
metrics  # bare last expression: shown as the cell output

{'acc1': 83.4308984474421,
 'acc5': 98.09111733265462,
 'loss': 0.6162927746772766,
 'count': 16}

### <a id='toc1_4_1_'></a>[Evaluate by class](#toc0_)

In [17]:
# Per-class evaluation: run the classifier on each class subset and collect,
# per class name, the metrics, the top predicted classes (as class names) and
# the mean confidence value returned by utils.evaluateByClass.
# NOTE(review): these values are later written with json.dump — assumes the
# helper returns JSON-serialisable Python values; confirm in tools/utils.

# The model to evaluate does not change between classes, so choose it once
# instead of repeating two near-identical calls inside the loop. The "EMA" log
# suffix is only passed when the EMA model is the one being evaluated.
eval_target = model_ema if model_ema else model
suffix_kwargs = {"log_suffix": "EMA"} if model_ema else {}

eval_dict = dict()
with torch.inference_mode():
    for class_name, loader in val_loaders.items():
        val_metrics, top_n_classes, conf_mean = utils.evaluateByClass(
            eval_target,
            loader,
            num_classes,
            device=device,
            print_freq=setup["print_freq"],
            **suffix_kwargs,
        )
        # Record the entry only after the evaluation succeeded, so a failure
        # never leaves an empty placeholder for this class in `eval_dict`.
        eval_dict[class_name] = {
            "val_metrics": val_metrics,
            "top_n_classes": [idx_to_class[i] for i in top_n_classes],
            "correct_conf": conf_mean,
        }

Test:   [0/2]  eta: 0:01:43  acc1: 80.8594 (80.8594)  acc5: 98.4375 (98.4375)  time: 51.7837  data: 51.2957  max mem: 1855
Test:  Total time: 0:00:53
Test:  Acc@1 82.152 Acc@5 98.533
Test:   [0/2]  eta: 0:01:42  acc1: 89.4531 (89.4531)  acc5: 98.8281 (98.8281)  time: 51.1885  data: 51.0603  max mem: 1855
Test:  Total time: 0:00:53
Test:  Acc@1 87.990 Acc@5 99.265
Test:   [0/2]  eta: 0:02:23  acc1: 77.3438 (77.3438)  acc5: 97.2656 (97.2656)  time: 71.6386  data: 71.4893  max mem: 1855
Test:  Total time: 0:01:14
Test:  Acc@1 77.512 Acc@5 97.847
Test:   [0/1]  eta: 0:01:10  acc1: 65.6250 (65.6250)  acc5: 95.9821 (95.9821)  time: 70.9953  data: 70.8363  max mem: 1855
Test:  Total time: 0:01:12
Test:  Acc@1 65.625 Acc@5 95.982
Test:   [0/2]  eta: 0:02:12  acc1: 83.5938 (83.5938)  acc5: 96.4844 (96.4844)  time: 66.1593  data: 65.9824  max mem: 1855
Test:  Total time: 0:01:08
Test:  Acc@1 83.791 Acc@5 96.509
Test:   [0/2]  eta: 0:02:13  acc1: 82.8125 (82.8125)  acc5: 99.2188 (99.2188)  time: 

### <a id='toc1_4_2_'></a>[Save the results](#toc0_)

In [18]:
# Everything needed to interpret this run: which checkpoint was evaluated, the
# full-validation metrics, the per-class details and the configuration used.
summary = {
    "eval_model": eval_model_path,
    "val_metrics": real_val,
    "val_metrics_details": eval_dict,
    "setup": setup,
}

# Serialise first: if any value is not JSON-serialisable this raises before the
# output file is opened, so an existing val.json is never truncated.
summary_json = json.dumps(summary, indent=4)

# os.path.normpath removes the leading "./" only. The previous
# str.replace("./", "") removed every "./" occurrence and would corrupt a path
# such as "../data" into ".data".
summary_path = os.path.join(os.path.normpath(setup["data_path"]), "val.json")
with open(summary_path, "w", encoding="utf-8") as f:
    f.write(summary_json)