# Benchmark


In [None]:
#| default_exp benchmark

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
# |export

import time

from itertools import count
import torch
from torch import nn
from torch.nn import functional as F
from torch.cuda.amp.autocast_mode import autocast


#import timm
from tqdm.auto import tqdm
#import wandb

In [None]:
# |export
def benchmark(model: nn.Module, # Model to run
                bs: int =32,    # Batch size
                n_batches: int|None =None,  # Number of batches to run. `seconds` must be None
                n_seconds: int|None =None,  # Number of seconds to run. `n_batches` must be None
                fp16: int =False,           # Use Automatic Mixed Precision
                size: int=224,              # Mock-train on this size "images"
                dev: torch.device=torch.device("cuda:0"),): # Device to run on

    """Mock-train the model on random noise input."""

    # There can be only one
    assert not n_batches or not n_seconds
    assert n_batches or n_seconds

    torch.backends.cudnn.benchmark=True
    assert torch.backends.cudnn.is_available()

    model.to(dev)
    optim = torch.optim.Adam(model.parameters(), lr=0.00001, weight_decay=0.00005)

    X = torch.randn((bs, 3, size, size), device=dev)

    # Assume the head is for ImageNet with 1000 catagories.
    y = torch.randint(0, 999, (bs,), device=dev)

    # Warm-up to run cudnn.benchmark first.
    yhat = model(X)

    loss = F.cross_entropy(yhat, y)
    loss.backward()

    optim.step()
    optim.zero_grad(set_to_none=True)

    if n_batches:
        pbar = tqdm(total=n_batches, unit="Batch")
    else:
        pbar = tqdm(total=n_seconds,
            bar_format="{l_bar}{bar}| {n:.1f}/{total} s [{elapsed}<{remaining} {postfix}]")

    start_time = time.time()
    last_time = start_time
    for c in count():
        with autocast(enabled=fp16):
            yhat = model(X)
            loss = F.cross_entropy(yhat, y)

        loss.backward()
        optim.step()
        optim.zero_grad(set_to_none=True)

        if n_batches:
            pbar.update()
            if c+1 == n_batches:
                break

        else:
            now = time.time()
            iter_time =  now - last_time
            run_time = now - start_time
            pbar.update(iter_time)
            if run_time >= n_seconds:
                break
            last_time = now
    pbar.close()

    return ((time.time() - start_time), c*bs)


In [None]:
# |eval: false
# Example: benchmark a ResNet-50 created without pretrained weights for a
# 10-second time budget.
# NOTE(review): `import timm` is commented out in the export cell above, so
# `timm` has to be imported separately before this cell can run.
model = timm.create_model("resnet50", pretrained=False)
benchmark(model, n_seconds=10)

  0%|          | 0.0/10 s [00:00<? ]

(10.188517570495605, 768)

In [None]:
# |eval: false
# Example: reuse `model` from the previous cell, but run a fixed number of
# batches instead of a time budget.
benchmark(model, n_batches=10)

  0%|          | 0/10 [00:00<?, ?Batch/s]

(4.067640781402588, 288)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


device_name: NVIDIA GeForce GTX 1060 6GB
device_capability: (6, 1)
pytorch: 1.12.1.post201
cuda: 11.2
platform: Linux-5.15.0-48-generic-x86_64-with-glibc2.35
model: resnet50
fp16: False
bs: 32
n_seconds: 20
n_batches: None


[34m[1mwandb[0m: Currently logged in as: [33mxl0[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0.0/20 s [00:00<? ]

duration: 20.015317916870117
n_items: 1536
throughput: 76.74122421534791


0,1
bs,▁
duration,▁
fp16,▁
n_items,▁
n_seconds,▁
throughput,▁

0,1
bs,32
cuda,11.2
device_capability,"(6, 1)"
device_name,NVIDIA GeForce GTX 1...
duration,20.01532
fp16,False
model,resnet50
n_items,1536
n_seconds,20
platform,Linux-5.15.0-48-gene...


In [None]:
#| hide
# Trigger nbdev's notebook export.
import nbdev; nbdev.nbdev_export()