# CLI

In [1]:
#| default_exp cli

In [2]:
#| hide
from nbdev.showdoc import *


In [3]:
# |hide
# |export
import platform

import torch
import timm
import wandb

from fastcore.script import *

from ready_steady_go.benchmark import *

In [4]:
# |export
@call_parse
def cli(wnb: str = "disabled",     # W&B mode. Accepted values: online, offline, disabled.
        wnb_run: str = None,       # W&B run name (auto-generate if None)
        wnb_project: str = None,   # W&B project name (passed to `wandb.init`)
        wnb_entity: str = None,    # W&B entity name (passed to `wandb.init`)
        run_number: int = 1,       # A unique number to keep track over repeat runs
        model: str = "resnet50",   # TIMM Model name
        bs: int = 32,              # Batch size
        size: int = 224,           # (fake) image size
        fp16: bool = False,        # Enable fp16 (forwarded to `benchmark`)
        n_batches: int = 0,        # Run for N batches. Mutually exclusive with `n_seconds`
        n_seconds: int = 0,        # Run for N seconds. Mutually exclusive with `n_batches`
    ):
    "Benchmark a TIMM model and report its throughput, optionally logging to W&B."

    # Exactly one of the two stop conditions has to be provided.
    if not n_batches and not n_seconds:
        print("Either `n_batches` or `n_seconds` must be non-zero")
        raise SystemExit(1)
    if n_batches and n_seconds:
        print(f"`n_batches` and `n_seconds` are mutually exclusive. {n_batches=}, {n_seconds=}")
        raise SystemExit(1)

    assert wnb in ["online", "offline", "disabled"], \
        f"`wnb` must be one of online, offline, disabled. Got {wnb!r}"

    # Query every device property from the same (current) CUDA device so that
    # name, capability and memory always describe the same GPU.
    device = torch.cuda.current_device()

    stats = {
        "device_name" : torch.cuda.get_device_name(device),
        "device_capability": str(torch.cuda.get_device_capability(device)),
        "pytorch": torch.version.__version__,
        "cuda": torch.version.cuda,
        "platform": platform.platform(),
        "model": model,
        "fp16": fp16,
        "bs": bs,
        "n_seconds": n_seconds if n_seconds else None,
        "n_batches": n_batches if n_batches else None,
        "run_number": run_number,
        "gpu_mem" : torch.cuda.get_device_properties(device).total_memory,
    }

    print("\n".join([ f"{k}: {v}" for k, v in stats.items() ]))

    with wandb.init(mode=wnb, project=wnb_project, entity=wnb_entity, name=wnb_run) as run:
        # Log the environment first, so it is recorded even if the benchmark fails.
        run.log(stats, step=0, commit=True)

        # Keep `model` as the name string; the instantiated network gets its own name.
        net = timm.create_model(model, pretrained=False)
        duration, n_items = benchmark(net, bs=bs, size=size, fp16=fp16, n_batches=n_batches, n_seconds=n_seconds)
        summary = {
            "duration": duration,
            "n_items": n_items,
            "throughput": n_items / duration,
        }

        print("\n".join([ f"{k}: {v}" for k, v in summary.items() ]))

        # Step 0 has already been committed above. W&B steps must increase, so
        # let W&B assign the next step instead of writing to step 0 again.
        run.log(summary, commit=True)



In [5]:
# |eval: false
# |hide
# Smoke test: a 20-second resnet50 run with W&B logging disabled.
cli(
    wnb="disabled",
    run_number=1,
    model="resnet50",
    bs=32,
    size=224,
    fp16=False,
    n_batches=0,
    n_seconds=20,
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


device_name: NVIDIA GeForce GTX 1060 6GB
device_capability: (6, 1)
pytorch: 1.12.1
cuda: 11.6
platform: Linux-5.15.0-48-generic-x86_64-with-glibc2.35
model: resnet50
fp16: False
bs: 32
n_seconds: 20
n_batches: None
run_number: 1
gpu_mem: 6373638144


  0%|          | 0.0/20 s [00:00<? ]

duration: 20.110930919647217
n_items: 1536
throughput: 76.37637492451515


## How to use

In [6]:
!ready-steady-go -h

usage: ready-steady-go [-h] [--wnb WNB] [--wnb_run WNB_RUN]
                       [--wnb_project WNB_PROJECT] [--wnb_entity WNB_ENTITY]
                       [--run_number RUN_NUMBER] [--model MODEL] [--bs BS]
                       [--size SIZE] [--fp16] [--n_batches N_BATCHES]
                       [--n_seconds N_SECONDS]

optional arguments:
  -h, --help                 show this help message and exit
  --wnb WNB                  W&B mode. Accepted values: online, offline,
                             disabled. (default: disabled)
  --wnb_run WNB_RUN          W&B run name (auto-generate if None)
  --wnb_project WNB_PROJECT
  --wnb_entity WNB_ENTITY
  --run_number RUN_NUMBER    A unique number to keep track over repeat runs
                             (default: 1)
  --model MODEL              TIMM Model name (default: resnet50)
  --bs BS                    Batch size (default: 32)
  --size SIZE                (fake) image size (default: 224)
  --fp16                     (default:

To run the benchmark over a range of models and batch sizes, have a look at the `run_all_benchmarks.sh` script:

In [7]:
#!hide
!cat ../run_all_benchmarks.sh

#!/bin/bash

WANDB_MODE="online"
WANDB_PROJECT="ready-steady-go"

MODELS="resnet50 vgg19 swin_s3_base_224"
BATCHES="8 16 16 32 64 128 256 512 1024"

N_SECONDS=30

#set -x

wandb login

echo "Warming up the GPU for 3 minutes..."
gpu-sprint --model=resnet50 --n_seconds=180

echo "Running benchmarks..."

# You can do multiple runs, but in my experience the results barely change between runs.
for RUN in 1 #2 3
do
    for m in $MODELS; do
        for fp16 in " " "--fp16"; do
            for bs in $BATCHES; do
                ready-steady-go --model=$m $fp16 --bs=$bs --n_seconds=$N_SECONDS --wnb=$WANDB_MODE --wnb_project=$WANDB_PROJECT --run_number=$RUN
                if [ $? -ne 0 ]; then
                    # We probably hit a batch size the GPU can't handle.
                    # No need to try larger batch sizes.
                    break
                fi
            done
        done
    done
done

# I had weird data lossed with wandb for some reason.
wandb sync --sync-all --include-

In [8]:
#| hide
import nbdev; nbdev.nbdev_export()