## Benchmarking ToMe
This notebook provides code to benchmark ToMe's throughput against an unpatched baseline model.

**Note**: Running inside a notebook adds overhead. For accurate numbers, run the benchmark from a standalone script.

In [1]:
import timm
import tome

In [2]:
# Which timm ViT to benchmark; any ViT model can be used here
# (see timm.models.vision_transformer for the available ones)
model_name = "vit_base_patch16_224"

# Build the model with its pretrained weights
model = timm.create_model(model_name=model_name, pretrained=True)

In [3]:
# Benchmark settings
#   device:     whichever device you want to benchmark on. Without a GPU you
#               can use "cpu", but you probably want a lower number of runs.
#   runs:       number of benchmark runs
#   batch_size: lower this if you don't have that much memory
device, runs, batch_size = "cuda:0", 50, 256

# Input size is read from the model's own default config
input_size = model.default_cfg["input_size"]

In [4]:
# Baseline: benchmark the model before ToMe is applied, in half precision.
# The result is the reference that later cells compare against.
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
baseline_throughput = tome.utils.benchmark(model, **bench_kwargs)

Benchmarking: 100%|██████████| 50/50 [00:22<00:00,  2.21it/s]


Throughput: 522.04 im/s


### Applying ToMe
Simply patch the model after initialization to enable ToMe.

In [5]:
# Apply ToMe
# Patches `model` via tome.patch.timm with the CUDA extension disabled
# (use_cuda_ext=False). The return value is not used; the following cells keep
# working with the same `model` object and configure it through `model.r`.
tome.patch.timm(model, use_cuda_ext=False)

In [6]:
# ToMe, r=8, half precision (model patched with use_cuda_ext=False)
model.r = 8
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
tome_throughput = tome.utils.benchmark(model, **bench_kwargs)

# Report the speed-up relative to the unpatched baseline
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking:   0%|          | 0/50 [00:00<?, ?it/s]

Benchmarking: 100%|██████████| 50/50 [00:21<00:00,  2.33it/s]


Throughput: 580.03 im/s
Throughput improvement: 1.11x


In [7]:
# ToMe, r=8 with a decreasing schedule (r given as the tuple (8, -1.0)),
# half precision (model patched with use_cuda_ext=False)
model.r = (8, -1.0)
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
tome_throughput = tome.utils.benchmark(model, **bench_kwargs)

# Report the speed-up relative to the unpatched baseline
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|██████████| 50/50 [00:18<00:00,  2.66it/s]


Throughput: 660.27 im/s
Throughput improvement: 1.26x


In [8]:
# ToMe, r=16, half precision (model patched with use_cuda_ext=False)
model.r = 16
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
tome_throughput = tome.utils.benchmark(model, **bench_kwargs)

# Report the speed-up relative to the unpatched baseline
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|██████████| 50/50 [00:14<00:00,  3.48it/s]


Throughput: 868.17 im/s
Throughput improvement: 1.66x


In [9]:
# ToMe, r=16 with a decreasing schedule (r given as the tuple (16, -1.0)),
# half precision (model patched with use_cuda_ext=False)
model.r = (16, -1.0)
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
tome_decr_throughput = tome.utils.benchmark(model, **bench_kwargs)

# Report the speed-up relative to the unpatched baseline
print(f"Throughput improvement: {tome_decr_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|██████████| 50/50 [00:09<00:00,  5.22it/s]


Throughput: 1304.62 im/s
Throughput improvement: 2.50x


In [10]:
# Apply ToMe with CUDA extension
# Patches `model` again, this time with use_cuda_ext=True; presumably the
# benchmarks in the following cells then run with the CUDA extension enabled.
# NOTE(review): `model` was already patched with use_cuda_ext=False in an
# earlier cell — confirm that tome.patch.timm supports re-patching an
# already-patched model.
tome.patch.timm(model, use_cuda_ext=True)

In [11]:
# ToMe, r=8, half precision (after re-patching with use_cuda_ext=True)
model.r = 8
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
tome_throughput = tome.utils.benchmark(model, **bench_kwargs)

# Report the speed-up relative to the unpatched baseline
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|██████████| 50/50 [00:19<00:00,  2.58it/s]


Throughput: 635.16 im/s
Throughput improvement: 1.22x


In [12]:
# ToMe, r=8 with a decreasing schedule (r given as the tuple (8, -1.0)),
# half precision (after re-patching with use_cuda_ext=True)
model.r = (8, -1.0)
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
tome_throughput = tome.utils.benchmark(model, **bench_kwargs)

# Report the speed-up relative to the unpatched baseline
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|██████████| 50/50 [00:17<00:00,  2.92it/s]


Throughput: 717.80 im/s
Throughput improvement: 1.37x


In [13]:
# ToMe, r=16, half precision (after re-patching with use_cuda_ext=True)
model.r = 16
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
tome_throughput = tome.utils.benchmark(model, **bench_kwargs)

# Report the speed-up relative to the unpatched baseline
print(f"Throughput improvement: {tome_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|██████████| 50/50 [00:13<00:00,  3.82it/s]


Throughput: 941.25 im/s
Throughput improvement: 1.80x


In [14]:
# ToMe, r=16 with a decreasing schedule (r given as the tuple (16, -1.0)),
# half precision (after re-patching with use_cuda_ext=True)
model.r = (16, -1.0)
bench_kwargs = dict(
    device=device,
    runs=runs,
    batch_size=batch_size,
    input_size=input_size,
    use_fp16=True,
    verbose=True,
)
tome_decr_throughput = tome.utils.benchmark(model, **bench_kwargs)

# Report the speed-up relative to the unpatched baseline
print(f"Throughput improvement: {tome_decr_throughput / baseline_throughput:.2f}x")

Benchmarking: 100%|██████████| 50/50 [00:08<00:00,  5.66it/s]


Throughput: 1395.00 im/s
Throughput improvement: 2.67x
