* Adapted from https://github.com/ryujaehun/pytorch-gpu-benchmark
* pip install plotly cufflinks

In [1]:
import warnings
warnings.filterwarnings('ignore')
import torch
import torchvision.models as models
import platform 
import torch.nn as nn
import datetime
import time
import os
import pandas as pd  
import argparse
from torch.utils.data import Dataset, DataLoader
import json
import cufflinks as cf
# Configure cufflinks (imported as `cf`) before any plotting is done; the
# `.iplot()` calls in the __main__ section presumably come from this package.
# NOTE(review): go_offline() is immediately followed by a config write with
# offline=False — confirm that the two settings are meant to coexist.
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

# Count of CUDA devices visible to this process; train() and inference() wrap
# each model in nn.DataParallel when this is greater than one.
NUM_GPU = torch.cuda.device_count()

# Enable cuDNN benchmark mode for the whole run.
torch.backends.cudnn.benchmark = True

def _builder_names(exported_names):
    """Keep only the all-lowercase entries of a module's exported-name list.

    The exported lists used below mix CamelCase entries (e.g. names ending in
    ``_Weights``) with all-lowercase builder names such as ``resnet18``. A
    fixed slice offset cannot separate the two, because the number of leading
    CamelCase entries differs from module to module; filtering on case does.

    Args:
        exported_names: Iterable of attribute-name strings, e.g. ``module.__all__``.

    Returns:
        list of the names that are entirely lowercase, in their original order.
    """
    return [name for name in exported_names if name.islower()]


# Maps each torchvision model module to the builder names benchmarked from it.
# Uncomment an entry to include that model family.
MODEL_LIST = {
    #models.mnasnet      : _builder_names(models.mnasnet.     __all__),
    models.resnet       : _builder_names(models.resnet.      __all__),
    models.densenet     : _builder_names(models.densenet.    __all__),
    #models.squeezenet   : _builder_names(models.squeezenet.  __all__),
    #models.vgg          : _builder_names(models.vgg.         __all__),
    #models.mobilenet    : _builder_names(models.mobilenet.   mv2_all),
    #models.mobilenet    : _builder_names(models.mobilenet.   mv3_all),
    #models.shufflenetv2 : _builder_names(models.shufflenetv2.__all__),
}

# Names of the dtype-cast methods to benchmark; each is looked up with getattr
# on both the model and the input batch. "double" can be appended as well.
precisions = ["half", "float"]

# Benchmark sizing
BATCH_SIZE = 12  # samples per batch
WARM_UP = 5      # leading steps that are executed but left out of the timings

class RandomDataset(Dataset):
    """In-memory dataset of ``length`` random float tensors of shape (3, 224, 224).

    All samples are drawn once from a standard normal distribution at
    construction time and then served by integer index.
    """

    def __init__(self, length):
        """Allocate ``length`` random samples.

        Args:
            length: Number of samples the dataset holds.
        """
        self.len = length
        # Sample-major layout: indexing the leading axis returns a contiguous
        # (3, 224, 224) tensor rather than a strided view across all samples.
        self.data = torch.randn(length, 3, 224, 224)

    def __getitem__(self, index):
        """Return the sample at ``index`` as a (3, 224, 224) tensor."""
        return self.data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

# Loader shared by every benchmark run. The dataset holds exactly WARM_UP + 50
# full batches, so each model gets 50 timed steps after the warm-up steps.
rand_loader = DataLoader(
    RandomDataset(length=(WARM_UP + 50) * BATCH_SIZE),
    batch_size=BATCH_SIZE,
    num_workers=8,
    shuffle=False,
)

def train(precision="float"):
    """Time one forward + backward pass per batch for every model in MODEL_LIST.

    Uses the random images from ``rand_loader`` and a fixed random target, so
    only training speed is measured. No optimizer step is taken.

    Args:
        precision: Name of the dtype-cast method applied (via ``getattr``) to
            both the model and each input batch, e.g. ``"half"`` or ``"float"``.

    Returns:
        dict mapping each model name to its list of per-step durations in
        milliseconds; the first ``WARM_UP`` steps are run but not recorded.
    """
    # One batch of random class indices below 1000, reused for every step.
    target = torch.LongTensor(BATCH_SIZE).random_(1000).cuda()
    criterion = nn.CrossEntropyLoss()
    benchmark = {}
    for model_type, model_names in MODEL_LIST.items():
        for model_name in model_names:
            # Names ending in "_Weights" are skipped (presumably torchvision
            # weight enums rather than model builders).
            if model_name.endswith('_Weights'):
                continue
            model = getattr(model_type, model_name)()
            if NUM_GPU > 1:
                model = nn.DataParallel(model, device_ids=range(NUM_GPU))
            model = getattr(model, precision)()
            model = model.to("cuda")
            durations = []
            print(f"Benchmarking Training {precision} precision type {model_name} ")
            for step, img in enumerate(rand_loader):
                img = getattr(img, precision)()
                # Wait for pending GPU work so the timer covers only this step.
                torch.cuda.synchronize()
                # perf_counter is monotonic, unlike wall-clock time.time().
                start = time.perf_counter()
                model.zero_grad()
                prediction = model(img.to("cuda"))
                loss = criterion(prediction, target)
                loss.backward()
                # Wait for this step's GPU work to finish before stopping the timer.
                torch.cuda.synchronize()
                end = time.perf_counter()
                if step >= WARM_UP:
                    durations.append((end - start) * 1000)
            print(
                f"{model_name} model average train time : {sum(durations)/len(durations)}ms"
            )
            del model
            benchmark[model_name] = durations
    return benchmark


def inference(precision="float"):
    """Time one forward pass per batch for every model in MODEL_LIST.

    Models are put in eval mode and run under ``torch.no_grad()`` on the random
    images from ``rand_loader``.

    Args:
        precision: Name of the dtype-cast method applied (via ``getattr``) to
            both the model and each input batch, e.g. ``"half"`` or ``"float"``.

    Returns:
        dict mapping each model name to its list of per-step durations in
        milliseconds; the first ``WARM_UP`` steps are run but not recorded.
    """
    benchmark = {}
    with torch.no_grad():
        for model_type, model_names in MODEL_LIST.items():
            for model_name in model_names:
                # Names ending in "_Weights" are skipped (presumably torchvision
                # weight enums rather than model builders).
                if model_name.endswith('_Weights'):
                    continue
                model = getattr(model_type, model_name)()
                if NUM_GPU > 1:
                    model = nn.DataParallel(model, device_ids=range(NUM_GPU))
                model = getattr(model, precision)()
                model = model.to("cuda")
                model.eval()
                durations = []
                print(
                    f"Benchmarking Inference {precision} precision type {model_name} "
                )
                for step, img in enumerate(rand_loader):
                    img = getattr(img, precision)()
                    # Wait for pending GPU work so the timer covers only this step.
                    torch.cuda.synchronize()
                    # perf_counter is monotonic, unlike wall-clock time.time().
                    start = time.perf_counter()
                    model(img.to("cuda"))
                    # Wait for the forward pass to finish before stopping the timer.
                    torch.cuda.synchronize()
                    end = time.perf_counter()
                    if step >= WARM_UP:
                        durations.append((end - start) * 1000)
                print(
                    f"{model_name} model average inference time : {sum(durations)/len(durations)}ms"
                )
                del model
                benchmark[model_name] = durations
    return benchmark


def _plot_mean_times(result, title):
    """Plot each model's mean step time as a scatter chart.

    Args:
        result: dict mapping model name to a list of step durations in ms, as
            returned by train() / inference().
        title: Title shown on the chart.
    """
    df = pd.DataFrame(result)
    # Models become rows after the transpose, so the grouping runs along the
    # default axis instead of the deprecated groupby(axis=1); the result is
    # still one mean per model, ordered by model name.
    per_model = df.T.groupby(level=0).mean().mean(axis=1)
    per_model.iplot(kind='scatter', mode='markers',
                    asImage=False, title=title,
                    xTitle='models', yTitle='time(ms)')


if __name__ == "__main__":
    # `display` is only defined under IPython/Jupyter; fall back to print so the
    # file also runs as a plain script.
    try:
        show = display
    except NameError:
        show = print

    # One-column summary of the GPU / CUDA environment being benchmarked.
    df = pd.DataFrame([torch.cuda.device_count(), torch.version.cuda,
                       torch.backends.cudnn.version(), torch.cuda.get_device_name(0)],
                      index=["# GPU" , "CUDA Ver." , "Cudnn Ver." , "Dev. Name"], columns=['']
                      )
    show(df)

    print(f"benchmark start : {datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}")

    # For each precision, run the training benchmark and then the inference
    # benchmark, plotting each result as soon as it is available.
    for precision in precisions:
        train_result = train(precision)
        _plot_mean_times(train_result, 'Train')

        inference_result = inference(precision)
        _plot_mean_times(inference_result, 'Infer')

    print(f"benchmark end : {datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}")

Unnamed: 0,Unnamed: 1
# GPU,1
CUDA Ver.,11.8
Cudnn Ver.,8700
Dev. Name,NVIDIA GeForce RTX 2060


benchmark start : 2023/08/30 05:11:34
Benchmarking Training half precision type resnet18 
resnet18 model average train time : 18.584237098693848ms
Benchmarking Training half precision type resnet34 
resnet34 model average train time : 30.530176162719727ms
Benchmarking Training half precision type resnet50 
resnet50 model average train time : 50.10777950286865ms
Benchmarking Training half precision type resnet101 
resnet101 model average train time : 82.00854301452637ms
Benchmarking Training half precision type resnet152 
resnet152 model average train time : 116.19025707244873ms
Benchmarking Training half precision type resnext50_32x4d 
resnext50_32x4d model average train time : 61.32874011993408ms
Benchmarking Training half precision type resnext101_32x8d 
resnext101_32x8d model average train time : 168.72469902038574ms
Benchmarking Training half precision type resnext101_64x4d 
resnext101_64x4d model average train time : 158.317232131958ms
Benchmarking Training half precision type wid

Benchmarking Inference half precision type resnet18 
resnet18 model average inference time : 6.75328254699707ms
Benchmarking Inference half precision type resnet34 
resnet34 model average inference time : 10.34419059753418ms
Benchmarking Inference half precision type resnet50 
resnet50 model average inference time : 16.51625633239746ms
Benchmarking Inference half precision type resnet101 
resnet101 model average inference time : 26.518306732177734ms
Benchmarking Inference half precision type resnet152 
resnet152 model average inference time : 37.0784330368042ms
Benchmarking Inference half precision type resnext50_32x4d 
resnext50_32x4d model average inference time : 19.915485382080078ms
Benchmarking Inference half precision type resnext101_32x8d 
resnext101_32x8d model average inference time : 51.234726905822754ms
Benchmarking Inference half precision type resnext101_64x4d 
resnext101_64x4d model average inference time : 51.530470848083496ms
Benchmarking Inference half precision type w

Benchmarking Training float precision type resnet18 
resnet18 model average train time : 35.86417198181152ms
Benchmarking Training float precision type resnet34 
resnet34 model average train time : 59.80921268463135ms
Benchmarking Training float precision type resnet50 
resnet50 model average train time : 104.26292896270752ms
Benchmarking Training float precision type resnet101 
resnet101 model average train time : 170.23240566253662ms
Benchmarking Training float precision type resnet152 
resnet152 model average train time : 240.47704219818115ms
Benchmarking Training float precision type resnext50_32x4d 
resnext50_32x4d model average train time : 148.03293228149414ms
Benchmarking Training float precision type resnext101_32x8d 
resnext101_32x8d model average train time : 420.13548374176025ms
Benchmarking Training float precision type resnext101_64x4d 
resnext101_64x4d model average train time : 437.47716426849365ms
Benchmarking Training float precision type wide_resnet50_2 
wide_resnet5

Benchmarking Inference float precision type resnet18 
resnet18 model average inference time : 13.081965446472168ms
Benchmarking Inference float precision type resnet34 
resnet34 model average inference time : 21.297636032104492ms
Benchmarking Inference float precision type resnet50 
resnet50 model average inference time : 33.565802574157715ms
Benchmarking Inference float precision type resnet101 
resnet101 model average inference time : 55.39978504180908ms
Benchmarking Inference float precision type resnet152 
resnet152 model average inference time : 78.64436626434326ms
Benchmarking Inference float precision type resnext50_32x4d 
resnext50_32x4d model average inference time : 45.867910385131836ms
Benchmarking Inference float precision type resnext101_32x8d 
resnext101_32x8d model average inference time : 129.0067720413208ms
Benchmarking Inference float precision type resnext101_64x4d 
resnext101_64x4d model average inference time : 135.88871002197266ms
Benchmarking Inference float prec

benchmark end : 2023/08/30 05:16:08
