In [None]:
import time
import sys
import json
from argparse import ArgumentParser
import multiprocessing as mp

from pandas.io.json import json_normalize

import torch
import torch.nn.functional as F
from torch import nn
from torch import optim

from utils import *
from resnet import make_model

from bench import *

In [None]:
# --- Benchmark configuration -------------------------------------------------
BATCH_SIZE = 256       # batch size handed to make_cifar10_dataset
DATA_PATH = "./data"   # directory passed to download_cifar10 / make_cifar10_dataset
OUTPUT_PATH = "./"     # prefix of the logs.txt file written at the end
CUDA_DEVICES = "all"   # "all", or a comma-separated list of device indices, e.g. "0, 2"

# Turn the CUDA_DEVICES setting into a list of integer device indices.
if CUDA_DEVICES != "all":
    # Explicit selection: split on commas and tolerate spaces around each index.
    cuda_devices = [int(token.strip()) for token in CUDA_DEVICES.split(",")]
else:
    # "all": every device index that torch reports.
    cuda_devices = list(range(torch.cuda.device_count()))

# Report the runtime environment the benchmark is about to use.
print("Deep Learning Benchmark")
for caption, setting in (
    ("  CUDA:  ", torch.cuda.is_available()),
    ("  CUDNN: ", torch.backends.cudnn.enabled),
    ("  #GPUs: ", torch.cuda.device_count()),
    ("  GPUs selected: ", cuda_devices),
):
    print(caption, setting)
print()

# Fetch the dataset; download_cifar10 returns two values that are reported
# below as the download time and the untar time, rounded to 3 decimals.
print("CIFAR10 dataset")
download_time, untar_time = download_cifar10(DATA_PATH)
for caption, elapsed in (
    ("  download time:", download_time),
    ("  untar time:", untar_time),
):
    print(caption, round(elapsed, 3))
print()

In [None]:
# Benchmark records; add_item is called with this list once per measured run.
stats = []

# The whole benchmark is skipped unless at least one CUDA device is present
# and cuDNN is enabled.
if torch.cuda.device_count() and torch.backends.cudnn.enabled:
    torch.backends.cudnn.benchmark = True

    model_list = ["ResNet18", "ResNet152"]

    # Pass 1: loader built with num_workers=0, timed through train_cnn_ram.
    print("CIFAR10 benchmark (RAM->GPU data transfer)")
    for model_type in model_list:
        print(f"[{model_type}]")
        for device in cuda_devices:
            device_name = f"cuda:{device}"
            trn_loader = make_cifar10_dataset(
                DATA_PATH, BATCH_SIZE, distributed=False, num_workers=0
            )
            model_time, n_batches = train_cnn_ram(model_type, trn_loader, device)
            n_images = BATCH_SIZE * n_batches

            add_item(stats, "ram_gpu", device_name,
                     model_type, model_time, n_batches, n_images)

            # model_time is passed as its own argument so print() renders it via str().
            print("  " + device_name, model_time,
                  f"sec / batch ({n_batches} batches, {n_images} images)")
    print()

    # Pass 2: one run per loader worker count in range(mp.cpu_count()),
    # timed through train_cnn_full.
    print("CIFAR10 benchmark (full+disk)")
    for model_type in model_list:
        for num_workers in range(mp.cpu_count()):
            print(f"[{model_type} #workers {num_workers}]")
            for device in cuda_devices:
                device_name = f"cuda:{device}"
                trn_loader = make_cifar10_dataset(
                    DATA_PATH, BATCH_SIZE, distributed=False, num_workers=num_workers
                )
                model_time, n_batches = train_cnn_full(model_type, trn_loader, device)
                n_images = BATCH_SIZE * n_batches

                # Benchmark label is "cifar" plus the zero-padded worker count.
                add_item(stats, f"cifar{num_workers:02d}", device_name,
                         model_type, model_time, n_batches, n_images)

                print("  " + device_name, model_time,
                      f"sec / batch ({n_batches} batches, {n_images} images)")
    print()

In [None]:
# Flatten the collected benchmark records into a table.
df = json_normalize(stats)
if df.empty:
    # stats stays empty when the benchmark cell's CUDA/cuDNN guard is false.
    # An empty frame has no "benchmark"/"model"/"device" columns, so sorting
    # by them would raise KeyError; skip sorting and leave any existing
    # logs.txt untouched.
    print("No benchmark results were collected; logs.txt was not written.")
else:
    # Reassign instead of inplace=True so re-running the cell stays predictable.
    df = df.sort_values(by=["benchmark", "model", "device"])
    df.to_csv(OUTPUT_PATH + "/logs.txt")
df