In [3]:
import torch
from torchvision.datasets import FakeData
import PIL
import numpy as np
import os
from tqdm import tqdm_notebook
from cnn import *
import shutil

# cifar - 32x32
# dim128 - 128x128
# imagenet - 256x256
# dim512 - 512x512
# dim1024 - 1024x1024

def generate_dataset(size, key):
    assert key in ["cifar", "dim128", "imagenet", "dim512", "dim1024"]
    
    num_classes = 10
    image_sizes = {"cifar": (3,32,32), 
                   "dim128": (3,128,128), 
                   "imagenet": (3,256,256), 
                   "dim512": (3,512,512), 
                   "dim1024": (3,1024,1024)
                  }
    image_size = image_sizes[key]
    
    folder_name = "data/" + key
    os.makedirs(folder_name, exist_ok=True)
    for i in range(num_classes):
        os.makedirs(folder_name + "/" + str(i), exist_ok=True)
    
    dataset = FakeData(size=size, image_size=image_size, num_classes=num_classes)
    
    for img_i, (img, cls) in tqdm_notebook(zip(range(size), dataset), total=size):
        img.save(folder_name + "/" + str(cls.item()) + "/img" + str(img_i) + ".png", "PNG")
        
        
def train_cnn_full(model_type, trn_loader, n_trials=5, device="cuda:0"):
    assert device != "cpu"
    if type(device) is int:
        device = "cuda:" + str(device)
    model = make_model(model_type.lower()).to(device)

    optimizer = optim.Adam(model.parameters())

    trial_time = []
    for i_trial in range(n_trials):
        start = time.time()
        for batch_i, (batch, labels) in enumerate(trn_loader):
            batch, labels = batch.to(device), labels.to(device)
            preds = model(batch)
            loss = F.cross_entropy(preds, labels)
            loss.backward()
            optimizer.step()
        end = time.time()
        trial_time.append(round((end - start) / batch_i, 3))

    batch_i += 1
    return trial_time, batch_i


def add_item(stats, bench, cuda, model, time, batches, images):
    stats.append({})
    stats[-1]["benchmark"] = bench
    stats[-1]["device"] = cuda
    stats[-1]["model"] = model
    stats[-1]["time"] = time
    stats[-1]["batches"] = batches
    stats[-1]["objects"] = images
    

def make_cnn_dataset_resnet(data_path, batch_size, device, distributed=False, num_workers=0, transformations=True):
    torch.cuda.device(device)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.ColorJitter(.25,.25,.25),
        transforms.RandomRotation(2),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    if transformations:
        train_data = torchvision.datasets.ImageFolder(data_path, transform_train)
    else:
        train_data = torchvision.datasets.ImageFolder(data_path)
    trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=num_workers)

    return trainloader

In [None]:
1+1

In [14]:
# Classic fs

stats = []

key = "dim1024"
dataset_size = 2000
batch_size = 256
num_workers = 4
model_type = "resnet18"

# generate_dataset(dataset_size, key)

trn_loader = make_cnn_dataset_resnet("data/" + key, batch_size, "cuda", False, num_workers, True)
model_time, n_batches = train_cnn_full(model_type, trn_loader, 10)
# add_item(stats, key, "cuda:0", model_type, model_time, n_batches, batch_size * n_batches)

# shutil.rmtree("data/" + key)

print(model_time)

[3.005, 3.027, 3.187, 3.002, 3.221, 3.347, 3.051, 3.049, 2.92, 3.497]


In [15]:
import fastai

In [16]:
fastai.__version__

'1.0.52'

In [None]:
stats = []
if torch.cuda.device_count() and torch.backends.cudnn.enabled:
    model_list = ["ResNet18", "ResNet152"] 

    print("CIFAR10 benchmark (RAM->GPU data transfer)")
    for model_type in model_list:
        print("[" + model_type + "]")
        for device in cuda_devices:
            trn_loader = make_cifar10_dataset(DATA_PATH, BATCH_SIZE, distributed=False, num_workers=0)
            model_time, n_batches = train_cnn_ram(model_type, trn_loader, device)

            add_item(stats, "ram_gpu", "cuda:" + str(device), 
                     model_type, model_time, n_batches, BATCH_SIZE * n_batches)

            print("  cuda:" + str(device), model_time, "sec / batch (" + str(n_batches) + " batches, " + str(BATCH_SIZE * n_batches) + " images)")
    print()

    print("CIFAR10 benchmark (full+disk)")
    for model_type in model_list:
        for num_workers in range(0, mp.cpu_count()):
            print("[" + model_type + " #workers ", num_workers, "]", sep="")
            for device in cuda_devices:
                trn_loader = make_cifar10_dataset(DATA_PATH, BATCH_SIZE, distributed=False, num_workers=num_workers)
                model_time, n_batches = train_cnn_full(model_type, trn_loader, device)

                add_item(stats, "cifar" + str(num_workers).zfill(2), "cuda:" + str(device), 
                         model_type, model_time, n_batches, BATCH_SIZE * n_batches)

                print("  cuda:" + str(device), model_time, "sec / batch (" + str(n_batches) + " batches, " + str(BATCH_SIZE * n_batches) + " images)")
    print()

In [None]:
df = json_normalize(stats)
df.sort_values(by=["benchmark", "model", "device"], inplace=True)
df.to_csv(OUTPUT_PATH + "/logs.txt")
df