In [3]:
import time

import jpeg4py as jpeg
import torch
from torch.utils.data import Dataset, DataLoader

from set_loader import CustomDataset

# AFHQ

In [13]:
# AFHQ train split read through CustomDataset with loader_type='pil'.
root_path = '/home/aiteam/tykim/dataset/afhq/train'
custom_ds = CustomDataset(root_path, loader_type='pil')
# Unshuffled batches of 128 samples, loaded by 8 workers.
dataloader = DataLoader(
    dataset=custom_ds,
    batch_size=128,
    shuffle=False,
    num_workers=8,
)

In [14]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for image, label in dataloader:
    image = image.cuda()
    label = label.cuda()
    pass
pil_time = time.time() - start_time
simple_load_times.append(pil_time)
print(str(simple_load_times) + ' sec')

[18.762083530426025] sec
[18.24456214904785] sec
[17.425167322158813] sec
[18.613457441329956] sec
[19.203899145126343] sec
[19.258994817733765] sec
[19.374570608139038] sec
[18.847532033920288] sec
[19.108444690704346] sec
18.8 s ± 437 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [15]:
# AFHQ train split read through CustomDataset with loader_type='opencv'.
root_path = '/home/aiteam/tykim/dataset/afhq/train'
custom_ds = CustomDataset(root_path, loader_type='opencv')
# Unshuffled batches of 128 samples, loaded by 8 workers.
dataloader = DataLoader(
    dataset=custom_ds,
    batch_size=128,
    shuffle=False,
    num_workers=8,
)

In [16]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for image, label in dataloader:
    image = image.cuda()
    label = label.cuda()
    pass
opencv_time = time.time() - start_time
simple_load_times.append(opencv_time)
print(str(simple_load_times) + ' sec')

[7.810119867324829] sec
[7.763317346572876] sec
[7.54777193069458] sec
[7.822279214859009] sec
[7.724693536758423] sec
[7.8466784954071045] sec
[7.711875915527344] sec
[7.677571773529053] sec
[7.6445276737213135] sec
7.73 s ± 51.1 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [17]:
# AFHQ train split read through CustomDataset with loader_type='jpeg4py'.
root_path = '/home/aiteam/tykim/dataset/afhq/train'
custom_ds = CustomDataset(root_path, loader_type='jpeg4py')
# Unshuffled batches of 128 samples, loaded by 8 workers.
dataloader = DataLoader(
    dataset=custom_ds,
    batch_size=128,
    shuffle=False,
    num_workers=8,
)

In [18]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for image, label in dataloader:
    image = image.cuda()
    label = label.cuda()
    pass
jpeg4py_time = time.time() - start_time
simple_load_times.append(jpeg4py_time)
print(str(simple_load_times) + ' sec')

[5.72104024887085] sec
[5.583805322647095] sec
[5.445183515548706] sec
[5.339044809341431] sec
[5.4290385246276855] sec
[5.507980823516846] sec
[5.217567682266235] sec
[5.513427019119263] sec
[5.363497018814087] sec
5.46 s ± 92.1 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [12]:
from nvidia.dali.pipeline import pipeline_def
import nvidia.dali.types as types
import nvidia.dali.fn as fn
from nvidia.dali.plugin.pytorch import DALIGenericIterator

root_path = '/home/aiteam/tykim/dataset/afhq/train'


@pipeline_def(batch_size=128, num_threads=8, device_id=0)
def get_dali_pipeline(data_dir):
    """Define a DALI pipeline that reads and decodes the images under `data_dir`.

    Args:
        data_dir: Directory handed to `fn.readers.file` as `file_root`.

    Returns:
        An `(images, labels)` pair of pipeline outputs: the files decoded by
        `fn.decoders.image` with device="mixed", and the reader's labels
        moved to the GPU.
    """
    img_files, labels = fn.readers.file(
        file_root=data_dir, random_shuffle=False, name="Reader"
    )
    images = fn.decoders.image(img_files, device="mixed")
    return images, labels.gpu()


pipe = get_dali_pipeline(data_dir=root_path)
pipe.build()

# auto_reset=True lets the iterator rewind itself when an epoch ends, so it can
# be looped over again (once per %%timeit loop) without an explicit reset().
dataloader = DALIGenericIterator(
    pipe, ['data', 'label'], reader_name='Reader', auto_reset=True
)

In [13]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for i, data in enumerate(dataloader):
  x, y = data[0]['data'], data[0]['label']  

dali_time = time.time() - start_time
simple_load_times.append(dali_time)
print(str(simple_load_times) + ' sec') 

[1.4376187324523926] sec
[1.3231616020202637] sec
[1.3179664611816406] sec
[1.354482650756836] sec
[1.3438947200775146] sec
[1.3326356410980225] sec
[1.3353204727172852] sec
[1.3344662189483643] sec
[1.4105403423309326] sec
1.35 s ± 7.67 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [None]:
# FFCV - JPEG 100 % quality

from ffcv.writer import DatasetWriter
from ffcv.fields import IntField, RGBImageField
from torchvision.datasets import ImageFolder

from ffcv.fields.decoders import IntDecoder, SimpleRGBImageDecoder
from ffcv.loader import Loader, OrderOption
from ffcv.transforms import ToDevice, ToTensor, ToTorchImage


In [16]:
# Preparation: convert the AFHQ train ImageFolder into an FFCV .beton file.
my_dataset = ImageFolder(root='/home/aiteam/tykim/dataset/afhq/train')
write_path = '/home/aiteam/tykim/scratch/data_loaders/afhq_io_test.beton'

# One field type per element of a dataset sample.
# NOTE(review): RGBImageField appears to default to raw storage (write_mode='raw'),
# in which case jpeg_quality would have no effect -- confirm against the FFCV docs.
writer = DatasetWriter(
    write_path,
    dict(image=RGBImageField(jpeg_quality=100), label=IntField()),
)

# Serialize every sample of the indexed dataset to `write_path`.
writer.from_indexed_dataset(my_dataset)

100%|██████████| 14630/14630 [00:21<00:00, 696.44it/s] 


In [19]:
# FFCV loader over the .beton file at `write_path`, RANDOM sample order.
# No cropping or other augmentation is applied: images are only decoded.
decoder = SimpleRGBImageDecoder()

# Per-field processing: decode, convert to a tensor, move to cuda:0.
# The image path also applies ToTorchImage and uses a non-blocking copy.
image_pipeline = [
    decoder,
    ToTensor(),
    ToTorchImage(),
    ToDevice('cuda:0', non_blocking=True),
]
label_pipeline = [IntDecoder(), ToTensor(), ToDevice('cuda:0')]

# Field name -> processing pipeline.
pipelines = dict(image=image_pipeline, label=label_pipeline)

# Used in place of `torch.utils.data.DataLoader`.
loader = Loader(
    write_path,
    batch_size=128,
    num_workers=8,
    order=OrderOption.RANDOM,
    pipelines=pipelines,
    os_cache=True,
)

In [20]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for batch_idx, data in enumerate(loader):
    inputs, labels = data
    
ffcv_time = time.time() - start_time
simple_load_times.append(ffcv_time)
print(str(simple_load_times) + ' sec') 

[1.9811444282531738] sec
[1.1470427513122559] sec
[1.1128733158111572] sec
[0.8895688056945801] sec
[0.9101715087890625] sec
[0.9381265640258789] sec
[0.9251284599304199] sec
[0.9449906349182129] sec
[0.918032169342041] sec
1.09 s ± 232 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [36]:
# FFCV loader over the .beton file at `write_path`, QUASI_RANDOM sample order.
decoder = SimpleRGBImageDecoder()

# Per-field processing: decode, convert to a tensor, move to cuda:0.
# The image path also applies ToTorchImage and uses a non-blocking copy.
image_pipeline = [
    decoder,
    ToTensor(),
    ToTorchImage(),
    ToDevice('cuda:0', non_blocking=True),
]
label_pipeline = [IntDecoder(), ToTensor(), ToDevice('cuda:0')]

# Field name -> processing pipeline.
pipelines = dict(image=image_pipeline, label=label_pipeline)

# Used in place of `torch.utils.data.DataLoader`.
loader = Loader(
    write_path,
    batch_size=128,
    num_workers=8,
    order=OrderOption.QUASI_RANDOM,
    pipelines=pipelines,
    os_cache=True,
)

In [37]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for batch_idx, data in enumerate(loader):
    inputs, labels = data
    
ffcv_time = time.time() - start_time
simple_load_times.append(ffcv_time)
print(str(simple_load_times) + ' sec') 

[1.7662463188171387] sec
[0.9466965198516846] sec
[0.971419095993042] sec
[0.9264469146728516] sec
[0.9306745529174805] sec
[0.9450130462646484] sec
[0.9147017002105713] sec
[0.9315376281738281] sec
[0.9097979068756104] sec
1.03 s ± 142 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


# CIFAR-10

In [3]:
# CIFAR-10 train images read through CustomDataset with loader_type='pil'.
root_path = '/home/aiteam/tykim/dataset/CIFAR-10-images/train'
custom_ds = CustomDataset(root_path, loader_type='pil')
# Unshuffled batches of 128 samples, loaded by 8 workers.
dataloader = DataLoader(
    dataset=custom_ds,
    batch_size=128,
    shuffle=False,
    num_workers=8,
)

In [4]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for image, label in dataloader:
    image = image.cuda()
    label = label.cuda()
    pass
pil_time = time.time() - start_time
simple_load_times.append(pil_time)
print(str(simple_load_times) + ' sec')

[3.300985813140869] sec
[1.7530553340911865] sec
[2.084369659423828] sec
[1.956310510635376] sec
[1.895453691482544] sec
[1.8113484382629395] sec
[2.076040744781494] sec
[1.9186503887176514] sec
[2.023421287536621] sec
2.09 s ± 210 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [5]:
# CIFAR-10 train images read through CustomDataset with loader_type='opencv'.
root_path = '/home/aiteam/tykim/dataset/CIFAR-10-images/train'
custom_ds = CustomDataset(root_path, loader_type='opencv')
# Unshuffled batches of 128 samples, loaded by 8 workers.
dataloader = DataLoader(
    dataset=custom_ds,
    batch_size=128,
    shuffle=False,
    num_workers=8,
)

In [6]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for image, label in dataloader:
    image = image.cuda()
    label = label.cuda()
    pass
opencv_time = time.time() - start_time
simple_load_times.append(opencv_time)
print(str(simple_load_times) + ' sec')

[0.8447706699371338] sec
[0.8297159671783447] sec
[0.782383918762207] sec
[0.7501564025878906] sec
[0.777930498123169] sec
[0.7334840297698975] sec
[0.6861250400543213] sec
[0.8113012313842773] sec
[0.7088854312896729] sec
770 ms ± 35.8 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [9]:
# CIFAR-10 train images read through CustomDataset with loader_type='jpeg4py'.
root_path = '/home/aiteam/tykim/dataset/CIFAR-10-images/train'
custom_ds = CustomDataset(root_path, loader_type='jpeg4py')
# Unshuffled batches of 128 samples, loaded by 8 workers.
dataloader = DataLoader(
    dataset=custom_ds,
    batch_size=128,
    shuffle=False,
    num_workers=8,
)

In [10]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for image, label in dataloader:
    image = image.cuda()
    label = label.cuda()
    pass
jpeg4py_time = time.time() - start_time
simple_load_times.append(jpeg4py_time)
print(str(simple_load_times) + ' sec')

[0.9442241191864014] sec
[0.9938108921051025] sec
[0.9856014251708984] sec
[0.9854307174682617] sec
[0.9906682968139648] sec
[0.9095518589019775] sec
[1.0330533981323242] sec
[0.946082592010498] sec
[0.901226282119751] sec
966 ms ± 6.41 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [14]:
# DALI
from nvidia.dali.pipeline import pipeline_def
import nvidia.dali.types as types
import nvidia.dali.fn as fn
from nvidia.dali.plugin.pytorch import DALIGenericIterator

root_path = '/home/aiteam/tykim/dataset/CIFAR-10-images/train'


@pipeline_def(batch_size=128, num_threads=8, device_id=0)
def get_dali_pipeline(data_dir):
    """Define a DALI pipeline that reads and decodes the images under `data_dir`.

    Args:
        data_dir: Directory handed to `fn.readers.file` as `file_root`.

    Returns:
        An `(images, labels)` pair of pipeline outputs: the files decoded by
        `fn.decoders.image` with device="mixed", and the reader's labels
        moved to the GPU.
    """
    img_files, labels = fn.readers.file(
        file_root=data_dir, random_shuffle=False, name="Reader"
    )
    images = fn.decoders.image(img_files, device="mixed")
    return images, labels.gpu()


pipe = get_dali_pipeline(data_dir=root_path)
pipe.build()

# auto_reset=True lets the iterator rewind itself when an epoch ends, so it can
# be looped over again (once per %%timeit loop) without an explicit reset().
dataloader = DALIGenericIterator(
    pipe, ['data', 'label'], reader_name='Reader', auto_reset=True
)

In [15]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for i, data in enumerate(dataloader):
  x, y = data[0]['data'], data[0]['label']  

dali_time = time.time() - start_time
simple_load_times.append(dali_time)
print(str(simple_load_times) + ' sec') 

[0.8147809505462646] sec
[0.7482140064239502] sec
[0.668778657913208] sec
[0.7292904853820801] sec
[0.676008939743042] sec
[0.6448142528533936] sec
[0.6547183990478516] sec
[0.6652204990386963] sec
[0.6618211269378662] sec
696 ms ± 35.2 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [21]:
# FFCV

from ffcv.writer import DatasetWriter
from ffcv.fields import IntField, RGBImageField
from torchvision.datasets import ImageFolder

from ffcv.fields.decoders import IntDecoder, SimpleRGBImageDecoder
from ffcv.loader import Loader, OrderOption
from ffcv.transforms import ToDevice, ToTensor, ToTorchImage

In [22]:
# Preparation: convert the CIFAR-10 train ImageFolder into an FFCV .beton file.

my_dataset = ImageFolder(root='/home/aiteam/tykim/dataset/CIFAR-10-images/train')
write_path = '/home/aiteam/tykim/scratch/data_loaders/cifar10_io_test.beton'

# One field type per element of a dataset sample.
# NOTE(review): RGBImageField appears to default to raw storage (write_mode='raw'),
# in which case jpeg_quality would have no effect -- confirm against the FFCV docs.
writer = DatasetWriter(
    write_path,
    dict(image=RGBImageField(jpeg_quality=100), label=IntField()),
)

# Serialize every sample of the indexed dataset to `write_path`.
writer.from_indexed_dataset(my_dataset)

100%|██████████| 50000/50000 [00:00<00:00, 498219.89it/s]


In [23]:
# FFCV loader over the .beton file at `write_path`, RANDOM sample order.
# No cropping or other augmentation is applied: images are only decoded.
decoder = SimpleRGBImageDecoder()

# Per-field processing: decode, convert to a tensor, move to cuda:0.
# The image path also applies ToTorchImage and uses a non-blocking copy.
image_pipeline = [
    decoder,
    ToTensor(),
    ToTorchImage(),
    ToDevice('cuda:0', non_blocking=True),
]
label_pipeline = [IntDecoder(), ToTensor(), ToDevice('cuda:0')]

# Field name -> processing pipeline.
pipelines = dict(image=image_pipeline, label=label_pipeline)

# Used in place of `torch.utils.data.DataLoader`.
loader = Loader(
    write_path,
    batch_size=128,
    num_workers=8,
    order=OrderOption.RANDOM,
    pipelines=pipelines,
    os_cache=True,
)

In [24]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for batch_idx, data in enumerate(loader):
    inputs, labels = data
    
ffcv_time = time.time() - start_time
simple_load_times.append(ffcv_time)
print(str(simple_load_times) + ' sec') 

[1.0405628681182861] sec
[0.19046449661254883] sec
[0.2178652286529541] sec
[0.21071386337280273] sec
[0.21244049072265625] sec
[0.22309231758117676] sec
[0.22395992279052734] sec
[0.22606253623962402] sec
[0.22167515754699707] sec
308 ms ± 124 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [38]:
# FFCV loader over the CIFAR-10 .beton file, QUASI_RANDOM sample order.
# No cropping or other augmentation is applied: images are only decoded.
decoder = SimpleRGBImageDecoder()

# Per-field processing: decode, convert to a tensor, move to cuda:0.
# The image path also applies ToTorchImage and uses a non-blocking copy.
image_pipeline = [
    decoder,
    ToTensor(),
    ToTorchImage(),
    ToDevice('cuda:0', non_blocking=True),
]
label_pipeline = [IntDecoder(), ToTensor(), ToDevice('cuda:0')]

# Field name -> processing pipeline.
pipelines = dict(image=image_pipeline, label=label_pipeline)
write_path = '/home/aiteam/tykim/scratch/data_loaders/cifar10_io_test.beton'
# Used in place of `torch.utils.data.DataLoader`.
loader = Loader(
    write_path,
    batch_size=128,
    num_workers=8,
    order=OrderOption.QUASI_RANDOM,
    pipelines=pipelines,
    os_cache=True,
)

In [39]:
%%timeit -r 3 -n 3
simple_load_times = []
start_time = time.time()
for batch_idx, data in enumerate(loader):
    inputs, labels = data
    
ffcv_time = time.time() - start_time
simple_load_times.append(ffcv_time)
print(str(simple_load_times) + ' sec') 

[1.009735345840454] sec
[0.1959528923034668] sec
[0.21576571464538574] sec
[0.23236322402954102] sec
[0.22238373756408691] sec
[0.2182614803314209] sec
[0.217071533203125] sec
[0.23358869552612305] sec
[0.2304394245147705] sec
308 ms ± 117 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


In [None]:
########## Final results ##########
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): the bar heights are hard-coded and are not computed from the
# timing cells in this notebook; their source and unit are not shown -- confirm.
left = np.array([1, 2, 3, 4])  # x position of each bar
height = np.array([71, 41.5, 26.2, 8.1])
# One tick label per bar, in the same order as `height`.
label = [
    "OpenCV\n+\nAlbumentations",
    "jpeg4py\n+\nAlbumentations",
    "jpeg4py\n+\nKornia",
    "NVIDIA DALI\n+\nKornia",
]
plt.bar(left, height, tick_label=label, align="center")