<a href="https://colab.research.google.com/github/yoneken1/colab_pytorch_sample/blob/master/pytorch_dali.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DALIインストール
Google Colaboratory の環境に合わせて、CUDA 10 版を入れる

In [4]:
!pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 nvidia-dali

Looking in indexes: https://pypi.org/simple, https://developer.download.nvidia.com/compute/redist/cuda/10.0
Collecting nvidia-dali
[?25l  Downloading https://developer.download.nvidia.com/compute/redist/cuda/10.0/nvidia-dali/nvidia_dali-0.13.0-853141-cp36-cp36m-manylinux1_x86_64.whl (37.0MB)
[K     |████████████████████████████████| 37.0MB 1.6MB/s 
Installing collected packages: nvidia-dali
Successfully installed nvidia-dali-0.13.0


# MS-COCOのアノテーションデータの準備
MS-COCO を使って試すために、アノテーションデータと検証用画像 (val2017) をダウンロードして解凍する

In [0]:
!cd /content/
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip '/content/annotations_trainval2017.zip'

In [0]:
!cd /content/
!wget http://images.cocodataset.org/zips/val2017.zip
!unzip '/content/val2017.zip'

# PyTorchのDataLoaderを使う

In [0]:
# Number of images per batch, shared by every loader / pipeline below.
BATCH_SIZE = 4
# Seconds slept per image read to simulate very slow file I/O.
DATA_LOAD_TIME = 0.01
# Seconds slept per batch to simulate a deep-learning training step.
LEARNING_TIME = 0.1

In [0]:
from pycocotools.coco import COCO
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import cv2
import time

In [0]:
class CocoDataset(Dataset):
    """Map-style dataset that returns COCO images decoded and resized with OpenCV.

    Image ids are the keys of ``coco.imgToAnns``; each item is the image read
    by ``cv2.imread`` and resized to 512x512.
    """

    def __init__(self, dataType='val2017'):
        """Load the instance annotations for ``dataType`` and index its images.

        Args:
            dataType: COCO split name. Selects the annotation file
                ``/content/annotations/instances_<dataType>.json`` and the
                image directory ``/content/<dataType>``.
        """
        annFile='/content/annotations/instances_{}.json'.format(dataType)
        self.coco=COCO(annFile)
        self.ids = list(self.coco.imgToAnns.keys())
        self.imgs = self.coco.loadImgs(self.ids)
        # Derive the image directory from the split instead of hard-coding
        # val2017, so a non-default dataType reads the matching images.
        self.img_dir = '/content/{}'.format(dataType)

    def __len__(self):
        """Return the number of indexed image ids."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Read image ``idx`` from disk and return it resized to 512x512.

        Raises:
            IOError: if ``cv2.imread`` could not read the file.
        """
        img_path = os.path.join(self.img_dir, self.imgs[idx]['file_name'])
        time.sleep(DATA_LOAD_TIME) #for simulation of very slow file IO
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        if img is None:
            raise IOError('could not read image: {}'.format(img_path))
        return cv2.resize(img, (512,512))


In [0]:
  def collate_fn(batch):
    """Return the samples of ``batch`` as a new plain list, without stacking them."""
    return list(batch)
    

In [180]:
# CPU baseline: images are decoded and resized inside the DataLoader workers.
coco_dataset = CocoDataset()
coco_dataloader = DataLoader(
    coco_dataset,
    batch_size=BATCH_SIZE,
    num_workers=8,
    collate_fn=collate_fn,
)

loading annotations into memory...
Done (t=0.41s)
creating index...
index created!


In [181]:
data_iter = iter(coco_dataloader)

# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()
for _ in range(100):
  images = next(data_iter)
  time.sleep(LEARNING_TIME) #for simulation of DeepLearning
end = time.perf_counter()

print('total_time : %s' % ( str(end-start) ))

total_time : 11.102628707885742


# DALIのサンプルに従う場合

In [0]:
from __future__ import division
import types
import collections
import numpy as np
from random import shuffle
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops            
import nvidia.dali.types as types


In [0]:
class ExternalInputIterator(object):
    """Batch source that hands raw, still-encoded image files to a DALI pipeline.

    Each call to ``next`` returns a list of ``batch_size`` 1-D ``uint8`` arrays
    holding the undecoded bytes of consecutive image files. The read position
    wraps around at the end of the image list, so for a non-empty list the
    iteration does not terminate on its own.
    """

    def __init__(self, batch_size, dataType='val2017'):
        """Load the instance annotations for ``dataType`` and index its images.

        Args:
            batch_size: number of files returned per call to ``next``.
            dataType: COCO split name. Selects the annotation file
                ``/content/annotations/instances_<dataType>.json`` and the
                image directory ``/content/<dataType>``.
        """
        annFile='/content/annotations/instances_{}.json'.format(dataType)
        self.coco=COCO(annFile)
        self.ids = list(self.coco.imgToAnns.keys())
        self.imgs = self.coco.loadImgs(self.ids)
        # Derive the image directory from the split instead of hard-coding
        # val2017, so a non-default dataType reads the matching images.
        self.img_dir = '/content/{}'.format(dataType)

        self.batch_size = batch_size
        self.data_set_len = len(self.imgs) 
        self.n = len(self.imgs)
        # Start at the first image so next() also works without a prior iter().
        self.i = 0

    def __iter__(self):
        """Rewind to the first image and return ``self``."""
        self.i = 0
        return self

    def __next__(self):
        """Return the next ``batch_size`` encoded images as ``uint8`` arrays.

        Raises:
            StopIteration: only when the image list is empty.
        """
        # The index below wraps modulo n, so this guard can only trigger for
        # an empty image list (where it also prevents a modulo-by-zero).
        if self.i >= self.n:
            raise StopIteration

        batch = []
        for _ in range(self.batch_size):
            img_path = os.path.join(self.img_dir, self.imgs[self.i]['file_name'])
            # Context manager closes the handle; the original leaked one per image.
            with open(img_path, 'rb') as f:
                time.sleep(DATA_LOAD_TIME) #for simulation of very slow file IO
                batch.append(np.frombuffer(f.read(), dtype = np.uint8))
            self.i = (self.i + 1) % self.n
        return batch

    @property
    def size(self,):
        """Number of images in the data set."""
        return self.data_set_len

    # Python 2 style alias, used by callers that invoke ``iterator.next()``.
    next = __next__

In [0]:
class ExternalSourcePipeline(Pipeline):
    """DALI pipeline that decodes and resizes encoded images fed from Python.

    Encoded image buffers pulled from ``external_data`` enter the graph through
    an ``ExternalSource`` node, are decoded to RGB with the "mixed" image
    decoder and resized to 512x512 on the GPU.
    """

    def __init__(self, batch_size, num_threads, device_id, external_data):
        """Create the operators and start iterating over ``external_data``.

        Args:
            batch_size: forwarded to ``Pipeline.__init__``.
            num_threads: forwarded to ``Pipeline.__init__``.
            device_id: forwarded to ``Pipeline.__init__``.
            external_data: iterable whose iterator yields one batch of encoded
                image buffers per step.
        """
        super(ExternalSourcePipeline, self).__init__(batch_size,
                                      num_threads,
                                      device_id,
                                      seed=12)
        self.input = ops.ExternalSource()
        self.decode = ops.ImageDecoder(device = "mixed", output_type = types.RGB)
        self.res = ops.Resize(device="gpu", resize_x=512, resize_y=512)
        self.external_data = external_data
        self.iterator = iter(self.external_data)

    def define_graph(self):
        """Wire external input -> decode -> resize and return the resized images."""
        # Kept on self because iter_setup() feeds data into this node.
        self.jpegs = self.input()
        images = self.decode(self.jpegs)
        images = self.res(images)
        return images

    def iter_setup(self):
        """Feed the next batch from the external iterator into the graph input.

        Raises:
            StopIteration: when the external iterator is exhausted; the
                iterator is recreated first so a later call starts over.
        """
        try:
            # Use the next() builtin instead of the Python 2 style .next()
            # method, so any iterator works and not only ones defining a
            # `next` alias.
            images = next(self.iterator)
            self.feed_input(self.jpegs, images)
        except StopIteration:
            self.iterator = iter(self.external_data)
            raise StopIteration

In [188]:
from nvidia.dali.plugin.pytorch import DALIGenericIterator

eii = ExternalInputIterator(batch_size=BATCH_SIZE)
pipe = ExternalSourcePipeline(batch_size=BATCH_SIZE, num_threads=2, device_id = 0,
                              external_data = eii)
# size is 100 * BATCH_SIZE, mirroring the 100 iterations of the DataLoader benchmark.
pii = DALIGenericIterator(pipe, ['image'], size=100*BATCH_SIZE, last_batch_padded=True, fill_last_batch=False)

# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()
for data in pii:
    images = data[0]["image"]
    time.sleep(LEARNING_TIME) #for simulation of DeepLearning
end = time.perf_counter()

print('total_time : %s' % ( str(end-start) ))

loading annotations into memory...
Done (t=1.09s)
creating index...
index created!
total_time : 14.16463017463684


# PyTorch DataLoader × DALI

In [0]:
class CocoDatasetForDALI(Dataset):
    """Map-style dataset that returns COCO image files as raw encoded bytes.

    Image ids are the keys of ``coco.imgToAnns``; each item is a 1-D ``uint8``
    array holding the undecoded file contents, leaving decoding to a later
    stage.
    """

    def __init__(self, dataType='val2017'):
        """Load the instance annotations for ``dataType`` and index its images.

        Args:
            dataType: COCO split name. Selects the annotation file
                ``/content/annotations/instances_<dataType>.json`` and the
                image directory ``/content/<dataType>``.
        """
        annFile='/content/annotations/instances_{}.json'.format(dataType)
        self.coco=COCO(annFile)
        self.ids = list(self.coco.imgToAnns.keys())
        self.imgs = self.coco.loadImgs(self.ids)
        # Derive the image directory from the split instead of hard-coding
        # val2017, so a non-default dataType reads the matching images.
        self.img_dir = '/content/{}'.format(dataType)

    def __len__(self):
        """Return the number of indexed image ids."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return the encoded bytes of image ``idx`` as a ``uint8`` array."""
        img_path = os.path.join(self.img_dir, self.imgs[idx]['file_name'])
        # Context manager closes the handle; the original leaked one per item.
        with open(img_path, 'rb') as f:
            time.sleep(DATA_LOAD_TIME) #for simulation of very slow file IO
            img = np.frombuffer(f.read(), dtype = np.uint8)
        return img


In [0]:
class ExternalSourcePipelineForPytorch(Pipeline):
    """DALI pipeline whose encoded-image batches come from an external iterable.

    Batches pulled from ``external_data`` enter the graph through an
    ``ExternalSource`` node, are decoded to RGB by the "mixed" image decoder
    and resized to 512x512 on the GPU.
    """

    def __init__(self, batch_size, num_threads, device_id, external_data):
        """Build the operators and open an iterator over ``external_data``."""
        super(ExternalSourcePipelineForPytorch, self).__init__(
            batch_size, num_threads, device_id, seed=12)
        self.external_data = external_data
        self.iterator = iter(external_data)
        self.input = ops.ExternalSource()
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.res = ops.Resize(device="gpu", resize_x=512, resize_y=512)

    def define_graph(self):
        """Connect external input -> decode -> resize; return the resized output."""
        # Stored on self because iter_setup() feeds data into this node.
        self.jpegs = self.input()
        return self.res(self.decode(self.jpegs))

    def iter_setup(self):
        """Push the next external batch into the graph input.

        On exhaustion the iterator is recreated and StopIteration is raised.
        """
        try:
            encoded_batch = next(self.iterator)
            self.feed_input(self.jpegs, encoded_batch)
        except StopIteration:
            self.iterator = iter(self.external_data)
            raise StopIteration

In [191]:
# Hybrid setup: DataLoader workers read the encoded files, DALI decodes/resizes them.
coco_dataset_for_dali = CocoDatasetForDALI()
coco_dataloader_for_dali = DataLoader(coco_dataset_for_dali, num_workers=8, batch_size=BATCH_SIZE, collate_fn=collate_fn)

pipe = ExternalSourcePipelineForPytorch(batch_size=BATCH_SIZE, num_threads=2, device_id = 0,
                              external_data = coco_dataloader_for_dali)
# size is 100 * BATCH_SIZE, mirroring the 100 iterations of the DataLoader benchmark.
pii = DALIGenericIterator(pipe, ['image'], size=100*BATCH_SIZE, last_batch_padded=True, fill_last_batch=False)

# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()
for data in pii:
    images = data[0]["image"]
    time.sleep(LEARNING_TIME) #for simulation of DeepLearning
end = time.perf_counter()

print('total_time : %s' % ( str(end-start) ))

loading annotations into memory...
Done (t=1.04s)
creating index...
index created!
total_time : 10.300386428833008
