## Speed comparison between fast.ai and torchvision dataloaders, using the CIFAR-10 dataset with WideResNet-22 as the model

In [1]:
# sources:
#  https://github.com/fastai/fastai/blob/master/courses/dl2/cifar10-dawn.ipynb
#  https://github.com/fastai/imagenet-fast/blob/master/cifar10/cifar10-super-convergence-tuned.ipynb
#  https://github.com/radekosmulski/machine_learning_notebooks/blob/master/cifar10_fastai_dawnbench.ipynb

In [None]:
# my hardware:
#    Intel i7-7700K @ 4.2GHz (4 physical cores, 8 logical CPUs), 32GB of RAM
#    Zotac GeForce 1080Ti 11GB
#    MSI Z170I Gaming Pro motherboard

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
from fastai.conv_learner import *
from fastai.models.cifar10.wideresnet import wrn_22
# Turn on cuDNN benchmark mode (PyTorch's cuDNN autotuner switch).
torch.backends.cudnn.benchmark = True
# Dataset root; the loaders below read its 'train' and 'test' subfolders.
PATH = Path("data/cifar10_dirs_by_classes/")

In [3]:
# CIFAR-10 class labels.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Normalization statistics: three means and three standard deviations.
means = (0.4914, 0.48216, 0.44653)
stdevs = (0.24703, 0.24349, 0.26159)
stats = (np.array(means), np.array(stdevs))

bs = 512          # batch size used by every dataloader
sz = 32           # image size (crop size for the augmentations)
wd = 1e-4         # passed as `wds` to learn.fit
lr = 1.5          # learning rate passed to learn.fit
nw = num_cpus()   # worker count handed to the dataloaders

## ver1: with fast.ai dataloaders

In [4]:
# Augmentation: random crop and random flip, with sz//8 pixels of padding.
tfms = tfms_from_stats(
    stats,
    sz,
    aug_tfms=[RandomCrop(sz), RandomFlip()],
    pad=sz // 8,
)
# fast.ai data object built from the folders under PATH; 'test' is the validation folder.
data = ImageClassifierData.from_paths(
    PATH,
    val_name='test',
    tfms=tfms,
    bs=bs,
    num_workers=nw,
)

In [5]:
# Fresh WideResNet-22 instance for the fast.ai-dataloader run.
m = wrn_22()

In [6]:
# Wrap the model and the fast.ai data object in a learner, then set the loss and metric explicitly.
learn = ConvLearner.from_model_data(m, data)
learn.crit = nn.CrossEntropyLoss()  # loss function
learn.metrics = [accuracy]  # shows up as the `accuracy` column of the fit output

In [7]:
# Time one cycle of two epochs (cycle_len=2) with the fast.ai dataloaders.
# NOTE(review): use_clr_beta is presumably (div, pct, max_mom, min_mom) - confirm against the fastai source.
%time learn.fit(lr, 1, wds=wd, cycle_len=2, use_clr_beta=(20,20,0.95,0.85))

epoch      trn_loss   val_loss   accuracy                 
    0      1.528044   1.415433   0.491     
    1      1.071998   0.902383   0.673                    

CPU times: user 1min 9s, sys: 36.6 s, total: 1min 45s
Wall time: 1min 29s


[array([0.90238]), 0.673]

## ver2: with torchvision dataloaders

In [4]:
from torchvision import transforms as tvtf, datasets as tvds
from torch.utils.data import DataLoader as tdl

In [9]:
def get_loaders(bs, num_workers):
    """Build the torchvision-based train, validation and augmented-validation loaders.

    Images are read with ``ImageFolder`` from ``PATH/'train'`` and ``PATH/'test'``.
    Every pipeline ends with ``ToTensor`` followed by ``Normalize(means, stdevs)``.

    Args:
        bs: batch size used by all three loaders.
        num_workers: value forwarded as ``num_workers`` to each ``DataLoader``.

    Returns:
        A ``(train_loader, val_loader, aug_loader)`` tuple:
        * ``train_loader`` - shuffled training images with crop/flip augmentation;
        * ``val_loader``   - unshuffled test images without augmentation;
        * ``aug_loader``   - unshuffled test images with the training augmentation.
    """
    traindir, valdir = str(PATH/'train'), str(PATH/'test')
    tfms = [tvtf.ToTensor(), tvtf.Normalize(means, stdevs)]
    # Derive the padding from `sz` (4 px for 32x32 images) instead of hard-coding it,
    # so this pipeline keeps matching the `pad=sz//8` used by the fast.ai transforms
    # if `sz` is ever changed.
    aug_tfms = tvtf.Compose(
        [tvtf.RandomCrop(sz, padding=sz//8), tvtf.RandomHorizontalFlip()] + tfms
    )

    train_dataset = tvds.ImageFolder(traindir, aug_tfms)
    val_dataset = tvds.ImageFolder(valdir, tvtf.Compose(tfms))
    aug_dataset = tvds.ImageFolder(valdir, aug_tfms)

    # Settings shared by all three loaders; only shuffling differs.
    loader_kwargs = dict(batch_size=bs, num_workers=num_workers, pin_memory=True)
    train_loader = tdl(train_dataset, shuffle=True, **loader_kwargs)
    val_loader = tdl(val_dataset, shuffle=False, **loader_kwargs)
    aug_loader = tdl(aug_dataset, shuffle=False, **loader_kwargs)

    return train_loader, val_loader, aug_loader

def get_data(bs, num_workers):
    """Wrap the torchvision loaders in a fast.ai ``ModelData`` object.

    Args:
        bs: batch size forwarded to ``get_loaders``.
        num_workers: worker count forwarded to ``get_loaders``.

    Returns:
        A ``ModelData`` built from ``PATH`` and the train/validation loaders, with
        the augmented loader attached as ``aug_dl`` and the image size as ``sz``.
    """
    train_dl, valid_dl, augmented_dl = get_loaders(bs, num_workers)
    model_data = ModelData(PATH, train_dl, valid_dl)
    # Attributes attached after construction: the augmented loader, then the image size.
    model_data.aug_dl, model_data.sz = augmented_dl, sz
    return model_data

# Rebinds `data`: from here on it is the torchvision-backed ModelData, not the
# ImageClassifierData built in the ver1 section.
data = get_data(bs, nw)

In [10]:
# Fresh WideResNet-22 instance for the torchvision-dataloader run (rebinds `m`).
m = wrn_22()

In [11]:
# Same learner setup as in the ver1 section, now on the torchvision-backed `data`.
learn = ConvLearner.from_model_data(m, data)
learn.crit = nn.CrossEntropyLoss()  # loss function
learn.metrics = [accuracy]  # shows up as the `accuracy` column of the fit output

In [12]:
# Time the same fit call as in the ver1 section (one cycle of two epochs), now fed by the torchvision dataloaders.
# NOTE(review): use_clr_beta is presumably (div, pct, max_mom, min_mom) - confirm against the fastai source.
%time learn.fit(lr, 1, wds=wd, cycle_len=2, use_clr_beta=(20,20,0.95,0.85))

epoch      trn_loss   val_loss   accuracy                 
    0      1.580392   1.649794   0.4413    
    1      1.1049     0.96543    0.6573                   

CPU times: user 54.7 s, sys: 24.4 s, total: 1min 19s
Wall time: 1min 19s


[array([0.96543]), 0.6573]

## Conclusion

Average wall-clock run times for the 2-epoch training run above:


ver1 (fast.ai dataloaders) = 90 sec


ver2 (torchvision dataloaders) = 80 sec
