In [1]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt

# Suppress all warnings for the remainder of the session.
import warnings
warnings.filterwarnings(action="ignore")

# Turn on matplotlib's interactive mode.
plt.ion()

In [2]:
# Enable the IPython autoreload extension so that edits to imported modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import sys

# Put the parent directory first on the module search path (presumably so the
# local `cpe775` package imported below resolves -- confirm the repo layout).
# The guard keeps the cell idempotent: re-running it no longer prepends a
# duplicate '../' entry each time.
if not sys.path or sys.path[0] != '../':
    sys.path.insert(0, '../')

In [4]:
# Expose only GPU index 0 to this process.
# NOTE(review): this presumably has to run before CUDA is first initialised
# to take effect -- confirm nothing touches the GPU earlier.
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})

# Saving cropped images

### Loading the dataset

In [5]:
from cpe775.dataset import FaceLandmarksDataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.transforms import Compose
from cpe775.transforms import ToTensor, CropFace, ToGray

In [35]:
# Build the landmarks dataset from the training CSV, with a CropFace()
# instance as the per-sample transform.
dataset = FaceLandmarksDataset(
    csv_file='../data/train.csv',
    root_dir='../data/',
    transform=CropFace(),
)

In [7]:
# NOTE(review): `crop_face` is not referenced by any visible cell (the dataset
# already received its own CropFace() transform); kept in case a later cell
# relies on it.
crop_face = CropFace()

# Fetch every sample exactly once, in index order, then split each sample
# dict into the two parallel lists used by the following cells.
samples = [dataset[idx] for idx in range(len(dataset))]
images = [sample['image'] for sample in samples]
landmarks = [sample['landmarks'] for sample in samples]

In [18]:
# Convert each per-sample item to an ndarray and stack along a new leading
# axis (np.stack's default axis is 0), giving one array per field.
np_images = np.stack(list(map(np.array, images)))
np_landmarks = np.stack(list(map(np.array, landmarks)))

### Saving in npz format

In [19]:
# Sanity check: display the stacked image array's shape
# (presumably samples x height x width x channels -- confirm).
np.shape(np_images)

(3147, 256, 256, 3)

In [20]:
# Sanity check: display the stacked landmark array's shape
# (presumably samples x points x coordinates -- confirm).
np.shape(np_landmarks)

(3147, 68, 2)

In [21]:
# Persist both arrays in one .npz archive under the keys 'images' and
# 'landmarks'.
np.savez(
    '../data/train.npz',
    images=np_images,
    landmarks=np_landmarks,
)

### Saving in hdf5 format

In [50]:
import h5py

In [51]:
# Write both arrays to an HDF5 file opened in 'w' mode, under the dataset
# names 'images' and 'landmarks'.
# The context manager closes the file even if one of the writes raises, so a
# failed cell no longer leaves an open handle on '../data/train.h5'.
with h5py.File('../data/train.h5', 'w') as f:
    f['images'] = np_images
    f['landmarks'] = np_landmarks

## Loading the CroppedFaceLandmarksDataset

In [64]:
from cpe775.dataset import CroppedFaceLandmarksDataset
from cpe775.transforms import ToPILImage

In [66]:
# Open the arrays saved above from both on-disk formats (.npz and .h5); each
# dataset gets its own ToPILImage() transform instance.
cropped_dataset = CroppedFaceLandmarksDataset(
    '../data/train.npz',
    transform=ToPILImage(),
)
hdf5_cropped_dataset = CroppedFaceLandmarksDataset(
    '../data/train.h5',
    transform=ToPILImage(),
)

In [48]:
# Random-access latency of the npz-backed cropped dataset. A fresh random
# index is drawn inside the timed statement, so index generation is included.
%timeit cropped_dataset[np.random.randint(len(cropped_dataset))]

131 µs ± 1.02 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [41]:
# Baseline: random-access latency of the original dataset, which applies the
# CropFace() transform on every access. Index generation is included.
%timeit dataset[np.random.randint(len(dataset))]

34.8 ms ± 10.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [67]:
# Random-access latency of the HDF5-backed cropped dataset, timed the same
# way as the npz-backed one (random index drawn inside the timed statement).
%timeit hdf5_cropped_dataset[np.random.randint(len(hdf5_cropped_dataset))]

20.4 ms ± 9.56 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
