### Setup

In [None]:
import bcolz, glob, os, numpy as np, pickle, random, shutil, sys

from PIL import Image

In [None]:
# Fixed seed for NumPy's global random number generator so that the
# np.random calls below are repeatable. Python's own `random` module is
# not seeded here.
seed = 708970590
np.random.seed(seed)

In [None]:
# Side length, in pixels, of the square images produced by this notebook.
new_size = 72

In [None]:
# Working directory of the running process; all data paths are built from it.
CURRENT_DIR = os.getcwd()
print("Current directory:", CURRENT_DIR)

In [None]:
# Root of the ImageNet data tree: <working directory>/data/imagenet.
DATA_DIR = os.path.join(CURRENT_DIR, 'data', 'imagenet')
print("Data directory:", DATA_DIR)

### Find and permute ordering of all filenames

In [None]:
# Collect every JPEG under sample/original/<class>/.
# glob yields paths in arbitrary, filesystem-dependent order, so sort them
# before shuffling; otherwise the seeded permutation would differ between
# machines (or runs) even though the seed is fixed.
fnames = sorted(glob.iglob(os.path.join(DATA_DIR, 'sample', 'original', '*', '*.JPEG')))
fnames = np.random.permutation(fnames)
print(fnames[:10])
num_images = len(fnames)
print('Found {} images'.format(num_images))

### Prepare directory structure for resized images

In [None]:
# Mirror the per-class sub-directories of sample/original under
# sample/resized-<new_size> so resized images can be saved class by class.
RESIZED_DIR = os.path.join(DATA_DIR, 'sample', 'resized-' + str(new_size))
original_entries = glob.iglob(os.path.join(DATA_DIR, 'sample', 'original', '*'))
# Keep directories only: a stray file next to the class folders must not be
# turned into a directory or counted as a class.
dirnames = [
    os.path.join(RESIZED_DIR, os.path.basename(entry))
    for entry in original_entries
    if os.path.isdir(entry)
]
for dirname in dirnames:
    # exist_ok avoids the check-then-create race and makes re-runs harmless.
    os.makedirs(dirname, exist_ok=True)
num_classes = len(dirnames)
print('Number of classes: {}'.format(num_classes))

### Prepare bcolz array for resized images

In [None]:
# On-disk bcolz array that will receive the resized images. It starts with
# zero rows of shape (new_size, new_size, 3) and dtype float32; mode='w'
# creates it fresh at bc_path.
bc_path = os.path.join(DATA_DIR, 'sample', 'resized-' + str(new_size) + '.bc')
empty_images = np.empty((0, new_size, new_size, 3), 'float32')
bc_arr = bcolz.carray(empty_images, chunklen=16, mode='w', rootdir=bc_path)
bc_arr.shape

### Dump list of filenames as per permuted ordering

In [None]:
def new_tail(fname):
    """Return the last two path components of *fname* joined together.

    For ``.../<class_dir>/<image>`` this is ``<class_dir>/<image>``.
    """
    parent_dir = os.path.dirname(fname)
    image_name = os.path.basename(fname)
    return os.path.join(os.path.basename(parent_dir), image_name)

# Persist the <class_dir>/<image> names in the permuted order of fnames.
filenames_pkl = os.path.join(DATA_DIR, 'sample', 'resized-' + str(new_size) + '.filenames.pkl')
relative_fnames = list(map(new_tail, fnames))
with open(filenames_pkl, 'wb') as file:
    pickle.dump(relative_fnames, file)

### Resize images

In [None]:
def new_path(basedir, fname):
    """Re-root *fname* under *basedir*, keeping its last two components.

    ``.../<class_dir>/<image>`` becomes ``<basedir>/<class_dir>/<image>``.
    """
    class_dir = os.path.basename(os.path.dirname(fname))
    image_name = os.path.basename(fname)
    return os.path.join(basedir, class_dir, image_name)

def resize_image(fname, size):
    """Load an image, scale it and crop it to a ``size`` x ``size`` square.

    The image is scaled (bilinear) so that its shorter side becomes ``size``
    pixels, then the top-left ``size`` x ``size`` region is kept.

    Args:
        fname: Path of the image file to open.
        size: Target side length in pixels.

    Returns:
        The resized and cropped PIL image, or ``None`` when the image does
        not have exactly three bands (e.g. greyscale or CMYK files).
    """
    # Image.open is lazy and keeps the file open; the context manager makes
    # sure the handle is released even when the image is rejected.
    with Image.open(fname) as img:
        # Count bands from the header instead of decoding the whole image
        # into a NumPy array just to look at its shape.
        if len(img.getbands()) != 3:
            return None
        shortest = min(img.width, img.height)
        scale = size / shortest
        # resize expects a (width, height) pair of plain ints.
        target = tuple(int(round(scale * side)) for side in img.size)
        # resize loads the pixel data and returns a new in-memory image, so
        # the result stays valid after the source file is closed.
        scaled = img.resize(target, Image.BILINEAR)
    return scaled.crop((0, 0, size, size))

# Resize every image, append it to the bcolz array and save it to disk.
processed = 0
# Relative names of the images actually appended to bc_arr, in row order.
kept_fnames = []

try:
    for fname in fnames:
        img = resize_image(fname, new_size)
        processed += 1
        if processed % 1000 == 0:
            print('Processed {} of {} images'.format(processed, num_images))
        if img is None:
            # resize_image rejected this file; nothing to store for it.
            continue
        bc_arr.append(np.array(img))
        # Record the name right after the append so list and array rows
        # stay in step even if saving the file fails.
        kept_fnames.append(new_tail(fname))
        img.save(new_path(RESIZED_DIR, fname))
except KeyboardInterrupt:
    # A manual interrupt stops early but keeps everything processed so far.
    pass
finally:
    # Flush on every exit path, not only on success or KeyboardInterrupt,
    # so rows already appended are not lost when an unexpected error occurs.
    bc_arr.flush()
    # The filename list pickled earlier in this notebook contains every
    # discovered file, including those rejected by resize_image, so its
    # indices do not match the rows of bc_arr. Rewrite it with only the
    # stored images so that entry i names row i.
    kept_path = os.path.join(DATA_DIR, 'sample', 'resized-' + str(new_size) + '.filenames.pkl')
    with open(kept_path, 'wb') as pkl_file:
        pickle.dump(kept_fnames, pkl_file)

print('Processed {} images'.format(processed))