In [None]:
import os
import scipy.misc
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def mkdir_if_not_exists(path):
    '''
    Create directory if it does not exist.
    Missing parent directories are created as well, so a nested path can be
    created with a single call.
        path:           Path of directory.
    Nothing is done when something already exists at path.
    '''
    if os.path.exists(path):
        return

    try:
        os.makedirs(path)
    except OSError:
        # Another process may have created the directory between the
        # existence check and makedirs(); only re-raise genuine failures.
        if not os.path.isdir(path):
            raise

In [None]:
def get_images_name_in_directory(path):
    '''
    Get file name of all images recursively in directory filtered by extension list.
        path: Path of directory contains images.
    Return a list with the name of every image, relative to path. Images 
    located directly in path are returned as bare file names; images found 
    in a sub-directory keep the sub-directory prefix, so that 
    os.path.join(path, name) always points to the image file.
    '''
    # str.endswith accepts a tuple of suffixes; matching is case-insensitive
    # because the file name is lower-cased before the comparison.
    image_extensions = ('.png', '.jpg')
    images_name_in_directory = []

    for root_path, _, file_names in os.walk(path):
        # Directory currently being visited, expressed relative to path
        # ('.' for path itself).
        relative_root = os.path.relpath(root_path, path)
        for file_name in file_names:
            if not file_name.lower().endswith(image_extensions):
                continue
            if relative_root == os.curdir:
                images_name_in_directory.append(file_name)
            else:
                images_name_in_directory.append(
                    os.path.join(relative_root, file_name))

    return images_name_in_directory

In [None]:
def load_unnormalized_image(path):
    '''
    Read an image file as RGB and keep it unnormalized.
        path: Path of image file.
    Return the image as a 32-bit float array whose intensity values stay 
    in the range from 0 to 255.
    '''
    # NOTE: scipy.misc.imread was deprecated in SciPy 1.0 and removed in
    # SciPy 1.2, so this function needs an older SciPy release.
    rgb_image = scipy.misc.imread(path, mode = 'RGB')
    return rgb_image.astype(np.float32)

In [None]:
def save_unnormalized_image(image, path):
    '''
    Save an unnormalized image without rescaling its intensity values.
        image:  Unnormalized image array. The intensity values range
                from 0 to 255. Format: [height, width, channels]
        path:   Path of image.
    '''
    # Attention: do not replace the code below with
    #     scipy.misc.imsave(path, image)
    # imsave automatically scales the intensity values of the image from
    # [min(image), max(image)] to [0, 255]. That is probably the reason
    # behind the issue reported by Kwonjoon Lee, which states that the
    # intensity values in the demo of the ICL/IGM paper are much nearer to
    # 0 or 255. Passing cmin = 0 and cmax = 255 explicitly pins the input
    # range instead of deriving it from the image content.
    pil_image = scipy.misc.toimage(arr = image, cmin = 0, cmax = 255)
    pil_image.save(path)

In [None]:
def center_crop(image, cropped_height, cropped_width):
    '''
    Crop the center part of image.
        image:          Unnormalized image array. The intensity values range
                        from 0 to 255. Format: [height, width, channels]
        cropped_height: Height of cropped part.
        cropped_width:  Width of cropped part.
    Return array of cropped part.
    Raise ValueError if the cropped part does not fit inside image.
    '''
    image_height, image_width = image.shape[:2]

    # A crop larger than the image would produce a negative offset, which
    # numpy interprets as "count from the end" and silently returns a wrong
    # (or empty) region.
    if not (0 <= cropped_height <= image_height and
            0 <= cropped_width <= image_width):
        raise ValueError(
            'Cropped size {}x{} does not fit into image of size {}x{}.'.format(
                cropped_height, cropped_width, image_height, image_width))

    # Integer "round half up" of margin / 2. The built-in round() rounds
    # half to even on Python 3 but half away from zero on Python 2, so
    # int(round(margin / 2.)) gives version-dependent offsets for odd margins.
    top = (image_height - cropped_height + 1) // 2
    left = (image_width - cropped_width + 1) // 2

    return image[top : top + cropped_height, left : left + cropped_width]

In [None]:
def resize(image, resized_height, resized_width):
    '''
    Scale the image to the requested size.
        image:          Image array.
        resized_height: Height of resized part.
        resized_width:  Width of resized part.
    Return array of resized part.
    '''
    # NOTE(review): according to the SciPy documentation, imresize uses
    # bytescale under the hood, which stretches the intensities of
    # non-uint8 input to the full (0, 255) range. Pass a uint8 array when
    # the original intensity values must be preserved.
    # scipy.misc.imresize was deprecated in SciPy 1.0 and removed in a later release.
    target_size = [resized_height, resized_width]
    return scipy.misc.imresize(image, target_size)

In [None]:
# Create the dataset directory tree, parents first.
for directory in ('./dataset', './dataset/celeba'):
    mkdir_if_not_exists(directory)

In [None]:
# Download CelebA dataset.
!wget -P ./dataset/celeba https://cseweb.ucsd.edu/~weijian/static/celeba/img_align_celeba.zip

In [None]:
# Extract images.
!unzip -q ./dataset/celeba/img_align_celeba.zip -d ./dataset/celeba
!mv ./dataset/celeba/img_align_celeba ./dataset/celeba/original

In [None]:
# Crop and resize images.
mkdir_if_not_exists('./dataset/celeba/cropped')
original_images_name = get_images_name_in_directory('./dataset/celeba/original/')
for (i, original_image_name) in enumerate(original_images_name):
    original_image_path = os.path.join('./dataset/celeba/original', original_image_name)
    original_image = load_unnormalized_image(original_image_path)
    center_image = center_crop(image = original_image, 
                               cropped_height = 108, 
                               cropped_width = 108)
    # resize() is built on scipy.misc.imresize, which (per the SciPy
    # documentation) bytescales non-uint8 input, i.e. stretches
    # [min(image), max(image)] to [0, 255]. The loaded image is float32, so
    # it is cast to uint8 first to keep the original intensity values. The
    # cast is lossless because the values are whole numbers from 0 to 255.
    cropped_image = resize(center_image.astype(np.uint8),
                           resized_height = 64,
                           resized_width = 64)
    cropped_image_path = os.path.join('./dataset/celeba/cropped', original_image_name)
    save_unnormalized_image(cropped_image, cropped_image_path)
    if i % 1000 == 0:
        print("Cropping and resizing {} images...".format(i))