In [1]:
import os
import scipy.misc
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def mkdir_if_not_exists(path):
    '''
    Create directory if it does not exist. Missing parent directories
    are created as well.
        path:           Path of directory.
    '''
    if os.path.exists(path):
        return

    try:
        os.makedirs(path)
    except OSError:
        # The directory may have been created by someone else between the
        # existence check above and the creation; only re-raise when the
        # directory is really not there.
        if not os.path.isdir(path):
            raise

In [3]:
def get_images_name_in_directory(path):
    '''
    Get file name of all images recursively in directory filtered by extension list.
        path: Path of directory contains images.
    Return names of images relative to the selected directory. An image
    directly inside `path` is returned as its plain file name; an image in
    a sub-directory keeps the sub-directory prefix, so that
    os.path.join(path, name) always points to the image.
    '''
    # str.endswith accepts a tuple of suffixes.
    image_extensions = ('.png', '.jpg')
    images_name_in_directory = []

    for root_path, _, file_names in os.walk(path):
        for file_name in file_names:
            # Extension match is case-insensitive.
            if not file_name.lower().endswith(image_extensions):
                continue
            image_path = os.path.join(root_path, file_name)
            images_name_in_directory.append(os.path.relpath(image_path, path))

    return images_name_in_directory

In [4]:
def load_unnormalized_image(path):
    '''
    Read an image file as RGB without normalizing it. The intensity values
    stay in the 0 to 255 range and are converted into 32-bit float.
        path: Path of image file.
    Return image array.
    '''
    # NOTE(review): scipy.misc.imread looks deprecated/removed in newer
    # SciPy releases - confirm the SciPy version this notebook is pinned to.
    rgb_image = scipy.misc.imread(path, mode = 'RGB')
    return rgb_image.astype(np.float32)

In [5]:
def save_unnormalized_image(image, path):
    '''
    Save an unnormalized image to disk without rescaling its intensities.
        image:  Unnormalized image array. The intensity values range
                from 0 to 255. Format: [height, width, channels]
        path:   Path of image.
    '''
    # Attention: do not save the image with
    #     scipy.misc.imsave(path, image)
    # because it automatically scales the intensity values of the image
    # from [min(image), max(image)] to [0, 255]. That should be the reason
    # behind the issue reported by Kwonjoon Lee, which states the intensity
    # value in demo in ICL/IGM paper is much near 0 or 255.
    # Fixing cmin/cmax to the full 0..255 range keeps the values as they are.
    pil_image = scipy.misc.toimage(image, cmin = 0, cmax = 255)
    pil_image.save(path)

In [6]:
def center_crop(image, cropped_height, cropped_width):
    '''
    Crop the center part of image.
        image:          Unnormalized image array. The intensity values range
                        from 0 to 255. Format: [height, width, channels]
        cropped_height: Height of cropped part.
        cropped_width:  Width of cropped part.
    Return array of cropped part.
    Raise ValueError if the cropped part does not fit inside the image.
    '''
    image_height, image_width = image.shape[:2]

    # A crop larger than the image would produce a negative start offset,
    # which numpy interprets as "count from the end" and silently returns
    # a wrong (usually much smaller) region. Fail loudly instead.
    if not (0 <= cropped_height <= image_height and
            0 <= cropped_width <= image_width):
        raise ValueError(
            'Cropped size {}x{} does not fit inside image of size {}x{}.'.format(
                cropped_height, cropped_width, image_height, image_width))

    # Offset of the top-left corner of the cropped part inside the image.
    top = int(round((image_height - cropped_height) / 2.))
    left = int(round((image_width - cropped_width) / 2.))

    return image[top : top + cropped_height,
                 left : left + cropped_width]

In [7]:
def resize(image, resized_height, resized_width):
    '''
    Resize the image to the given height and width.
        image:          Image array.
        resized_height: Height of resized part.
        resized_width:  Width of resized part.
    Return array of resized part.
    '''
    # NOTE(review): scipy.misc.imresize appears to byte-scale non-uint8
    # input from its own [min, max] to [0, 255] before resizing - confirm
    # against the installed SciPy version and pass uint8 data if the
    # intensities must be preserved.
    target_size = [resized_height, resized_width]
    return scipy.misc.imresize(image, target_size)

In [8]:
# Create the dataset directories, parent directory first.
for directory in ['./dataset', './dataset/celeba']:
    mkdir_if_not_exists(directory)

In [9]:
# Download CelebA dataset.
!wget -P ./dataset/celeba https://cseweb.ucsd.edu/~weijian/static/celeba/img_align_celeba.zip

--2017-08-30 17:00:03--  https://cseweb.ucsd.edu/~weijian/static/celeba/img_align_celeba.zip
Resolving cseweb.ucsd.edu (cseweb.ucsd.edu)... 132.239.8.30
Connecting to cseweb.ucsd.edu (cseweb.ucsd.edu)|132.239.8.30|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1443490838 (1.3G) [application/zip]
Saving to: ‘./dataset/celeba/img_align_celeba.zip’


2017-08-30 17:00:16 (102 MB/s) - ‘./dataset/celeba/img_align_celeba.zip’ saved [1443490838/1443490838]



In [10]:
# Extract images.
!unzip -q ./dataset/celeba/img_align_celeba.zip -d ./dataset/celeba
!mv ./dataset/celeba/img_align_celeba ./dataset/celeba/original

In [11]:
# Crop and resize images.
mkdir_if_not_exists('./dataset/celeba/cropped')
original_images_name = get_images_name_in_directory('./dataset/celeba/original/')
for (i, original_image_name) in enumerate(original_images_name):
    original_image_path = os.path.join('./dataset/celeba/original', original_image_name)
    original_image = load_unnormalized_image(original_image_path)
    center_image = center_crop(image = original_image,
                               cropped_height = 108,
                               cropped_width = 108)
    # Attention: scipy.misc.imresize byte-scales any non-uint8 array from
    # [min(image), max(image)] to [0, 255] before resizing, which would
    # stretch the contrast of every image. The loaded intensities are whole
    # numbers from 0 to 255 stored as float32, so casting back to uint8 is
    # lossless and keeps the original intensity values.
    cropped_image = resize(center_image.astype(np.uint8),
                           resized_height = 64,
                           resized_width = 64)
    cropped_image_path = os.path.join('./dataset/celeba/cropped', original_image_name)
    save_unnormalized_image(cropped_image, cropped_image_path)
    # Report progress once every 1000 images.
    if i % 1000 == 0:
        print("Cropping and resizing {} images...".format(i))

Cropping and resizing 0 images...
Cropping and resizing 1000 images...
Cropping and resizing 2000 images...
Cropping and resizing 3000 images...
Cropping and resizing 4000 images...
Cropping and resizing 5000 images...
Cropping and resizing 6000 images...
Cropping and resizing 7000 images...
Cropping and resizing 8000 images...
Cropping and resizing 9000 images...
Cropping and resizing 10000 images...
Cropping and resizing 11000 images...
Cropping and resizing 12000 images...
Cropping and resizing 13000 images...
Cropping and resizing 14000 images...
Cropping and resizing 15000 images...
Cropping and resizing 16000 images...
Cropping and resizing 17000 images...
Cropping and resizing 18000 images...
Cropping and resizing 19000 images...
Cropping and resizing 20000 images...
Cropping and resizing 21000 images...
Cropping and resizing 22000 images...
Cropping and resizing 23000 images...
Cropping and resizing 24000 images...
Cropping and resizing 25000 images...
Cropping and resizing 260