In [1]:
import numpy as np
import pandas as pd

from PIL import Image, ImageEnhance
import torchvision.transforms as transforms

import os
from tqdm import tqdm 

In [2]:
# destination root: the resized and train/val-split images are stored beneath it
data_dir = '/home/ubuntu/data/'

# source images: the folder extracted from the 256_ObjectCategories.tar file
train_dir = '/home/ubuntu/data/256_ObjectCategories/'

# Load constant train-val split

In [3]:
# metadata tables of the fixed split: T holds the training rows, V the validation rows
T, V = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../train_val_split/train_metadata.csv',
        '../train_val_split/val_metadata.csv',
    )
)

# Create directories for different categories

In [4]:
# Create <data_dir>/train/<category_number>/ for category numbers 1..256.
# makedirs(..., exist_ok=True) also creates the parent 'train' folder and does
# not raise when a folder is already present, so this cell survives a re-run.
# NOTE: files left in these folders by an earlier run are not removed.
train_root = os.path.join(data_dir, 'train')
for i in range(1, 256 + 1):
    os.makedirs(os.path.join(train_root, str(i)), exist_ok=True)

In [5]:
# Create <data_dir>/val/<category_number>/ for category numbers 1..256.
# makedirs(..., exist_ok=True) also creates the parent 'val' folder and does
# not raise when a folder is already present, so this cell survives a re-run.
val_root = os.path.join(data_dir, 'val')
for i in range(1, 256 + 1):
    os.makedirs(os.path.join(val_root, str(i)), exist_ok=True)

# Resize val. images

In [6]:
# `Scale` was deprecated in favour of `Resize` and is missing from recent
# torchvision releases; fall back to `Scale` only where `Resize` does not exist.
_resize = getattr(transforms, 'Resize', None) or transforms.Scale

# Validation preprocessing: resize with the LANCZOS filter so the shorter side
# is 299 px, then take the central 299x299 crop.
val_transform = transforms.Compose([
    _resize(299, Image.LANCZOS),
    transforms.CenterCrop(299)
])

In [7]:
# number of rows in the validation metadata table
val_size = V.shape[0]
val_size

5120

In [8]:
# Validation rows whose metadata reports three channels: resize/crop each image
# and write it as JPEG into val/<category_number>/ under its original file name.
rgb_val_rows = V.loc[V.channels == 3]
for _, entry in tqdm(rgb_val_rows.iterrows()):
    # open the source image
    source = Image.open(os.path.join(train_dir, entry.directory, entry.img_name))

    # apply the validation transform and store the result
    target_dir = os.path.join(data_dir, 'val', str(entry.category_number))
    val_transform(source).save(os.path.join(target_dir, entry.img_name), 'jpeg')

5033it [01:24, 59.70it/s]


In [9]:
# Validation rows whose metadata reports a single channel: resize/crop each
# image, expand it to three channels and write it as JPEG into
# val/<category_number>/ under its original file name.
for i, row in tqdm(V.loc[V.channels == 1].iterrows()):
    # get image
    file_path = os.path.join(train_dir, row.directory, row.img_name)
    image = Image.open(file_path)

    # transform it
    image = val_transform(image)

    # convert to RGB: PIL replicates the grey band itself, so no numpy
    # round-trip is needed, and palette/bilevel files are decoded to real
    # pixel values instead of having their raw indices stacked
    image = image.convert('RGB')

    # save
    save_path = os.path.join(data_dir, 'val', str(row.category_number), row.img_name)
    image.save(save_path, 'jpeg')

87it [00:01, 87.24it/s]


# Resize train images

In [10]:
def _adjust_color(image, f):
    """Return `image` passed through PIL's Color enhancer with factor `f`."""
    return ImageEnhance.Color(image).enhance(f)


def _adjust_contrast(image, f):
    """Return `image` passed through PIL's Contrast enhancer with factor `f`."""
    return ImageEnhance.Contrast(image).enhance(f)


def _adjust_brightness(image, f):
    """Return `image` passed through PIL's Brightness enhancer with factor `f`."""
    return ImageEnhance.Brightness(image).enhance(f)


def _adjust_sharpness(image, f):
    """Return `image` passed through PIL's Sharpness enhancer with factor `f`."""
    return ImageEnhance.Sharpness(image).enhance(f)


def _draw_wide():
    """Draw an enhancement factor uniformly from [0.4, 1.6)."""
    return np.random.uniform(0.4, 1.6)


def _draw_narrow():
    """Draw an enhancement factor uniformly from [0.8, 1.2)."""
    return np.random.uniform(0.8, 1.2)


# enhancement id -> callable(image, factor) returning the enhanced image
enhancers = {
    0: _adjust_color,
    1: _adjust_contrast,
    2: _adjust_brightness,
    3: _adjust_sharpness
}

# enhancement id -> zero-argument callable drawing a random factor for it
factors = {
    0: _draw_wide,
    1: _draw_narrow,
    2: _draw_narrow,
    3: _draw_wide
}


def enhance(image):
    """Apply all four enhancements to `image` in a random order.

    The order is a fresh shuffle of the ids 0..3 on every call; each step draws
    its own random factor from `factors` and applies the matching entry of
    `enhancers`. Returns the image produced by the last step.
    """
    sequence = [0, 1, 2, 3]
    np.random.shuffle(sequence)
    for key in sequence:
        strength = factors[key]()
        image = enhancers[key](image, strength)
    return image

In [11]:
# `Scale` was deprecated in favour of `Resize` and is missing from recent
# torchvision releases; fall back to `Scale` only where `Resize` does not exist.
_resize = getattr(transforms, 'Resize', None) or transforms.Scale

# Augmentation with the extra random enhancement step: resize with the LANCZOS
# filter so the shorter side is 384 px, take a random 299x299 crop, flip
# horizontally at random, then run `enhance`.
train_transform_rare = transforms.Compose([
    _resize(384, Image.LANCZOS),
    transforms.RandomCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.Lambda(enhance)
])

# Same resize / random crop / random flip pipeline without the enhancement step.
train_transform = transforms.Compose([
    _resize(384, Image.LANCZOS),
    transforms.RandomCrop(299),
    transforms.RandomHorizontalFlip(),
])

In [12]:
# rows per category_name in the training metadata, kept as a plain dict and
# also written to class_counts.npy
per_category = T.category_name.value_counts()
class_counts = dict(per_category)
np.save('class_counts.npy', class_counts)

In [13]:
# Draw 100 rows, with replacement, from every category and renumber the result
# 0..N-1.
# A seeded RandomState makes the draw repeatable across kernel restarts; one
# generator is shared by all groups so the categories do not reuse the same draws.
# NOTE: T is overwritten here, so re-running this cell alone would resample the
# already-sampled table.
sampling_rng = np.random.RandomState(0)
T = (
    T.groupby('category_name', group_keys=False)
    .apply(lambda x: x.sample(n=100, replace=True, random_state=sampling_rng))
    .reset_index(drop=True)
)

In [14]:
# number of rows in the resampled training table
train_size = T.shape[0]
train_size

25600

In [15]:
# Training rows whose metadata reports three channels: augment each image and
# write it as JPEG into train/<category_number>/.
rgb_train_rows = T.loc[T.channels == 3]
for idx, entry in tqdm(rgb_train_rows.iterrows()):
    # open the source image
    source = Image.open(os.path.join(train_dir, entry.directory, entry.img_name))

    # categories counted with fewer than 100 images get the pipeline that
    # includes the random enhancement step
    is_rare = class_counts[entry.category_name] < 100
    augment = train_transform_rare if is_rare else train_transform
    augmented = augment(source)

    # the row index is prepended to the original file name
    out_name = str(idx) + '_' + entry.img_name
    target_dir = os.path.join(data_dir, 'train', str(entry.category_number))
    augmented.save(os.path.join(target_dir, out_name), 'jpeg')

25261it [11:57, 35.22it/s]


In [16]:
# Training rows whose metadata reports a single channel: augment each image,
# expand it to three channels and write it as JPEG into train/<category_number>/.
for i, row in tqdm(T.loc[T.channels == 1].iterrows()):
    # get image
    file_path = os.path.join(train_dir, row.directory, row.img_name)
    image = Image.open(file_path)

    # transform it; categories counted with fewer than 100 images get the
    # pipeline that includes the random enhancement step
    if class_counts[row.category_name] < 100:
        image = train_transform_rare(image)
    else:
        image = train_transform(image)

    # convert to RGB: PIL replicates the grey band itself, so no numpy
    # round-trip is needed, and palette/bilevel files are decoded to real
    # pixel values instead of having their raw indices stacked
    image = image.convert('RGB')

    # save; the row index is prepended to the original file name
    new_image_name = str(i) + '_' + row.img_name
    save_path = os.path.join(data_dir, 'train', str(row.category_number), new_image_name)
    image.save(save_path, 'jpeg')

339it [00:03, 85.92it/s]
