# Creating Thumbnails of Images - Max Dimension $\leq 256$

If an image is originally square with dimensions greater than 256x256, it will simply be resized to 256x256. If, on the other hand, an image is rectangular, say 460x780, it will be resized so that its longest side becomes 256 while maintaining the same aspect ratio: 780 becomes 256 and, keeping the ratio, 460 becomes $\approx 151$.

In [14]:
from PIL import Image
import glob, os

# Bounding box for the thumbnails: neither side of an output image will
# exceed these dimensions.
size = 256, 256

input_image_dir = 'images'
output_thumbnail_dir = 'thumbnails'

# Create the output folder up front; otherwise im.save() below fails when the
# folder does not exist yet.
if not os.path.isdir(output_thumbnail_dir):
    os.makedirs(output_thumbnail_dir)

for infile in glob.glob(os.path.join(input_image_dir, '*.jpg')):
    # 'images/foo.jpg' -> 'foo'
    basename = os.path.splitext(os.path.basename(infile))[0]
    im = Image.open(infile)
    # thumbnail() resizes the image in place, keeping its aspect ratio.
    im.thumbnail(size)
    im.save(os.path.join(output_thumbnail_dir, basename + '_thumb.jpg'), 'JPEG')

# Resizing and Cropping Images to Exactly 256x256

The next script _first_ resizes images such that the _shortest_ side becomes 256 pixels, and then crops the central regions of the images to make the final images exactly 256x256.

In [4]:
from __future__ import division

import math
import glob, os

from PIL import Image

# Edge length (in pixels) of the square output images. The commented-out
# alternatives below are the settings for producing the 128x128 variant.
side_length = 256
#side_length = 128

input_image_dir = 'images'
output_square_dir = 'square_images2'
#output_square_dir = 'square_images128'

# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name. Fall back to ANTIALIAS only on PIL versions that
# predate LANCZOS.
resample_filter = getattr(Image, 'LANCZOS', None)
if resample_filter is None:
    resample_filter = Image.ANTIALIAS

# Create the output folder up front; otherwise save() below fails when the
# folder does not exist yet.
if not os.path.isdir(output_square_dir):
    os.makedirs(output_square_dir)

for infile in glob.glob(os.path.join(input_image_dir, '*.jpg')):
    # 'images/foo.jpg' -> 'foo'
    basename = os.path.splitext(os.path.basename(infile))[0]

    im = Image.open(infile)
    width, height = im.size

    # Scale so that the shorter side becomes exactly side_length. The longer
    # side is rounded up so it can never end up shorter than side_length.
    if width < height:
        ratio = height / width
        new_width = side_length
        new_height = int(math.ceil(ratio * new_width))
    else:
        ratio = width / height
        new_height = side_length
        new_width = int(math.ceil(ratio * new_height))

    resized_im = im.resize((new_width, new_height), resample_filter)

    # Centre a side_length x side_length box inside the resized image. The
    # side that already equals side_length gets an offset of 0, so a single
    # computation covers both the landscape and the portrait case.
    left_bound = (new_width - side_length) // 2
    upper_bound = (new_height - side_length) // 2
    right_bound = left_bound + side_length
    lower_bound = upper_bound + side_length

    cropped_im = resized_im.crop((left_bound, upper_bound, right_bound, lower_bound))

    cropped_im.save(os.path.join(output_square_dir, basename + '_square.jpg'), 'JPEG')

# Creating Tiny 16x16 (or 32x32) Images by Simple Resizing

This is the simplest possible method, which will squash aspect ratios.

In [107]:
from PIL import Image
import glob, os

# Exact output dimensions. Unlike thumbnail(), resize() does not keep the
# aspect ratio, so non-square inputs are squashed to this shape.
size = 32, 32

input_image_dir = 'images'
output_tiny_dir = 'tiny_images_32'

# Create the output folder up front; otherwise save() below fails when the
# folder does not exist yet.
if not os.path.isdir(output_tiny_dir):
    os.makedirs(output_tiny_dir)

for infile in glob.glob(os.path.join(input_image_dir, '*.jpg')):
    # 'images/foo.jpg' -> 'foo'
    basename = os.path.splitext(os.path.basename(infile))[0]
    im = Image.open(infile)
    resized_im = im.resize(size)
    resized_im.save(os.path.join(output_tiny_dir, basename + '_tiny.jpg'), 'JPEG')

# Grouping Oxford 17 Flower Dataset Images into Folders for each Class

Here, we create the following directory structure:

For each class CLASS_K, we create a folder titled CLASS_K (using underscores when the class name contains spaces) and put all images of that class into that new folder.

We take advantage of the fact that in the 17-class dataset, each class consists of exactly 80 images, and all images of a particular class appear _consecutively_ in the original dataset. Thus, we can simply take chunks of 80 images each and create a new folder for each chunk. The only complication is that the dataset does not seem to come with an easy way to tell which class an image belongs to; I have manually determined the correct order of the classes for the following code.

In [None]:
import glob, os

# Class names in the order in which their images appear in the sorted file
# listing (determined manually, as described above).
classes = [
    'daffodil',
    'snowdrop',
    'lily_valley',
    'bluebell',
    'crocus',
    'iris',
    'tigerlily',
    'tulip',
    'fritillary',
    'sunflower',
    'daisy',
    'colts_foot',
    'dandelion',
    'cowslip',
    'buttercup',
    'windflower',
    'pansy',
]

input_image_dir = 'square_images128'
files_per_class = 80

# Sort the listing so that consecutive chunks line up with the class order.
full_filenames = sorted(glob.glob(input_image_dir + '/*.jpg'))
filenames = list(map(os.path.basename, full_filenames))

for class_index, class_name in enumerate(classes):
    # One sub-folder per class, created inside the image folder itself.
    class_dir = input_image_dir + '/' + class_name
    os.mkdir(class_dir)

    # The class_index-th run of files_per_class names belongs to this class.
    from_index = class_index * files_per_class
    to_index = from_index + files_per_class

    for filename in filenames[from_index:to_index]:
        source_path = input_image_dir + '/' + filename
        target_path = class_dir + '/' + filename
        os.rename(source_path, target_path)

# Create `train.txt` and `test.txt` files for Caffe

In [52]:
from __future__ import division

import os
import random

# Folder that contains one sub-folder per class (named after the class).
base_dir = 'square_images2'
percentage_test_data = 0.20 # This means 20%

output_train_file = 'train.txt'
output_test_file = 'test.txt'


classes = ['daffodil',  'snowdrop', 'lily_valley', 'bluebell', 'crocus', 'iris',
           'tigerlily', 'tulip', 'fritillary', 'sunflower', 'daisy', 'colts_foot',
           'dandelion', 'cowslip', 'buttercup', 'windflower', 'pansy']

# Maps a class name like 'daffodil' to its numeric label (its position in
# `classes`).
class_dict = {class_name: index for (index, class_name) in enumerate(classes)}


folders = [folder for folder in os.listdir(base_dir) if folder != '.DS_Store']

file_path_class_dict = dict()

for folder_name in folders:
    # We look up the class number based on the name of the folder the image is in.
    # This maps a folder name like 'daffodil' to a class number like 0.
    class_index = class_dict[folder_name]

    folder_path = os.path.join(base_dir, folder_name)

    for file_name in os.listdir(folder_path):
        if file_name == '.DS_Store':
            continue
        # The path must include base_dir; without it the absolute path would
        # point next to base_dir instead of inside it, at a file that does
        # not exist.
        full_file_path = os.path.abspath(os.path.join(folder_path, file_name))
        file_path_class_dict[full_file_path] = class_index

print(len(file_path_class_dict))

# Materialise the items as a list so they can be shuffled in place; on
# Python 3 dict.items() is a view, which random.shuffle() cannot handle.
file_path_class_items = list(file_path_class_dict.items())
random.shuffle(file_path_class_items)

num_files = len(file_path_class_items)
# int() truncates, which equals floor for this non-negative product, so the
# cell does not depend on `math` having been imported by another cell.
num_test = int(num_files * percentage_test_data)
num_train = num_files - num_test

# Write the train.txt file: one "<absolute path> <class index>" line per image.
with open(output_train_file, 'w') as train_f:
    for (file_path, class_index) in file_path_class_items[:num_train]:
        train_f.write("{0} {1}\n".format(file_path, class_index))

# Write the test.txt file in the same format, using the remaining images.
with open(output_test_file, 'w') as test_f:
    for (file_path, class_index) in file_path_class_items[num_train:]:
        test_f.write("{0} {1}\n".format(file_path, class_index))

1360
