# Create single (scaled) samples

In [1]:
import sys
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# The complete Painter by Numbers dataset is spread over a train and a test folder.
# Both paths end with '/' because file names are appended to them directly.
input_dir_1, input_dir_2 = (
    os.getcwd() + subdir
    for subdir in ('/data/painters/train/', '/data/painters/test/')
)

In [3]:
# Output directory for the single square crop written per painting.
# The path ends with '/' because sub-folder names are appended to it directly.
output_dir = os.getcwd() + '/data/single_crop/'
# exist_ok avoids the check-then-create race and makes the cell safe to re-run
os.makedirs(output_dir, exist_ok=True)

In [4]:
def create_corner_crops(f, crop_size=299):
    """Return a single square RGB version of the image stored at ``f``.

    Despite its name, this function produces exactly one image: the centred
    square of the painting (its side is the shorter image dimension), scaled
    to ``crop_size`` x ``crop_size`` pixels.

    Processing steps:
      1. Open the image and convert it to RGB.
      2. If a side is shorter than ``crop_size``, upscale the image (aspect
         ratio kept up to integer rounding) so that side equals ``crop_size``.
      3. Cut out the centred square.
      4. Resize that square to ``crop_size`` x ``crop_size``.

    Parameters
    ----------
    f : anything accepted by ``PIL.Image.open`` (typically a file path).
    crop_size : int, optional
        Side length of the returned square in pixels. Defaults to 299, the
        value that was previously hard-coded.

    Returns
    -------
    PIL.Image.Image
        RGB image of size ``(crop_size, crop_size)``.

    Raises
    ------
    Whatever ``PIL.Image.open`` / ``convert`` raise for missing or unreadable
    files; nothing is caught here.
    """
    s = crop_size

    # convert() returns an independent in-memory image, so the source can be
    # closed as soon as the conversion is done.
    with Image.open(f) as source:
        i = source.convert(mode='RGB')
    w, h = i.size

    # The original code passed resample=True, which only worked because
    # True == 1 is the numeric id of the LANCZOS filter. Name it explicitly.
    resample = Image.LANCZOS

    # Fix images which are too small.
    # Narrow portrait/square image: bring the width up to the crop size.
    if w < s and w <= h:
        i = i.resize((s, int((s / w) * h)), resample=resample)
        w, h = i.size
    # Height (still) too small: bring the height up to the crop size.
    if h < s:
        i = i.resize((int((s / h) * w), s), resample=resample)
        w, h = i.size

    # Centred square whose side is the shorter dimension
    if w <= h:
        # Portrait orientation (or square): trim top and bottom
        top = (h - w) // 2
        square = i.crop(box=(0, top, w, top + w))
    else:
        # Landscape orientation: trim left and right
        left = (w - h) // 2
        square = i.crop(box=(left, 0, left + h, h))

    # Full painting cropped to square, scaled to the requested size
    return square.resize((s, s), resample=resample)

In [5]:
# Load dataset selection: a table with 'filename', 'set' and 'artist' columns
img_details = pd.read_csv(os.getcwd() + '/data/balanced_256_split.csv')

# Per-row values as plain lists, in the row order of the CSV
file_names = list(img_details['filename'].values)
set_labels = list(img_details['set'].values)
artist_labels = list(img_details['artist'].values)

# Sorted distinct labels; they are used as output folder names
set_folders = np.unique(set_labels)
artist_folders = np.unique(artist_labels)

print('Files total:', len(file_names))
print('Artists:', len(artist_folders))
print('Sets:', len(set_folders))

Files total: 17664
Artists: 69
Sets: 3


In [7]:
# Create output set folders and artist folders within:
#   <output_dir>/<set>/<artist>/
for set_path in set_folders:
    set_dir = output_dir + set_path + '/'
    # exist_ok replaces the racy "check, then create" pattern and keeps the
    # cell idempotent. The set folder is created explicitly so it exists
    # even when there are no artist folders.
    os.makedirs(set_dir, exist_ok=True)

    for artist_path in artist_folders:
        os.makedirs(set_dir + artist_path + '/', exist_ok=True)

In [None]:
# Write one square crop per listed painting to
#   <output_dir>/<set>/<artist>/<numeric file id>.jpg
# Paintings whose output file already exists are skipped, so the cell can be
# re-run to resume an interrupted export.
for f, file_name in enumerate(tqdm(file_names)):

    # Numeric id = file name without its extension (any extension length)
    file_id = int(os.path.splitext(file_name)[0])
    current_set = set_labels[f]
    current_artist = artist_labels[f]

    # Locate file in train or test folder of Painter by Numbers dataset
    if os.path.isfile(input_dir_1 + file_name):
        input_dir = input_dir_1
    else:
        input_dir = input_dir_2

    f_x1 = '%s/%d.jpg'%(output_dir + current_set + '/' + current_artist, file_id)

    # skip if files exist
    if os.path.isfile(f_x1):
        continue

    saved = False
    try:
        x1 = create_corner_crops(input_dir + file_name)
        x1.save(f_x1, quality=100)
        saved = True
    except Exception as exc:
        # Best effort: report the failing file and carry on with the rest.
        # Only Exception is caught so that Ctrl-C / kernel interrupt still
        # stops the loop (a bare except would swallow KeyboardInterrupt).
        print('!', file_name, type(exc), exc)
    finally:
        # The output did not exist before this iteration, so anything found
        # here after a failure or interrupt is a partial file. Remove it,
        # otherwise the "skip if files exist" check would treat it as done.
        if not saved and os.path.isfile(f_x1):
            os.remove(f_x1)