### Setup

In [None]:
from pathlib import Path

import numpy as np
from PIL import Image
from IPython.display import clear_output

In [None]:
# Switch the working directory to the texture archive so that the relative
# "bckp/..." (raw) and "data/..." (processed) paths used below resolve.
# NOTE(review): assumes Google Drive is already mounted at /content/drive
# (Colab-style path); no cell shown here performs the mount — confirm.
%cd "/content/drive/My Drive/archive/imecc/texture"

### Utils

In [None]:
def get_files(dataset):
    """Recursively list the image files stored under ``dataset``.

    Parameters
    ----------
    dataset : str or pathlib.Path
        Root folder to search (sub-folders are included).

    Returns
    -------
    list of pathlib.Path
        Every path matching ``*.png``, ``*.jpg``, ``*.bmp`` or ``*.ppm``.
        The list is grouped by pattern in exactly that order, and paths are
        sorted within each group. ``resize`` and the ``gen_target_*``
        functions both consume this ordering, so image indices and target
        rows line up only as long as it stays stable.
    """
    root = Path(dataset)
    collected = []
    for pattern in ("*.png", "*.jpg", "*.bmp", "*.ppm"):
        # Sort per pattern (not globally) to keep the extension grouping.
        collected.extend(sorted(root.rglob(pattern)))
    return collected

### Resize Images

In [None]:
def crop(img):
    """Center-crop an image to a square.

    The square's side equals the shorter of the image's two dimensions, so
    only the longer axis is actually trimmed.

    Parameters
    ----------
    img : object exposing ``size`` as ``(width, height)`` and ``crop(box)``
        In this notebook, a ``PIL.Image.Image``.

    Returns
    -------
    Whatever ``img.crop`` returns for the box ``(left, top, right, bottom)``.
    """
    width, height = img.size
    side = min(width, height)
    # Half of the surplus on each axis. When the surplus is odd, ceil on the
    # leading edge and floor on the trailing edge put the extra trimmed pixel
    # on the left/top, keeping the box exactly ``side`` pixels wide/tall.
    half_x = (width - side) / 2
    half_y = (height - side) / 2
    box = (
        int(np.ceil(half_x)),
        int(np.ceil(half_y)),
        width - int(np.floor(half_x)),
        height - int(np.floor(half_y)),
    )
    return img.crop(box)

def resize(raw_dataset, proc_dataset, size=224):
    """Square-crop and resize every image of a dataset, saving them as PNG.

    Images are discovered with ``get_files(raw_dataset)`` and written to
    ``proc_dataset`` as ``<index>.png``, where ``index`` is the position of
    the source file in that listing. The index is zero-padded to at least
    four digits; the padding widens automatically when the dataset has
    10000 or more files so that sorting the output names lexicographically
    still reproduces the numeric order.

    Parameters
    ----------
    raw_dataset : str or pathlib.Path
        Folder containing the original images (searched recursively).
    proc_dataset : str or pathlib.Path
        Output folder; created (with parents) if it does not exist.
    size : int, optional
        Side length, in pixels, of the square output images. Defaults to
        224, the value that was previously hard-coded.
    """
    raw_dataset = Path(raw_dataset)
    proc_dataset = Path(proc_dataset)
    proc_dataset.mkdir(parents=True, exist_ok=True)
    filepaths = get_files(raw_dataset)
    # Width needed by the largest index, never narrower than the original 4.
    digits = max(4, len(str(max(len(filepaths) - 1, 0))))
    for index, filepath in enumerate(filepaths):
        # Image.open keeps the file handle open until the image is closed;
        # the context manager releases it per image instead of relying on
        # garbage collection across thousands of files.
        with Image.open(filepath) as img:
            resized = crop(img).resize((size, size))
            resized.save(proc_dataset / f"{index:0{digits}d}.png")
        # Progress display: with wait=True the previous name is cleared only
        # when the next one is printed, so a single line stays visible.
        print(filepath.stem)
        clear_output(wait=True)

In [None]:
# Each pair is (raw source folder, processed output folder); the datasets are
# handled one after another in the order listed.
for raw_dir, proc_dir in (
    ("bckp/brodatz", "data/brodatz"),
    ("bckp/vistex", "data/vistex"),
    ("bckp/outex13i", "data/outex13i"),
    ("bckp/umd", "data/umd"),
    ("bckp/uiuc", "data/uiuc"),
    ("bckp/kthtips2b", "data/kthtips2b"),
    ("bckp/fmd/image", "data/fmd"),
    ("bckp/dtd/images", "data/dtd"),
):
    resize(raw_dir, proc_dir)

### Create Targets

In [None]:
def gen_target_by_class_id(datapath, targetpath, p, q):
    """Save integer targets taken from a fixed slice of each file stem.

    For every file returned by ``get_files(datapath)`` the substring
    ``stem[p:q]`` is used as the class key. Distinct keys are numbered
    0, 1, 2, ... in order of first appearance, and the per-file codes are
    stored under the name ``y`` in a compressed ``.npz`` archive.

    Parameters
    ----------
    datapath : str or pathlib.Path
        Dataset folder passed to ``get_files``.
    targetpath : str or pathlib.Path
        Destination handed to ``np.savez_compressed``.
    p, q : int
        Start and stop of the slice applied to each file stem.
    """
    keys = [path.stem[p:q] for path in get_files(datapath)]
    # dict.fromkeys keeps one entry per distinct key, in first-seen order.
    code_of = {key: code for code, key in enumerate(dict.fromkeys(keys))}
    codes = [code_of[key] for key in keys]
    np.savez_compressed(targetpath, y=codes)

def gen_target_by_class_name(datapath, targetpath):
    """Save integer targets taken from the name prefix of each file stem.

    For every file returned by ``get_files(datapath)`` the text before the
    first underscore of the stem is used as the class name. Distinct names
    are numbered 0, 1, 2, ... in order of first appearance, and the per-file
    codes are stored under the name ``y`` in a compressed ``.npz`` archive.

    Parameters
    ----------
    datapath : str or pathlib.Path
        Dataset folder passed to ``get_files``.
    targetpath : str or pathlib.Path
        Destination handed to ``np.savez_compressed``.
    """
    names = [path.stem.split("_")[0] for path in get_files(datapath)]
    # dict.fromkeys keeps one entry per distinct name, in first-seen order.
    code_of = {name: code for code, name in enumerate(dict.fromkeys(names))}
    codes = [code_of[name] for name in names]
    np.savez_compressed(targetpath, y=codes)

In [None]:
# (dataset folder, output archive, slice start, slice stop): the slice picks
# the class key out of each file stem. kthtips2b is processed twice, with a
# different slice for the *_target and *_group archives.
# NOTE(review): the group slice presumably identifies the sample used for
# train/test splitting — confirm against the dataset's naming scheme.
for dataset_dir, archive, start, stop in (
    ("bckp/brodatz", "data/brodatz_target.npz", 1, 4),
    ("bckp/vistex", "data/vistex_target.npz", 1, 4),
    ("bckp/outex13i", "data/outex13i_target.npz", 1, 4),
    ("bckp/umd", "data/umd_target.npz", 2, 4),
    ("bckp/uiuc", "data/uiuc_target.npz", 1, 3),
    ("bckp/kthtips2b", "data/kthtips2b_target.npz", 0, 2),
    ("bckp/kthtips2b", "data/kthtips2b_group.npz", 2, 3),
):
    gen_target_by_class_id(dataset_dir, archive, start, stop)

In [None]:
# These datasets get their class from the text before the first underscore
# of each file stem rather than from a fixed character slice.
for dataset_dir, archive in (
    ("bckp/fmd/image", "data/fmd_target.npz"),
    ("bckp/dtd/images", "data/dtd_target.npz"),
):
    gen_target_by_class_name(dataset_dir, archive)