In [1]:
%matplotlib inline
import os
import sys
sys.path.append("..")
import time
import numpy as np
import h5py

from joblib import Parallel, delayed
from skimage.io import imread

from canon.pattern import LatentExtractor, RescaleExtractor
from canon.util import split_workload

Using TensorFlow backend.


In [2]:
def process_img(img_files, model_name):
    """Load a group of image files and compute one feature row per image.

    Parameters
    ----------
    img_files : sequence of str
        Paths of the images to process. Characters ``[-9:-4]`` of every path
        must be the decimal image index (i.e. five digits directly in front
        of a four-character extension such as ``.jpg``).
    model_name : str
        ``"rescale"`` selects ``RescaleExtractor((16, 16))``; any other value
        is handed to ``LatentExtractor`` as the model name.

    Returns
    -------
    numpy.ndarray
        The image indices (as a single column) horizontally stacked in front
        of whatever ``extractor.features`` returns for the loaded images.
        NOTE(review): assumes ``features`` yields a 2-D array with one row
        per image -- confirm against canon.pattern.
    """
    # The extractor is built inside the function so that each call creates
    # its own instance.
    if model_name != "rescale":
        extractor = LatentExtractor(model_name)
    else:
        extractor = RescaleExtractor((16, 16))

    # Read every image first, then parse the indices from the file names.
    pixels = np.array(list(map(imread, img_files)))
    indices = np.array([[int(path[-9:-4])] for path in img_files])

    latent = extractor.features(pixels)
    return np.hstack((indices, latent))

def extract_features(jpg_dir, model_name):
    """Extract features for every JPEG in ``img/<jpg_dir>`` via joblib.Parallel.

    Parameters
    ----------
    jpg_dir : str
        Name of the sub-directory of ``img`` (relative to the current working
        directory) that holds the images.
    model_name : str
        Passed through unchanged to ``process_img`` to select the extractor.

    Returns
    -------
    numpy.ndarray
        The per-group results of ``process_img`` stacked vertically and
        sorted in ascending order of their first column (the image index).

    Raises
    ------
    ValueError
        If the directory contains no visible file ending in ``.jpg``.
    """
    dir_path = os.path.join("img", jpg_dir)
    # os.listdir returns entries in arbitrary order; sorting them keeps the
    # input handed to split_workload reproducible between runs.
    filenames = [os.path.join(dir_path, filename)
                 for filename in sorted(os.listdir(dir_path))
                 if not filename.startswith('.') and filename.endswith(".jpg")]
    print('Found %d files in the directory %s.' % (len(filenames), dir_path))

    # Fail early with a clear message instead of an unrelated error raised
    # later by the workers or by np.vstack on an empty result.
    if not filenames:
        raise ValueError("No .jpg files found in the directory %s." % dir_path)

    fn_grps = split_workload(filenames, 32)

    # Only the parallel call needs the worker pool; leaving the block before
    # post-processing lets the pool be released as soon as the results are in.
    with Parallel(n_jobs=-1, verbose=1) as parallel:
        data = parallel(delayed(process_img)(grp, model_name) for grp in fn_grps)

    data = np.vstack(data)
    # Stable sort: rows sharing the same index keep the order they were
    # stacked in rather than an algorithm-dependent one.
    data = data[data[:, 0].argsort(kind="stable")]
    print("Loaded a data of shape {}".format(data.shape))

    return data

In [3]:
# Extract features for one image set with every listed model and store each
# result as its own dataset in features/<jpg_dir>.hdf5.
t0 = time.time()

# Sub-directory of "img" to process; an alternative image set is kept below.
# jpg_dir = "C5_30x30"
jpg_dir = "BTO_01_m_zoom"

# Every entry is handed to extract_features and doubles as the dataset name.
models = ['ae_conv_4_256_best']

features = []
for model in models:
    print("Extracting features from {} using {} ...".format(jpg_dir, model))
    data = extract_features(jpg_dir, model)
    features.append(data)

hdf5_file = jpg_dir +'.hdf5'
hdf5_path = os.path.join("features", hdf5_file)
# h5py does not create missing parent directories; make sure the output
# directory exists so the extraction work is not lost to a failed open.
os.makedirs(os.path.dirname(hdf5_path), exist_ok=True)

print("Save {} datasets to {}".format(len(features), hdf5_file))
# Mode 'w' truncates an existing file: only the datasets written here remain.
with h5py.File(hdf5_path, 'w') as h5f:
    for m, d in zip(models, features):
        print(m, d.shape)
        h5f.create_dataset(m, data=d)
# Re-open read-only to confirm what actually ended up on disk.
with h5py.File(hdf5_path, 'r') as h5f:
    print("Final datasets in {}: {}".format(hdf5_file, list(h5f.keys())))

# Wall-clock seconds for the extraction and the save.
print("%.2f" % (time.time() - t0))

Extracting features from BTO_01_m_zoom using ae_conv_4_256_best ...
Found 5712 files in the directory img\BTO_01_m_zoom.


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.


Loaded a data of shape (5712, 257)
Save 1 datasets to BTO_01_m_zoom.hdf5
ae_conv_4_256_best (5712, 257)
Final datasets in BTO_01_m_zoom.hdf5: ['ae_conv_4_256_best']
29.27


[Parallel(n_jobs=-1)]: Done  32 out of  32 | elapsed:   29.1s finished


In [4]:
# Sanity check: re-open the saved file and print the name and shape of each
# dataset it contains. Relies on `hdf5_file` set by the extraction cell.
readback_path = os.path.join("features", hdf5_file)
with h5py.File(readback_path, 'r') as h5f:
    # Iterating the file object yields the names of its top-level members.
    for k in h5f:
        print(k)
        # Copy the dataset into memory as a NumPy array.
        data = np.array(h5f[k])
        print(data.shape)

ae_conv_4_256_best
(5712, 257)
