In [15]:
%matplotlib inline
import os
import sys
sys.path.append("..")
import time
import numpy as np
import h5py

from joblib import Parallel, delayed
from skimage.io import imread

from canon.pattern import LatentExtractor
from canon.util import split_workload

In [27]:
def process_img(img_files, model_name):
    """Encode a batch of image files and prefix each code with its image index.

    Parameters
    ----------
    img_files : sequence of str
        Paths of the images to encode. The five characters that precede the
        final four characters of each path (i.e. ``path[-9:-4]``) must parse
        as an integer; that integer is used as the image index.
    model_name : str
        Name handed to ``LatentExtractor`` to select the model.

    Returns
    -------
    numpy.ndarray
        ``np.hstack`` of the index column and the array returned by
        ``LatentExtractor.features`` -- presumably one row per image with the
        index in column 0 (TODO confirm the shape returned by ``features``).
    """
    # A fresh extractor is built on every call, so each call is self-contained.
    encoder = LatentExtractor(model_name)

    pixel_stack = np.array([imread(path) for path in img_files])

    # One single-element row per file, so the result is a column that can be
    # stacked to the left of the codes.
    index_rows = []
    for path in img_files:
        index_rows.append([int(path[-9:-4])])
    index_column = np.array(index_rows)

    latent_codes = encoder.features(pixel_stack)
    return np.hstack([index_column, latent_codes])

def extract_features(jpg_dir, model_name, n_groups=32, n_jobs=-1):
    """Extract features for every ``.jpg`` image in ``img/<jpg_dir>``.

    The image paths are split into work groups with ``split_workload``; each
    group is handed to ``process_img`` through a joblib ``Parallel`` pool, and
    the per-group results are stacked and ordered by their first column.

    Parameters
    ----------
    jpg_dir : str
        Name of the sub-directory of ``img`` that holds the images.
    model_name : str
        Model name forwarded unchanged to ``process_img``.
    n_groups : int, optional
        Second argument passed to ``split_workload`` (default 32, the value
        that used to be hard-coded).
    n_jobs : int, optional
        ``n_jobs`` passed to ``joblib.Parallel`` (default -1, as before).

    Returns
    -------
    numpy.ndarray
        The vertically stacked results of ``process_img``, with rows sorted in
        ascending order of column 0.

    Raises
    ------
    ValueError
        If the directory contains no non-hidden ``.jpg`` file.
    """
    dir_path = os.path.join("img", jpg_dir)
    # os.listdir returns entries in arbitrary order; sorting makes the list
    # handed to split_workload reproducible from run to run.
    filenames = [os.path.join(dir_path, filename)
                 for filename in sorted(os.listdir(dir_path))
                 if not filename.startswith('.') and filename.endswith(".jpg")]
    print('Found %d files in the directory %s.' % (len(filenames), dir_path))

    # Fail early with a clear message instead of an opaque error from the
    # workers or from np.vstack further down.
    if not filenames:
        raise ValueError("No .jpg files found in the directory %s." % dir_path)

    fn_grps = split_workload(filenames, n_groups)

    with Parallel(n_jobs=n_jobs, verbose=1) as parallel:
        data = parallel(delayed(process_img)(grp, model_name) for grp in fn_grps)

    # Post-processing does not need the pool, so it runs after the context
    # manager has exited.
    data = np.vstack(data)
    # Stable sort keeps rows that share an index in their stacked order.
    data = data[data[:, 0].argsort(kind="stable")]
    print("Loaded a data of shape {}".format(data.shape))

    return data

In [33]:
# Image set to process (a sub-directory of img/) and the models to run on it.
jpg_dir = "au29_area2_50_150"
models = ["ae_conv_4_128", "ae_conv_4_256"]

# One feature array per model, in the same order as `models`.
features = []
for model in models:
    print("Extracting features from {} using {} ...".format(jpg_dir, model))
    data = extract_features(jpg_dir, model)
    features.append(data)

hdf5_file = jpg_dir +'.hdf5'
# Create the output directory up front so that a missing "features" folder
# cannot make the write fail after the (slow) extraction has finished.
os.makedirs("features", exist_ok=True)
hdf5_path = os.path.join("features", hdf5_file)

print("Save {} datasets to {}".format(len(features), hdf5_file))
# Mode 'w' truncates any existing file; each model gets its own dataset,
# named after the model.
with h5py.File(hdf5_path, 'w') as h5f:
    for m, d in zip(models, features):
        h5f.create_dataset(m, data=d)

# Re-open read-only to list what was actually written.
with h5py.File(hdf5_path, 'r') as h5f:
    print("Final datasets in {}: {}".format(hdf5_file, list(h5f.keys())))

Extracting features from au29_area2_50_150 using ae_conv_4_128 ...
Found 7493 files in the directory img\au29_area2_50_150.


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  32 out of  32 | elapsed:   54.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.


Loaded a data of shape (7493, 129)
Extracting features from au29_area2_50_150 using ae_conv_4_256 ...
Found 7493 files in the directory img\au29_area2_50_150.
Loaded a data of shape (7493, 257)
Save 2 datasets to au29_area2_50_150.hdf5
Final datasets in au29_area2_50_150.hdf5: ['ae_conv_4_128', 'ae_conv_4_256']


[Parallel(n_jobs=-1)]: Done  32 out of  32 | elapsed:   46.0s finished
