In [None]:
import os
import numpy as np
import pickle

from sklearn.preprocessing import StandardScaler

import utils
from modules import RotationLSH, IdentityLSH, PcaLSH, RandomForestClustRegressor

In [None]:
# CONFIG

# --- training data ---------------------------------------------------------
data_dir = '../data/Set14'
train_files = ['barbara.png', 'bridge.png', 'comic.png', 'pepper.png', 'man.png']
n_samples = 10000       # total patch budget, split evenly across train_files
s = 1/2                 # scale factor passed to the crop / resize helpers

# --- features and hashing --------------------------------------------------
augmented = True        # whether to use gradient magnitudes or not
lsh_type = 'rotation'   # ['rotation', 'pca', 'identity']
n_components = 50       # makes sense only for PcaLSH
binary = False          # makes sense only for RotationLSH

# --- forest and per-leaf regressors ----------------------------------------
n_estimators = 10
max_depth = 15
max_features = 20
min_samples_leaf = 64
reg_type = 'lasso'      # ['ridge', 'lasso']
alpha = 4               # regularization parameter

# Snapshot of every setting above; it is stored next to the trained model so
# the run can be identified later. Key order matches the original layout.
config = {
    'data_dir': data_dir,
    'train_files': train_files,
    'n_samples': n_samples,
    's': s,
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'max_features': max_features,
    'augmented': augmented,
    'lsh_type': lsh_type,
    'n_components': n_components,
    'binary': binary,
    'reg_type': reg_type,
    'min_samples_leaf': min_samples_leaf,
    'alpha': alpha,
}

In [None]:
def generate_trainset(n_samples, s, seed=7, patch_size=9):
    """Sample paired (feature patch, residual patch) rows from the training images.

    For each file name in the module-level ``train_files`` the image is loaded
    from ``data_dir`` and cropped with ``utils.crop_image``. A degraded copy is
    made by resizing it by ``s`` and then by ``1/s``. Features are extracted
    from the degraded copy (using the module-level ``augmented`` flag), and the
    regression target is the difference ``original - degraded``. Square patches
    are then cut at the same random positions from both arrays and flattened.

    Parameters
    ----------
    n_samples : int
        Total number of patches requested. Every image contributes
        ``n_samples // len(train_files)`` patches, so any remainder is dropped.
    s : float
        Scale factor forwarded to ``utils.crop_image`` and ``utils.resize``.
    seed : int, optional
        Seed for NumPy's *global* random generator; calling this function
        therefore resets the global random state.
    patch_size : int, optional
        Side length of the square patches (default 9, the previously
        hard-coded value).

    Returns
    -------
    X : numpy.ndarray
        One flattened feature patch per row.
    Y : numpy.ndarray
        One flattened residual patch per row, aligned with ``X``.

    Raises
    ------
    ValueError
        If a cropped image is smaller than ``patch_size`` along either axis.
    """
    np.random.seed(seed)

    # Loop-invariant, so computed once. The guard keeps the behavior of the
    # original for an empty file list: no iterations, empty arrays returned.
    patches_per_picture = n_samples // len(train_files) if train_files else 0

    X = []
    Y = []
    for file in train_files:
        path = os.path.join(data_dir, file)
        img_h = utils.load_image(path)
        img_h = utils.crop_image(img_h, s)
        new_shape = img_h.shape

        img_l = utils.resize(utils.resize(img_h, s), 1/s)  # downsample -> upsample

        features = utils.extract_features(img_l, augmented=augmented)
        img_out_delta = img_h - img_l
        assert img_h.shape == img_l.shape == img_out_delta.shape == new_shape

        # Largest top-left corner for which the patch still fits in the image.
        max_x = new_shape[0] - patch_size
        max_y = new_shape[1] - patch_size
        if max_x < 0 or max_y < 0:
            raise ValueError(
                'image {!r} with shape {} is smaller than the {}x{} patch'.format(
                    file, new_shape, patch_size, patch_size))

        # randint's upper bound is exclusive, hence the +1: without it the last
        # row/column of valid positions was never sampled and an image exactly
        # patch_size wide raised "low >= high". Note that this shifts the
        # sampled positions for a given seed compared with the old bound.
        x_ind = np.random.randint(0, max_x + 1, size=patches_per_picture)
        y_ind = np.random.randint(0, max_y + 1, size=patches_per_picture)
        for x, y in zip(x_ind, y_ind):
            feat_patch = features[x:x + patch_size, y:y + patch_size]
            delta_patch = img_out_delta[x:x + patch_size, y:y + patch_size]
            X.append(feat_patch.flatten())
            Y.append(delta_patch.flatten())

    X = np.array(X)
    Y = np.array(Y)
    return X, Y

In [None]:
# Build the hashing transform selected by `lsh_type` in the config cell.
if lsh_type == 'identity':
    lsh = IdentityLSH()
elif lsh_type == 'rotation':
    lsh = RotationLSH(binary=binary)
elif lsh_type == 'pca':
    lsh = PcaLSH(n_components=n_components)
else:
    # Fail here instead of leaving `lsh` undefined, or silently reusing an
    # instance left in the kernel by an earlier run of this cell.
    raise ValueError(
        "unknown lsh_type {!r}; expected 'rotation', 'pca' or 'identity'".format(lsh_type))

In [None]:
# Draw the training patches; the seed is pinned so a re-run gives the same set.
X, Y = generate_trainset(n_samples, s, seed=7)
print('X.shape:\t {}'.format(X.shape))
print('Y.shape:\t {}'.format(Y.shape))

# Standardise the features. The fitted scaler stays in `scaler` because it is
# saved together with the model further down.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

# Fit the hashing transform on the scaled features and project them.
lsh.fit(X)
X_comp = lsh.transform(X)
print('X_comp.shape:\t {}'.format(X_comp.shape))

In [None]:
# Configure the forest from the values set in the config cell.
rf = RandomForestClustRegressor(
    # tree-structure settings
    n_estimators=n_estimators,
    max_depth=max_depth,
    max_features=max_features,
    min_samples_leaf=min_samples_leaf,
    # regression settings
    reg_type=reg_type,
    alpha=alpha,
    # execution
    n_jobs=None,
)
# fit receives the scaled features, their hashed projection and the targets.
rf.fit(X, X_comp, Y)

In [None]:
save_name = './trained_models/example_model.pkl'

# Everything needed to reuse the model: the fitted scaler, the fitted hashing
# transform, the forest, and the config it was trained with.
model = {
    'scaler': scaler,
    'lsh': lsh,
    'rf': rf,
    'config': config
}

# open(..., 'wb') does not create missing directories, so on a fresh checkout
# the dump would fail after training has already finished. Create the target
# directory first; the check skips this when save_name has no directory part.
save_dir = os.path.dirname(save_name)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

with open(save_name, 'wb') as output:
    pickle.dump(model, output, pickle.HIGHEST_PROTOCOL)