Skip to content

Commit

Permalink
Added mnist configurator script. Put all tests and examples in separa…
Browse files Browse the repository at this point in the history
…te folders.

Added a resume_file parameter to the BayesOpt to resume optimization of a previous canceled process.
  • Loading branch information
nikivanstein committed Feb 6, 2018
1 parent b4141a5 commit 335be09
Show file tree
Hide file tree
Showing 18 changed files with 867 additions and 182 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
BayesOpt/optimizer/__pycache__
*.pyc
122 changes: 74 additions & 48 deletions BayesOpt/BayesOpt.py 100644 → 100755
Expand Up @@ -9,20 +9,21 @@
from __future__ import print_function

import pdb
import warnings, dill, functools, copy_reg, itertools
import warnings, dill, functools, itertools
from joblib import Parallel, delayed
import tensorflow as tf
import copyreg as copy_reg

import pandas as pd
import numpy as np

from scipy.optimize import fmin_l_bfgs_b
from sklearn.metrics import r2_score
import pickle

from .InfillCriteria import EI, MGFI
from .optimizer import mies
from .utils import proportional_selection

import os
# TODO: remove the usage of pandas here change it to customized np.ndarray
# TODO: adding logging system

Expand All @@ -31,10 +32,10 @@ class BayesOpt(object):
Generic Bayesian optimization algorithm
"""
def __init__(self, search_space, obj_func, surrogate,
eval_budget=None, max_iter=None, n_init_sample=None,
n_point=1, n_jobs=1, minimize=True, noisy=False, wait_iter=3,
n_restart=None, optimizer='MIES',
verbose=False, random_seed=None, debug=False):
minimize=True, noisy=False, eval_budget=None, max_iter=None,
n_init_sample=None, n_point=1, n_jobs=1,
n_restart=None, optimizer='MIES', wait_iter=3,
verbose=False, random_seed=None, debug=False, resume_file = ""):

self.debug = debug
self.verbose = verbose
Expand All @@ -43,14 +44,21 @@ def __init__(self, search_space, obj_func, surrogate,
self.obj_func = obj_func
self.noisy = noisy
self.surrogate = surrogate

self.resume_file = resume_file
if (os.path.isfile(resume_file)):
self.data = self._load_object(resume_file) #resume from previous model
print("Resuming from previously saved surrogate")
self.fit_and_assess()

self.n_point = n_point
self.n_jobs = min(self.n_point, n_jobs)

self.minimize = minimize
self.dim = len(self._space)

# column names for each variable type
self.con_ = self._space.var_name[self._space.id_C].tolist() # continuous
self.con_ = self._space.var_name[self._space.id_C].tolist() # continuous
self.cat_ = self._space.var_name[self._space.id_N].tolist() # categorical
self.int_ = self._space.var_name[self._space.id_O].tolist() # integer

Expand All @@ -71,8 +79,9 @@ def __init__(self, search_space, obj_func, surrogate,

# paramter: acquisition function optimziation
mask = np.nonzero(self._space.C_mask | self._space.O_mask)[0]
self._bounds = np.array([self._space.bounds[i] for i in mask])
self._levels = list(self._space.levels.values())
self._bounds = np.array([self._space.bounds[i] for i in mask]) # bounds for continuous and integer variable
# self._levels = list(self._space.levels.values())
self._levels = np.array([self._space.bounds[i] for i in self._space.id_N]) # levels for discrete variable
self._optimizer = optimizer
self._max_eval = int(5e2 * self.dim)
self._random_start = int(10 * self.dim) if n_restart is None else n_restart
Expand All @@ -94,6 +103,16 @@ def __init__(self, search_space, obj_func, surrogate,

copy_reg.pickle(self._eval, dill.pickles) # for pickling


def _save_object(self, obj, filename):
with open(filename, 'wb') as output: # Overwrites any existing file.
pickle.dump(obj, output, pickle.HIGHEST_PROTOCOL)

def _load_object(self, filename):
with open(filename, 'rb') as inputfile:
obj = pickle.load(inputfile)
return obj

def _get_var(self, data):
"""
get variables from the dataframe
Expand Down Expand Up @@ -138,15 +157,14 @@ def _remove_duplicate(self, confs):
idx.append(i)
return confs.loc[idx]

def _eval(self, x, runs=1):
# with tf.device_scope('/gpu:{}'.format(gpu_no)):
# TODO: parallel execution
def _eval(self, x, gpu_no=0, runs=1):

perf_, n_eval = x.perf, x.n_eval
# TODO: handle the evaluation in a better way
try: # for dictionary input
__ = [self.obj_func(x[self.var_names].to_dict()) for i in range(runs)]
except: # for list input
__ = [self.obj_func(self._get_var(x)) for i in range(runs)]
# TODO: handle the input type in a better way
#try: # for dictionary input
__ = [self.obj_func(x[self.var_names].to_dict(), gpu_no=gpu_no) for i in range(runs)]
#except: # for list input
# __ = [self.obj_func(self._get_var(x)) for i in range(runs)]
perf = np.sum(__)

x.perf = perf / runs if not perf_ else np.mean((perf_ * n_eval + perf))
Expand All @@ -165,18 +183,20 @@ def evaluate(self, data, runs=1):
self._eval(data)

elif isinstance(data, pd.DataFrame):
# parallel execution using joblib
if self.n_jobs > 1:
gpu_no = range(self.n_jobs)

res = Parallel(n_jobs=self.n_jobs, verbose=False)(
delayed(self._eval, check_pickle=False)(row) for k, row in data.iterrows())
delayed(self._eval, check_pickle=False)(row, gpu_no[k % len(gpu_no)]) \
for k, row in data.iterrows())

x, runs, hist, hist_id = zip(*res)
x, runs, hist, hist_id = list(zip(*res))
self.eval_count += sum(runs)
self.eval_hist += list(itertools.chain(*hist))
self.eval_hist_id += list(itertools.chain(*hist_id))
for i, k in enumerate(data.index):
data.loc[k] = x[i]

else:
for k, row in data.iterrows():
self._eval(row)
Expand All @@ -196,7 +216,7 @@ def fit_and_assess(self):

self.is_update = True
perf_hat = self.surrogate.predict(X)
self.r2 = r2_score(perf, perf_hat)
self.r2 = r2_score(perf_, perf_hat)

# TODO: in case r2 is really poor, re-fit the model or transform the input?
if self.verbose:
Expand Down Expand Up @@ -236,6 +256,10 @@ def select_candidate(self):
self.evaluate(confs_, runs=self.init_n_eval)
self.data = self.data.append(confs_)
self.data.perf = pd.to_numeric(self.data.perf)
#save self.data if resume_file is set
if (self.resume_file != ""):
self._save_object(self.data, self.resume_file)
#
return candidates_id

def intensify(self, candidates_ids):
Expand Down Expand Up @@ -344,7 +368,9 @@ def check_stop(self):

def _acquisition(self, plugin=None, dx=False):
if plugin is None:
plugin = np.min(self.data.perf) if self.minimize else -np.max(self.data.perf)
# plugin = np.min(self.data.perf) if self.minimize else -np.max(self.data.perf)
# Note that performance are normalized when building the surrogate
plugin = 0 if self.minimize else -1

if self.n_point > 1: # multi-point method
# create a portofolio of n infill-criteria by
Expand All @@ -356,7 +382,6 @@ def _acquisition(self, plugin=None, dx=False):
acquisition_func = MGFI(self.surrogate, plugin, minimize=self.minimize, t=t)
elif self.n_point == 1:
acquisition_func = EI(self.surrogate, plugin, minimize=self.minimize)

return functools.partial(acquisition_func, dx=dx)

def arg_max_acquisition(self, plugin=None):
Expand All @@ -370,20 +395,21 @@ def arg_max_acquisition(self, plugin=None):
obj_func = [self._acquisition(plugin, dx=dx) for i in range(self.n_point)]

if self.n_point == 1:
candidates, values = self._multistart(obj_func[0])
candidates, values = self._argmax_multistart(obj_func[0])
else:
# parallel optimization for n points approach
# parallelization using joblib
res = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(self._multistart, check_pickle=False)(func) for func in obj_func)
candidates, values = zip(*res)

delayed(self._argmax_multistart, check_pickle=False)(func) for func in obj_func)
candidates, values = list(zip(*res))
return candidates, values

def _multistart(self, obj_func):
optima, foptima = [], []
def _argmax_multistart(self, obj_func):
# keep the list of optima in each restart for future usage
xopt, fopt = [], []
eval_budget = self._max_eval
fopt = np.inf
best = -np.inf
wait_count = 0

for iteration in range(self._random_start):
x0 = self._space.sampling(1)[0]

Expand All @@ -392,25 +418,26 @@ def _multistart(self, obj_func):
if self._optimizer == 'BFGS':
if self.N_d + self.N_i != 0:
raise ValueError('BFGS is not supported with mixed variable types.')

xopt_, fopt_, stop_dict = fmin_l_bfgs_b(obj_func, x0, pgtol=1e-8,
# TODO: somehow this local lambda function can be pickled...
# for minimization
func = lambda x: tuple(map(lambda x: -1. * x, obj_func(x)))
xopt_, fopt_, stop_dict = fmin_l_bfgs_b(func, x0, pgtol=1e-8,
factr=1e6, bounds=self._bounds,
maxfun=eval_budget)
xopt_ = xopt_.flatten().tolist()
fopt_ = np.sum(fopt_)
fopt_ = -np.sum(fopt_)

if stop_dict["warnflag"] != 0 and self.verbose:
warnings.warn("L-BFGS-B terminated abnormally with the "
" state: %s" % stop_dict)
" state: %s" % stop_dict)

elif self._optimizer == 'MIES':
opt = mies(x0, obj_func, self._bounds.T, self._levels,
self.param_type, eval_budget, minimize=False,
verbose=False)
opt = mies(x0, obj_func, self._bounds.T, self._levels, self.param_type,
eval_budget, minimize=False, verbose=False)
xopt_, fopt_, stop_dict = opt.optimize()

if fopt_ < fopt:
fopt = fopt_
if fopt_ > best:
best = fopt_
wait_count = 0
if self.verbose:
print('[DEBUG] restart : {} - funcalls : {} - Fopt : {}'.format(iteration + 1,
Expand All @@ -419,15 +446,14 @@ def _multistart(self, obj_func):
wait_count += 1

eval_budget -= stop_dict['funcalls']
optima.append(xopt_)
foptima.append(-fopt_)
xopt.append(xopt_)
fopt.append(fopt_)

if eval_budget <= 0 or wait_count >= self._wait_iter:
break

# sort the optima in descending order
idx = np.argsort(foptima)[::-1]
return optima[idx[0]], foptima[idx[0]]
# maximization: sort the optima in descending order
idx = np.argsort(fopt)[::-1]
return xopt[idx[0]], fopt[idx[0]]

def _check_params(self):
assert hasattr(self.obj_func, '__call__')
Expand Down

0 comments on commit 335be09

Please sign in to comment.