In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from feature_importance import shap_feature_importance

In [2]:
# Read the initial design matrix (no header row) and give every column a
# positional 'feature_<i>' name.
X_raw = pd.read_csv('../design/X_initial', header=None)
X_raw.columns = [f'feature_{i}' for i in range(X_raw.shape[1])]

# Drop the last column, then keep only as many rows as there are remaining
# columns.
X = X_raw.iloc[:, :-1]
n_rows_kept = len(X.columns)
X = X.iloc[:n_rows_kept, :]

# Read the targets and trim them to the same number of rows as X.
y = pd.read_csv('../design/Y_initial', header=None).iloc[:n_rows_kept, :]
y.columns = ['target']

In [3]:
# Display the dimensions of the design matrix and of the target frame.
X.shape, y.shape

((317, 317), (317, 1))

In [4]:
from domain import read_configs, controllable_params, transform_domain
from globals import set_model_type

# Select the 'BO' model type first, then pass the configuration files to
# read_configs.
set_model_type('BO')

# Configuration files handed to read_configs; the orderer config is
# deliberately left commented out.
config_paths = [
        "../networks/configtx/configtx.yaml",
        "../networks/compose/docker/peercfg-org1/core.yaml",
        "../networks/compose/docker/peercfg-org2/core.yaml",
        "../networks/compose/docker/peercfg-org3/core.yaml",
        "../networks/compose/docker/peercfg-org4/core.yaml",
        # "../networks/compose/docker/ordcfg/orderer.yaml",
]
read_configs(config_paths)

# The search space used by the rest of the notebook: a list of dicts with
# 'name', 'type', 'bounds', 'config idx' and 'default value' entries.
domain = controllable_params
print(domain[:3])
len(domain)

Setting model type to: BO
[{'name': 'Orderer.BatchSize.AbsoluteMaxBytes|MB', 'type': 'discrete', 'bounds': (63, 122), 'config idx': 0, 'default value': 99}, {'name': 'Orderer.BatchSize.MaxMessageCount', 'type': 'discrete', 'bounds': (7, 18), 'config idx': 0, 'default value': 10}, {'name': 'Orderer.BatchSize.PreferredMaxBytes|KB', 'type': 'discrete', 'bounds': (357, 668), 'config idx': 0, 'default value': 512}]


317

In [5]:
def _identity(value):
    """Return the argument unchanged; the default (no-op) transformer."""
    return value


# Module-level hooks applied by objective() to its input and to its result.
input_transformer = _identity
output_transformer = _identity


def set_input_transformer(input_transformer_arg):
    """Replace the global callable applied to every point passed to objective()."""
    global input_transformer
    input_transformer = input_transformer_arg


def set_output_transformer(output_transformer_arg):
    """Replace the global callable applied to every value returned by objective()."""
    global output_transformer
    output_transformer = output_transformer_arg


# Number of objective() evaluations since the counter was last reset.
objective_call_counter = 0


# TODO: account for integer variables in the initial domain. Round all values to the nearest integer
def objective(x):
    """Evaluate the (placeholder) objective at point ``x``.

    Increments the global call counter, prints a trace line, runs ``x``
    through the current input transformer, and returns the constant benchmark
    value 1.1 after passing it through the current output transformer.
    """
    global objective_call_counter
    objective_call_counter = objective_call_counter + 1
    print("Objective called")

    # The transformed point is not consumed yet: the benchmark below is a
    # constant stand-in for the real measurement.
    x = input_transformer(x)

    benchmark_result = 1.1
    return output_transformer(benchmark_result)

In [6]:
import numpy as np
from pySOT.surrogate import GPRegressor
from pySOT.strategy import DYCORSStrategy
from poap.controller import SerialController
from pySOT.optimization_problems import OptimizationProblem
from sklearn.gaussian_process import GaussianProcessRegressor
from pySOT.experimental_design.experimental_design import ExperimentalDesign

def init_DYCORS(domain, n_trials, X, y, dim, is_rembo = False):
    """Build a serial pySOT controller running a DYCORS strategy.

    Parameters
    ----------
    domain : list of dict
        Search-space description; each entry needs a 'type' ('continuous' or
        'discrete') and a 'bounds' pair. Ignored and replaced when
        ``is_rembo`` is true.
    n_trials : int
        Number of evaluations allowed on top of the initial design points.
    X : numpy.ndarray
        Initial design, one row per point. When ``is_rembo`` is true it is
        clipped to the embedded box in place.
    y : array-like
        Observed targets for ``X``; used here to pre-fit the Gaussian process.
    dim : int
        Number of dimensions of the search space handed to pySOT.
    is_rembo : bool, optional
        If true, ``X`` is taken to be already projected into a ``dim``
        dimensional embedding and the domain becomes ``dim`` continuous
        variables bounded by [-1e4, 1e4].

    Returns
    -------
    poap.controller.SerialController
        Controller whose objective is ``objective`` and whose strategy is the
        configured ``DYCORSStrategy``; call ``.run()`` on it to optimise.
    """
    if is_rembo:
        # Transform the domain to be lower-dimensional: every embedded variable
        # is continuous and bounded only by a very wide box.
        domain = [{'type': 'continuous', 'bounds': [-1e4, 1e4]} for _ in range(dim)]
        # X and y are already transformed by the caller.

    lb = np.array([item['bounds'][0] for item in domain])
    ub = np.array([item['bounds'][1] for item in domain])

    if is_rembo:
        # Clip every row of X into the box in one vectorised call. Assigning
        # through X[...] keeps the in-place update of the caller's array that
        # the previous element-by-element loop performed.
        X[...] = np.clip(X, lb, ub)

    class CustomProblem(OptimizationProblem):
        """pySOT problem wrapper that delegates evaluation to ``objective``."""

        def __init__(self, dim, lb, ub):
            self.dim = dim
            self.lb = lb
            self.ub = ub
            # Index arrays of the continuous and the discrete variables,
            # derived from the (possibly REMBO-replaced) domain.
            self.cont_var = np.array([i for i, item in enumerate(domain) if item['type'] == 'continuous'])
            self.int_var = np.array([i for i, item in enumerate(domain) if item['type'] == 'discrete'])
            self.info = "Hyperledger Fabric Tuning"

        def eval(self, x):
            return objective(x)

    class PrecomputedDesign(ExperimentalDesign):
        """Experimental design that returns the supplied points unchanged."""

        def __init__(self, dim, X):
            self.dim = dim
            self.num_pts = X.shape[0]
            self.X = X

        def generate_points(self, lb, ub, int_var=None):
            return self.X

    # Pre-fit a GP on the known observations and hand it to pySOT's wrapper.
    # NOTE(review): y is only used for this pre-fit; confirm whether pySOT
    # re-evaluates the design points through `objective` when the run starts.
    regressor = GaussianProcessRegressor(random_state=13)
    regressor.fit(X, y)

    regressor_dycors = GPRegressor(dim=dim, lb=lb, ub=ub, gp=regressor)
    problem = CustomProblem(dim=dim, lb=lb, ub=ub)
    design = PrecomputedDesign(dim=dim, X=X)

    # The evaluation budget covers the initial design plus n_trials new points.
    strat = DYCORSStrategy(max_evals=design.num_pts + n_trials,
                       surrogate=regressor_dycors,
                       opt_prob=problem,
                       exp_design=design,
                       asynchronous=False,
                       batch_size=1,
                       num_cand=1,
                       )
    opt = SerialController(objective=problem.eval)
    opt.strategy = strat

    return opt

In [7]:
import GPyOpt

def init_BO(domain, X, y, acquisition_type, dim, is_rembo=False): # EI or LCB or MPI
    """Create a GPyOpt Bayesian optimiser seeded with existing observations.

    Parameters
    ----------
    domain : list of dict
        Search-space description passed through ``transform_domain``. Ignored
        and replaced when ``is_rembo`` is true.
    X : numpy.ndarray
        Initial design, one row per point. When ``is_rembo`` is true it is
        clipped to the embedded box in place.
    y : array-like
        Observed targets for ``X``.
    acquisition_type : str
        GPyOpt acquisition name ('EI', 'LCB' or 'MPI').
    dim : int
        Number of embedded dimensions; only used when ``is_rembo`` is true.
    is_rembo : bool, optional
        If true, ``X`` is taken to be already projected into a ``dim``
        dimensional embedding and the domain becomes ``dim`` continuous
        variables bounded by [-1e4, 1e4].

    Returns
    -------
    GPyOpt.methods.BayesianOptimization
        Configured optimiser; call ``run_optimization`` on it to optimise.
    """
    if is_rembo:
        # Transform the domain to be lower-dimensional: every embedded variable
        # is continuous and bounded only by a very wide box.
        domain = [{'type': 'continuous', 'bounds': [-1e4, 1e4]} for _ in range(dim)]
        # X and y are already transformed by the caller.

        lower = np.array([item['bounds'][0] for item in domain])
        upper = np.array([item['bounds'][1] for item in domain])
        # Clip every row of X into the box in one vectorised call. Assigning
        # through X[...] keeps the in-place update of the caller's array that
        # the previous element-by-element loop performed.
        X[...] = np.clip(X, lower, upper)

    # NOTE(review): with maximize=True GPyOpt flips the sign of `f`; confirm
    # that the precomputed Y passed below is interpreted with the same sign.
    opt = GPyOpt.methods.BayesianOptimization(
            f=objective,
            domain=transform_domain(domain),
            acquisition_type=acquisition_type,
            initial_design_type='random', # placeholder
            initial_design_numdata=1,
            maximize=True,
            normalize_Y=True,
            num_cores=6,
            X=X,
            Y=y
    )
    return opt

In [8]:
from typing import List, Tuple
from feature_importance import pca_feature_importance, sa_feature_importance

In [9]:
# def transform_domain(controllable_params):
from typing import Callable

#TODO: random
class BayesianOptimizer:
    """Set up and run one optimisation over a reduced search space.

    The search space is reduced either by keeping the ``n_relevant_features``
    top-ranked variables ('shap', 'pca', 'sa') or by a random linear embedding
    ('rembo'). The optimiser is GPyOpt for 'EI', 'UCB' and 'MPI', and a pySOT
    DYCORS run for 'DYCORS'.

    ``objective`` reads module-level transformers, so every instance keeps its
    own pair and (re)installs it when the model is built and again in
    ``run()``. Without this, a projection or sign flip configured by one
    optimizer would stay active for the next one.

    Parameters
    ----------
    X : pandas.DataFrame
        Initial design, one row per point and one column per domain entry.
    y : pandas.DataFrame
        Observed targets for ``X``.
    domain : list
        List of dicts describing the variables; each needs 'bounds' and 'type'.
    AF : str
        One of 'EI', 'UCB', 'MPI', 'DYCORS'.
    DR : str
        One of 'shap', 'pca', 'sa', 'rembo'.
    n_iter : int
        Number of optimisation iterations / extra evaluations.
    n_relevant_features : int, optional
        Number of variables to keep, or the embedding dimension for 'rembo'.

    Raises
    ------
    ValueError
        If ``AF`` or ``DR`` is not one of the supported values.
    """

    def __init__(self,
                 X: pd.DataFrame,
                 y: pd.DataFrame,
                 domain: list,
                 AF: str,
                 DR: str,
                 n_iter: int,
                 n_relevant_features: int = 15,
                 ):

        self.__check_params(AF, DR)

        self.X = X
        self.y = y
        self.AF = AF
        self.DR = DR
        self.domain = domain
        self.n_relevant_features = n_relevant_features
        self.model = None
        self.n_iter = n_iter

        self.__init_model()

    def __select_important_features(self):
        """Return the indices of the ``n_relevant_features`` top-ranked variables.

        Assumes the feature-importance helpers return a frame sorted from most
        to least important whose index holds positional column indices.
        TODO confirm against the feature_importance module.
        """
        if self.DR == 'shap':
            ranking = shap_feature_importance(self.X, self.y)
        elif self.DR == 'pca':
            ranking = pca_feature_importance(self.X, self.y)
        elif self.DR == 'sa':
            bounds = [item['bounds'] for item in self.domain]
            ranking = sa_feature_importance(self.X, self.y, bounds)
        else:
            raise ValueError(f"Variable selection for {self.DR} is not supported")
        return ranking.head(self.n_relevant_features).index.to_list()

    def __check_params(self, AF: str, DR: str):
        """Raise ValueError unless both the acquisition and reduction names are supported."""
        if AF not in ['EI', 'UCB', 'MPI', 'DYCORS']:
            raise ValueError(f"Acquisition function {AF} not supported")
        if DR not in ['shap', 'pca', 'sa', 'rembo']:
            raise ValueError(f"Dimension reduction technique {DR} not supported")

    def __install_transformers(self):
        """Make this instance's transformers the ones used by ``objective``."""
        set_input_transformer(self._input_transformer)
        set_output_transformer(self._output_transformer)

    def __init_model(self):
        """Reduce the search space, configure the transformers and build the model."""
        self.model = None

        # Always start from identity transformers so nothing configured by a
        # previously built optimizer leaks into this one.
        self._input_transformer = lambda x: x
        self._output_transformer = lambda x: x

        is_rembo = self.DR == 'rembo'

        if is_rembo:
            # Random projection from the embedding (n_relevant_features) to the
            # full space (len(domain)); the data is mapped into the embedding
            # with its pseudo-inverse.
            projection = np.random.normal(size=(len(self.domain), self.n_relevant_features))
            self.projection = projection
            self.X = self.X.to_numpy() @ np.linalg.pinv(projection).T

            lb = np.array([item['bounds'][0] for item in self.domain])
            ub = np.array([item['bounds'][1] for item in self.domain])
            # Map an embedded point back to the full space and clip it to the
            # original bounds before it reaches the objective.
            self._input_transformer = lambda x: np.clip(x @ projection.T, lb, ub)
        else:
            indices = self.__select_important_features()
            self.domain = [self.domain[i] for i in indices]
            # Only the columns are cut; y keeps all of its rows.
            self.X = self.X.iloc[:, indices].to_numpy()

        # Hand the optimisers a plain array for y in every mode (previously
        # the REMBO branch passed the DataFrame through unchanged).
        self.y = self.y.to_numpy()

        if self.AF == 'UCB':
            # UCB is run as GPyOpt's 'LCB' on the negated objective.
            self._output_transformer = lambda value: -value
            self.AF = 'LCB'

        self.__install_transformers()

        if self.AF == 'DYCORS':
            # init DYCORS
            self.model = init_DYCORS(self.domain, n_trials=self.n_iter, X=self.X, y=self.y, dim=self.n_relevant_features, is_rembo=is_rembo)
        else:
            # init GPyOpt BO
            self.model = init_BO(self.domain, self.X, self.y, acquisition_type=self.AF, dim=self.n_relevant_features, is_rembo=is_rembo)

    def run(self):
        """Run the configured optimisation for ``n_iter`` iterations."""
        # Another optimizer may have been built since this one was created;
        # make sure the objective uses this instance's transformers.
        self.__install_transformers()

        if self.AF == 'DYCORS':
            self.model.run()
        else:
            self.model.run_optimization(max_iter=self.n_iter)
    

In [10]:
from copy import deepcopy

# Smoke-test every acquisition function with every dimension-reduction method.
for AF in ['EI', 'UCB', 'MPI', 'DYCORS']:
    for DR in ['shap', 'pca', 'sa', 'rembo']:
        # objective() reads global transformers and a global counter. Reset
        # them before each combination so a projection ('rembo') or sign flip
        # ('UCB') installed by an earlier run cannot affect this one.
        set_input_transformer(lambda x: x)
        set_output_transformer(lambda x: x)
        objective_call_counter = 0

        print(f"Running {AF} with {DR}")
        # Deep copies keep the shared X, y and domain untouched between runs.
        bo = BayesianOptimizer(deepcopy(X), deepcopy(y), deepcopy(domain), AF, DR, n_iter=3)
        bo.run()
        print("Objective call counter:", objective_call_counter)

Running EI with shap
Objective called
Objective called
Objective called
Objective call counter: 3
Running EI with pca
Objective called
Objective called
Objective called
Objective call counter: 3
Running EI with sa
Objective called
Objective called
Objective called
Objective call counter: 3
Running EI with rembo
Objective called
Objective called
Objective called
Objective call counter: 3
Running UCB with shap
Objective called
Objective called
Objective called
Objective call counter: 3
Running UCB with pca
Objective called
Objective called
Objective called
Objective call counter: 3
Running UCB with sa
Objective called
Objective called
Objective called
Objective call counter: 3
Running UCB with rembo
Objective called
Objective called
Objective called
Objective call counter: 3
Running MPI with shap
Objective called
Objective called
Objective called
Objective call counter: 3
Running MPI with pca
Objective called
Objective called
Objective called
Objective call counter: 3
Running MPI with sa