In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append('../../src/')
import stochastic_benchmark as SB

from wishart_ws import postprocess_linear, postprocess_random

# from collections import defaultdict
# import dill
# import seaborn as sns
# import matplotlib.pyplot as plt
# import matplotlib.patches as mpatches
import numpy as np
import os
# import pandas as pd
# import glob
# import seaborn as sns
# import seaborn.objects as so

import bootstrap
# import df_utils
import interpolate
import random_exploration
import sequential_exploration
import names
import stats
import success_metrics
# import training
from utils_ws import *


In [None]:
# --- Basic information ---
alpha = '0.5'
# Path to the working directory
here = os.getcwd()
parameter_names = ['sweeps', 'replicas', 'pcold', 'phot']
# Columns indicating how instances should be grouped (default: ['instance'])
instance_cols = ['instance']

# --- Response information ---
# Column holding the response (default: 'PerfRatio')
response_key = 'PerfRatio'
# 1 to maximize the response, -1 to minimize it (default: 1)
response_dir = 1

# --- Optimization switches ---
# Read dataframes back when they are available (default: True)
recover = True
# Segment bootstrapping and interpolation to reduce memory usage (default: True)
reduce_mem = True
# Monotonize the virtual best (default: True)
smooth = True

sb = SB.stochastic_benchmark(
    parameter_names,
    here,
    instance_cols,
    response_key,
    response_dir,
    recover,
    reduce_mem,
    smooth,
)

In [None]:
# Bootstrap arguments shared across the success metrics
shared_args = {
    'response_col': 'Energy',
    'resource_col': 'MeanTime',
    'response_dir': -1,
    'confidence_level': 68,
    'random_value': 0.,
}
# Metric-specific arguments, one entry per metric key
metric_args = {
    'Response': {'opt_sense': -1},
    'SuccessProb': {'gap': 1.0, 'response_dir': -1},
    'RTT': {
        'fail_value': np.nan,
        'RTT_factor': 1.,
        'gap': 1.0,
        's': 0.99,
    },
}

def update_rules(self, df):
    """Refresh the bootstrap parameters for one group.

    Reads the first row of ``df`` and stores:
      * ``GTMinEnergy`` as ``self.shared_args['best_value']``
        (the best value for that instance), and
      * ``MeanTime`` as ``self.metric_args['RTT']['RTT_factor']``.

    ``self`` is presumably the bootstrap-parameters object this function
    is registered on as ``update_rule`` -- confirm against the bootstrap module.
    """
    self.shared_args['best_value'] = df['GTMinEnergy'].iloc[0]
    self.metric_args['RTT']['RTT_factor'] = df['MeanTime'].iloc[0]

# Aggregated column
agg = 'count'
# Success metrics we want to calculate
sms = [
    success_metrics.Response,
    success_metrics.PerfRatio,
    success_metrics.InvPerfRatio,
    success_metrics.SuccessProb,
    success_metrics.Resource,
    success_metrics.RTT,
]
boots_range = range(50, 1001, 50)
# One column name per (warmstart flag, hpo trial) combination
ssOrderCols = [
    'warmstart={}_hpo_order={}'.format(warm_flag, trial_idx)
    for warm_flag in [0, 1]
    for trial_idx in range(10)
]
bsParams = bootstrap.BootstrapParameters(
    shared_args=shared_args,
    update_rule=update_rules,
    agg=agg,
    metric_args=metric_args,
    success_metrics=sms,
    keep_cols=ssOrderCols,
)
# Build the iterator from the base parameters and boots_range
bs_iter_class = bootstrap.BSParams_range_iter()
bsparams_iter = bs_iter_class(bsParams, boots_range)

# How group names should be parsed from raw file names
def group_name_fcn(raw_filename):
    """Extract the group name from a raw results file path.

    The group name is the slice of the file's base name that starts at
    ``'inst'`` and stops just before the first ``'.'`` that follows it.

    Parameters
    ----------
    raw_filename : str
        Path (or bare name) of a raw results file.

    Returns
    -------
    str
        The group name, e.g. ``'inst=12'`` for ``'results_inst=12.pkl'``.

    Raises
    ------
    ValueError
        If the base name contains no ``'inst'``, or no ``'.'`` after it.
    """
    base_name = os.path.basename(raw_filename)
    start_idx = base_name.index('inst')
    # Look for the dot only from 'inst' onward: a dot earlier in the name
    # (e.g. 'alpha_0.5_inst_3.pkl') would otherwise place end_idx before
    # start_idx and collapse every such file to the empty group name.
    end_idx = base_name.index('.', start_idx)
    return base_name[start_idx:end_idx]

# Run the bootstrap step, passing the bootstrap-parameter iterator and
# the function that parses group names from raw file names.
sb.run_Bootstrap(bsparams_iter, group_name_fcn)



In [None]:
# Interpolate 
def resource_fcn(df):
    """Return the product of the 'sweeps', 'replicas' and 'boots' entries of ``df``."""
    sweeps_times_replicas = df['sweeps'] * df['replicas']
    return sweeps_times_replicas * df['boots']
# Interpolation settings: resource function, parameter columns, and the
# order columns passed as ignore_cols
iParams = interpolate.InterpolationParameters(
    resource_fcn,
    parameters=parameter_names,
    ignore_cols=ssOrderCols,
)

sb.run_Interpolate(iParams)

In [None]:
# Stats computation settings
train_test_split = 0.8
metrics = [
    'Response',
    'RTT',
    'PerfRatio',
    'SuccProb',
    'MeanTime',
    'InvPerfRatio',
]
stParams = stats.StatsParameters(
    metrics=metrics,
    stats_measures=[stats.Median()],
)

sb.run_Stats(stParams, train_test_split)

In [None]:
# If you do not want to run step by step, initialize parameters as above and run initAll
#sb.initAll(bsparams_iter, iParams, stParams, resource_fcn, train_test_split, group_name_fcn)

In [None]:
# Run the virtual best baseline, then the 'linear' projection experiment
# on each of the two sources
sb.run_baseline()
for projection_source in ('TrainingStats', 'TrainingResults'):
    sb.run_ProjectionExperiment(
        projection_source,
        lambda x: postprocess_linear(x),
        'linear',
    )

In [None]:
# Set up random-search and sequential-search parameters

# Make sure search budgets align with the baselines - needed for the distance
recipes, _ = sb.baseline.evaluate()
recipes.reset_index(inplace=True)
resource_values = list(recipes['resource'])
budgets = [
    mantissa * 10**exponent
    for mantissa in [1, 1.5, 2, 3, 5, 7]
    for exponent in [3, 4, 5]
] + [1e6]
# Replace each requested budget with the closest available resource value
budgets = np.unique([take_closest(resource_values, requested) for requested in budgets])

restrict = 'grid_search'

# Columns that determine the order in sequential search experiments
ssOrderCols0 = ['warmstart=0_hpo_order={}'.format(trial_idx) for trial_idx in range(10)]
ssOrderCols1 = ['warmstart=1_hpo_order={}'.format(trial_idx) for trial_idx in range(10)]

# Column being optimized. Different from initialization because
# aggregated metrics include the name
key = names.param2filename({'Key': 'PerfRatio', 'Metric': 'median'}, '')

rsParams = random_exploration.RandomSearchParameters(
    budgets=budgets,
    parameter_names=parameter_names,
    key=key,
)

# NOTE(review): the sequential searches use the literal 'Key=PerfRatio'
# rather than `key` above -- confirm this difference is intended.
ssParams0, ssParams1 = (
    sequential_exploration.SequentialSearchParameters(
        budgets=budgets,
        order_cols=order_cols,
        parameter_names=parameter_names,
        key='Key=PerfRatio',
    )
    for order_cols in (ssOrderCols0, ssOrderCols1)
)

sb.run_RandomSearchExperiment(
    rsParams,
    postprocess=postprocess_random,
    postprocess_name='custom',
)
for ss_params, ss_label in ((ssParams0, 'cold'), (ssParams1, 'warm')):
    sb.run_SequentialSearchExperiment(
        ss_params,
        id_name=ss_label,
        postprocess=postprocess_random,
        postprocess_name='custom',
    )

In [None]:
# Initialize plotting, then set the x-limits on sb.plots to (10**3, 10**6).
sb.initPlotting()
sb.plots.set_xlims((10**3,  10**6))

In [None]:
# Performance plot; the call's result is unpacked as (fig, axs).
fig, axs = sb.plots.plot_performance()
# Optional tweaks, currently disabled:
# axs.set_ylim(0.93, 1.001)
# Author note: keep pink static, cold start
# fig.savefig('Performance.pdf')

In [None]:
# Parameter-distance plot, shown with a logarithmic y-axis.
fig, axs = sb.plots.plot_parameters_distance()
axs.set_yscale('log')
# Disabled save (uses `fig`, the figure returned above):
# fig.savefig('Parameters_distance.pdf')

In [None]:
# Meta-parameter plots: figs is iterated as {experiment: {param: figure}}
# and every figure is saved to its own PDF.
figs, axes = sb.plots.plot_meta_parameters()
for experiment, figs_by_param in figs.items():
    for param, fig in figs_by_param.items():
        # Axis notes: tau on a log scale, frac on a linear one.
        # Disabled tweak:
        # if param == 'tau':
        #     fig.axes[0].set_yscale('log')
        pdf_name = '{}_metaparameters={}.pdf'.format(experiment, param)
        fig.savefig(pdf_name)
    

In [None]:
# One plot per parameter; only the 'sweeps' axis gets a fixed y-range.
figs, axes = sb.plots.plot_parameters_separate()
for param, axs in axes.items():
    if param != 'sweeps':
        continue
    axs.set_ylim(1, 50)
    # Disabled save (NOTE(review): `fig` is not assigned in this cell):
    # fig.savefig('Recommended_parameter={}_scale={}.pdf'.format(param, sb.plots.xscale))

In [None]:
# All parameters in one figure; only the 'sweeps' axis gets a fixed y-range.
fig, axes = sb.plots.plot_parameters_together()
for param, axs in axes.items():
    if param != 'sweeps':
        continue
    axs.set_ylim(1, 50)
# Disabled save:
# fig.savefig('Recommended_allparams_scale={}.pdf'.format(sb.plots.xscale))