In [None]:
import numpy as np
from pathlib import Path
import pymc as pm
import arviz as az
import multiprocessing as mp
from estival.wrappers import pymc as epm
from estival.sampling import tools as esamp
from estival.wrappers import nevergrad as eng
from estival.utils.parallel import map_parallel
import nevergrad as ng

from tbdynamics.constants import BURN_IN, OPTI_DRAWS
from autumn.infrastructure.remote import springboard
from tbdynamics.model import build_model
from tbdynamics.calib_utils import get_bcm

In [None]:
def calibrate(out_path, draws, tune):
    """Run the calibration pipeline and write every output under ``out_path``.

    Steps, in the order they run:

    1. Draw samples with ``bcm.sample.lhs``, rank them by the
       ``"loglikelihood"`` column of their likelihood extras and keep the
       best ones as starting points.
    2. Refine each starting point with nevergrad's ``TwoPointsDE`` optimiser
       against ``bcm.logposterior`` (run through ``map_parallel``).
    3. Use the refined points as initial values for ``pm.DEMetropolisZ``
       chains sampled with ``pm.sample``.
    4. Persist the raw inference data, a post-burn-in subsample, the model
       results for that subsample, and the likelihood extras of the raw run.

    Args:
        out_path: Output directory (``str`` or ``pathlib.Path``). It is
            created, including parents, if it does not exist yet.
        draws: Forwarded to ``pm.sample`` as ``draws``.
        tune: Forwarded to ``pm.sample`` as ``tune``.

    Returns:
        None. All results are written to disk:
        ``calib_full_out.nc``, ``calib_extract_out.h5`` and ``results.hdf``
        (keys ``spaghetti`` and ``likelihood``).
    """
    # Accept plain strings too, and fail early on an unusable directory
    # instead of after the (long) optimisation and sampling stages.
    out_path = Path(out_path)
    out_path.mkdir(parents=True, exist_ok=True)
    results_file = str(out_path / 'results.hdf')

    n_lhs = 16       # size of the initial bcm.sample.lhs draw
    n_chains = 8     # one optimised starting point per MCMC chain
    n_samples = 100  # number of post-burn-in draws extracted for outputs

    bcm = get_bcm()

    # Rank the initial draw by log-likelihood and keep the top n_chains rows.
    lhs_samples = bcm.sample.lhs(n_lhs)
    lhs_lle = esamp.likelihood_extras_for_samples(lhs_samples, bcm)
    lhs_sorted = lhs_lle.sort_values("loglikelihood", ascending=False)
    start_lhs = lhs_samples[lhs_sorted.index].iloc[0:n_chains]

    def optimize_ng(idx_sample):
        """Optimise one ``(index, sample)`` pair; return ``(index, rec.value[1])``."""
        idx, sample = idx_sample
        opt = eng.optimize_model(
            bcm,
            budget=OPTI_DRAWS,
            opt_class=ng.optimizers.TwoPointsDE,
            obj_function=bcm.logposterior,
            suggested=sample,
            num_workers=4,
        )
        rec = opt.minimize(OPTI_DRAWS)
        # NOTE(review): rec.value[1] is presumably the kwargs dict of the
        # recommended parameters — confirm against the estival wrapper.
        return idx, rec.value[1]

    opt_samples = map_parallel(optimize_ng, start_lhs.iterrows(), n_workers=2, mode='process')
    opt_samples = bcm.sample.convert(opt_samples)
    init_samps = opt_samples.iloc[0:n_chains].convert("list_of_dicts")

    with pm.Model() as pm_model:
        variables = epm.use_model(bcm)
        # Tuning draws are kept (discard_tuned_samples=False); burn-in is
        # removed explicitly below via BURN_IN.
        idata_raw = pm.sample(
            step=[pm.DEMetropolisZ(variables)],
            draws=draws,
            tune=tune,
            cores=n_chains,
            discard_tuned_samples=False,
            chains=n_chains,
            progressbar=False,
            initvals=init_samps,
        )
    idata_raw.to_netcdf(str(out_path / 'calib_full_out.nc'))

    # Drop the first BURN_IN draws of every chain, then subsample.
    burnt_idata = idata_raw.sel(draw=np.s_[BURN_IN:])
    idata_extract = az.extract(burnt_idata, num_samples=n_samples)
    bcm.sample.convert(idata_extract).to_hdf5(out_path / 'calib_extract_out.h5')

    # `key` is passed by keyword: positional use is deprecated in pandas >= 2.1.
    spaghetti_res = esamp.model_results_for_samples(idata_extract, bcm)
    spaghetti_res.results.to_hdf(results_file, key='spaghetti')

    like_df = esamp.likelihood_extras_for_idata(idata_raw, bcm)
    like_df.to_hdf(results_file, key='likelihood')

def run_calibration(bridge: springboard.task.TaskBridge, draws, tune):
    """Springboard task entry point: run ``calibrate`` and log completion.

    Args:
        bridge: Task bridge supplied by springboard; its ``out_path`` is used
            as the output directory and its ``logger`` for the final message.
        draws: Forwarded to ``calibrate``.
        tune: Forwarded to ``calibrate``.

    Raises:
        RuntimeError: If a multiprocessing start method other than
            ``'forkserver'`` has already been fixed for this process.
    """
    # Imported locally rather than relying on the notebook-level import —
    # presumably so the function is self-contained when run remotely (confirm).
    import multiprocessing as mp

    # set_start_method() may only be called once per process; skip it when
    # 'forkserver' is already active so the task can be re-run in-process.
    if mp.get_start_method(allow_none=True) != 'forkserver':
        mp.set_start_method('forkserver')

    # calibrate() writes all of its outputs to disk and returns nothing.
    calibrate(bridge.out_path, draws, tune)
    bridge.logger.info('Calibration complete')


In [None]:
# OUT_PATH = Path.cwd() /'runs'
# draws = 100
# tune = 100

In [None]:
# calibrate(OUT_PATH, draws, tune)

In [None]:
# Sampler budget for the remote run; both values are forwarded to the task
# and also encoded in the run label.
draws = 50000
tune = 10000

# Shell commands handed to the launcher as `extra_commands`.
# Every entry needs its own trailing comma: without it Python silently
# concatenates adjacent string literals into a single (broken) command.
commands = [
    'git clone https://github.com/longbui/tbdynamics.git',
    'pip install -r requirements.txt',
    'pip install -e ./tbdynamics',
]

# Machine request for the remote task.
# NOTE(review): the meaning of the positional 8 and 2 is not visible here
# (presumably core count and a memory setting) — confirm against springboard.
mspec = springboard.EC2MachineSpec(8, 2, 'compute')

# Run label of the form d<draws>k-t<tune>k-b<burn-in>k, each in thousands.
run_str = '-'.join((
    f'd{int(draws / 1000)}k',
    f't{int(tune / 1000)}k',
    f'b{int(BURN_IN / 1000)}k',
))

# Keyword arguments that springboard passes on to run_calibration.
tspec_args = dict(draws=draws, tune=tune)
tspec = springboard.TaskSpec(run_calibration, tspec_args)

run_path = springboard.launch.get_autumn_project_run_path(
    'tbdynamics',
    'first_calibration',
    run_str,
)
runner = springboard.launch.launch_synced_autumn_task(
    tspec,
    mspec,
    run_path,
    branch='tb-calib',
    extra_commands=commands,
)


In [None]:

# Print whatever runner.tail() returns (presumably the most recent log lines
# of the launched task — confirm against springboard).
print(runner.tail())

In [None]:
# Call runner.wait() (presumably blocks this cell until the remote task has
# finished — confirm against springboard).
runner.wait()

In [None]:
# Print whatever runner.get_iodump() returns (presumably the task's captured
# stdout/stderr — confirm against springboard).
print(runner.get_iodump())