In [1]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
from pathlib import Path
import pymc as pm
import arviz as az
import multiprocessing as mp
from estival.wrappers import pymc as epm
from estival.sampling import tools as esamp
from estival.wrappers import nevergrad as eng
from estival.utils.parallel import map_parallel
import nevergrad as ng

from tbdynamics.constants import BURN_IN, OPTI_DRAWS
from autumn.infrastructure.remote import springboard
from tbdynamics.calib_utils import get_bcm
import pandas as pd
from estival.utils.sample import SampleTypes




In [2]:
# Parameter values handed to get_bcm() to build the calibration model.
# "contact_rate" is not part of the active set (its entry is commented out).
# NOTE(review): presumably every key listed here is held fixed while
# contact_rate is sampled -- confirm against get_bcm.
#
# Disabled alternative parameter set, kept for reference only:
#     "start_population_size": 2300000.0,
#     "contact_rate": 0.006962237803672445,
#     "seed_time": 1810.0,
#     "seed_num": 10.0,
#     "seed_duration": 3.0,
#     "rr_infection_latent": 0.25,
#     "rr_infection_recovered": 0.35,
#     "progression_multiplier": 1.8,
#     "screening_scaleup_shape": 0.1,
#     "screening_inflection_time": 1993.0,
#     "screening_end_asymp": 0.65,
#     "smear_positive_death_rate": 0.4290512333229253,
#     "smear_negative_death_rate": 0.028841335859094236,
#     "smear_positive_self_recovery": 0.26738283049585143,
#     "smear_negative_self_recovery": 0.185673229300567,
params = {
    # -- population / infection / progression --
    "start_population_size": 2_400_000,
    # "contact_rate": 0.00745895616585752,
    "rr_infection_latent": 0.25891122276497425,
    "rr_infection_recovered": 0.34957924926059264,
    "progression_multiplier": 1.6839943586933996,
    # -- seeding --
    "seed_time": 1814.7241223970148,
    "seed_num": 36.509919333921204,
    "seed_duration": 16.644044537120465,
    # -- smear-positive / smear-negative death and self-recovery --
    "smear_positive_death_rate": 0.42934402690060847,
    "smear_negative_death_rate": 0.021003428835764996,
    "smear_positive_self_recovery": 0.25431711539772306,
    "smear_negative_self_recovery": 0.17302797078670518,
    # -- screening / detection --
    "screening_scaleup_shape": 0.13113566147445102,
    "screening_inflection_time": 1993.4460082855571,
    "screening_end_asymp": 0.5758339575609973,
    "detection_reduction": 0.7562874736663389,
}

# Build the calibration model object used by every later cell.
bcm = get_bcm(params)

In [3]:
def calibrate(out_path, bcm, draws, tune):
    """Run an optimisation-seeded DEMetropolisZ calibration and write its outputs.

    Workflow:
      1. Draw 16 samples with ``bcm.sample.lhs(..., ci=0.67)``, compute their
         likelihood extras and sort them by ``"loglikelihood"`` (best first).
      2. Run a nevergrad ``TwoPointsDE`` optimisation (budget 100) for every
         row through ``map_parallel``; the first ``n_chains`` converted results
         become the chains' initial values.
      3. Sample with ``pm.DEMetropolisZ`` using ``draws``/``tune`` on 4 chains,
         keeping the tuning samples (``discard_tuned_samples=False``).
      4. Write the following files into ``out_path``:
           - ``calib_full_out.nc``    : the raw inference data
           - ``calib_extract_out.h5`` : 200 samples extracted after dropping
                                        the first ``BURN_IN`` draws
           - ``results.hdf``          : key ``"spaghetti"`` (model results for
                                        the extracted samples) and key
                                        ``"likelihood"`` (likelihood extras for
                                        the raw, un-burnt inference data)

    Args:
        out_path: Output directory (``pathlib.Path`` or path string). It is
            created, including parents, if it does not exist yet.
        bcm: Calibration model object, e.g. the value returned by ``get_bcm``.
        draws: Number of draws per chain, forwarded to ``pm.sample``.
        tune: Number of tuning iterations per chain, forwarded to ``pm.sample``.

    Returns:
        The raw inference data object returned by ``pm.sample``.
    """
    n_chains = 4
    n_samples = 200
    n_lhs_samples = 16
    opt_budget = 100

    def optimize_ng_with_idx(item):
        """Optimise one ``(index, sample)`` pair; return ``(index, optimised values)``."""
        idx, sample = item
        opt = eng.optimize_model(
            bcm,
            budget=opt_budget,
            opt_class=ng.optimizers.TwoPointsDE,
            suggested=sample,
            num_workers=4,
        )
        rec = opt.minimize(opt_budget)
        # NOTE(review): rec.value[1] is presumably the keyword-argument dict of
        # the recommendation, i.e. the optimised parameters -- confirm.
        return idx, rec.value[1]

    # Create the output directory up front so a missing folder is reported
    # before the expensive optimisation and sampling, not after them.
    out_path = Path(out_path)
    out_path.mkdir(parents=True, exist_ok=True)
    results_path = str(out_path / "results.hdf")

    # Rank the initial samples by log-likelihood, then optimise each of them.
    lhs_samples = bcm.sample.lhs(n_lhs_samples, ci=0.67)
    lhs_lle = esamp.likelihood_extras_for_samples(lhs_samples, bcm)
    lhs_sorted = lhs_lle.sort_values("loglikelihood", ascending=False)
    opt_samples_idx = map_parallel(optimize_ng_with_idx, lhs_sorted.iterrows())
    best_opt_samps = bcm.sample.convert(opt_samples_idx)
    # One starting point per chain.
    init_samps = best_opt_samps.convert(SampleTypes.LIST_OF_DICTS)[:n_chains]

    with pm.Model():
        variables = epm.use_model(bcm)
        idata_raw = pm.sample(
            step=[pm.DEMetropolisZ(variables)],
            draws=draws,
            tune=tune,
            cores=n_chains,
            discard_tuned_samples=False,
            chains=n_chains,
            progressbar=True,
            initvals=init_samps,
        )
    idata_raw.to_netcdf(str(out_path / "calib_full_out.nc"))

    # NOTE(review): if BURN_IN >= draws this selection is empty -- confirm
    # BURN_IN is compatible with the draws used for short local runs.
    burnt_idata = idata_raw.sel(draw=np.s_[BURN_IN:])
    idata_extract = az.extract(burnt_idata, num_samples=n_samples)
    bcm.sample.convert(idata_extract).to_hdf5(out_path / "calib_extract_out.h5")

    # `key` is passed by keyword: positional use of to_hdf arguments other
    # than the path is deprecated in recent pandas releases.
    spaghetti_res = esamp.model_results_for_samples(idata_extract, bcm)
    spaghetti_res.results.to_hdf(results_path, key="spaghetti")

    like_df = esamp.likelihood_extras_for_idata(idata_raw, bcm)
    like_df.to_hdf(results_path, key="likelihood")

    return idata_raw


def run_calibration(bridge: springboard.task.TaskBridge, bcm, draws, tune):
    """Task entry point: run ``calibrate`` into the bridge's output directory.

    Args:
        bridge: Task bridge; this function uses its ``out_path`` (destination
            for the calibration outputs) and its ``logger``.
        bcm: Calibration model object forwarded to ``calibrate``.
        draws: Number of draws per chain, forwarded to ``calibrate``.
        tune: Number of tuning iterations per chain, forwarded to ``calibrate``.

    Raises:
        RuntimeError: From ``multiprocessing.set_start_method`` if a start
            method has already been set in this process.
    """
    # NOTE(review): the function-scope import is presumably deliberate so the
    # name resolves when this function is shipped to a remote worker -- confirm.
    import multiprocessing as mp

    mp.set_start_method("forkserver")
    # The result is not needed here: calibrate() persists everything under
    # bridge.out_path itself.
    calibrate(bridge.out_path, bcm, draws, tune)
    bridge.logger.info("Calibration complete")

In [4]:
# Local calibration run; outputs go to ./runs/r1004 under the working directory.
OUT_PATH = Path.cwd() / 'runs/r1004'
# Make sure the run directory exists before calibrate() writes files into it.
OUT_PATH.mkdir(parents=True, exist_ok=True)
draws = 3000
tune = 1000
calibrate(OUT_PATH, bcm, draws, tune)

Multiprocess sampling (4 chains in 4 jobs)
DEMetropolisZ: [contact_rate]


Output()

Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 355 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


In [5]:
# idata_raw

In [6]:
# draws = 100000
# tune = 20000

# commands = [
#     'git clone --branch tb-covid https://github.com/longbui/tbdynamics.git',
#     'pip install -e ./tbdynamics',
# ]

# mspec = springboard.EC2MachineSpec(6, 2, 'compute')
# run_str = f'd{int(draws / 1000)}k-t{int(tune / 1000)}k-b{int(BURN_IN / 1000)}k'
# tspec_args = {'bcm': bcm,'draws': draws, 'tune': tune}
# tspec = springboard.TaskSpec(run_calibration, tspec_args)
# run_path =  springboard.launch.get_autumn_project_run_path('tbdynamics', 's10043_calibration', run_str)
# runner = springboard.launch.launch_synced_autumn_task(tspec, mspec, run_path, branch=None, extra_commands=commands)


In [7]:
# print(runner.tail())

In [8]:
# from autumn.infrastructure.remote import springboard
# rts = springboard.task.RemoteTaskStore()
# rts.cd('projects/tbdynamics/s10043_calibration')
# rts.ls()

# mt.get_log("crash")

In [9]:
# mt = rts.get_managed_task('2024-04-11T2322-d100k-t20k-b20k')

In [10]:
# print(mt.get_log('crash'))

In [11]:
# print(runner.get_log("crash"))