# PEtab benchmark model with BayesFlow

In [None]:
# pip install git+https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab.git@master#subdirectory=src/python
# pypesto, amici, petab, fides, joblib

In [1]:
import os

# Choose the Keras backend before keras / bayesflow are imported further down.
# A backend that is already configured in the environment is kept and only reported.
if "KERAS_BACKEND" not in os.environ:
    os.environ["KERAS_BACKEND"] = "jax"
else:
    print(f"Using '{os.environ['KERAS_BACKEND']}' backend")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from copy import deepcopy
import pickle
from joblib import Parallel, delayed
from typing import Union
from collections import defaultdict

import benchmark_models_petab as benchmark_models
import petab
import pypesto.optimize as optimize
import pypesto.sample as sample
import pypesto.petab
import pypesto.visualize as visualize
from pypesto.visualize.model_fit import visualize_optimized_model_fit
from scipy import stats

import keras
import bayesflow as bf

import amici
import logging
amici.swig_wrappers.logger.setLevel(logging.CRITICAL)
pypesto.logging.log(level=logging.ERROR, name="pypesto.petab", console=True)

from petab_helper import scale_values, values_to_linear_scale, amici_pred_to_array, apply_noise_to_data

# print all model names
print(benchmark_models.MODELS)

INFO:2025-09-22 10:36:37,415:jax._src.xla_bridge:822: Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: dlopen(libtpu.so, 0x0001): tried: 'libtpu.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibtpu.so' (no such file), '/opt/homebrew/lib/libtpu.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache), 'libtpu.so' (no such file)
INFO:jax._src.xla_bridge:Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: dlopen(libtpu.so, 0x0001): tried: 'libtpu.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibtpu.so' (no such file), '/opt/homebrew/lib/libtpu.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/lib/libtpu.so' (no such file), '/usr/lib/libtpu.so' (no such file, not in dyld cache), 'libtpu.so' (no such file)
INFO:bayesflow:Using backend 'jax'


['Alkan_SciSignal2018', 'Armistead_CellDeathDis2024', 'Bachmann_MSB2011', 'Beer_MolBioSystems2014', 'Bertozzi_PNAS2020', 'Blasi_CellSystems2016', 'Boehm_JProteomeRes2014', 'Borghans_BiophysChem1997', 'Brannmark_JBC2010', 'Bruno_JExpBot2016', 'Chen_MSB2009', 'Crauste_CellSystems2017', 'Elowitz_Nature2000', 'Fiedler_BMCSystBiol2016', 'Froehlich_CellSystems2018', 'Fujita_SciSignal2010', 'Giordano_Nature2020', 'Isensee_JCB2018', 'Lang_PLOSComputBiol2024', 'Laske_PLOSComputBiol2019', 'Lucarelli_CellSystems2018', 'Okuonghae_ChaosSolitonsFractals2020', 'Oliveira_NatCommun2021', 'Perelson_Science1996', 'Rahman_MBS2016', 'Raia_CancerResearch2011', 'Raimundez_PCB2020', 'SalazarCavazos_MBoC2020', 'Schwen_PONE2014', 'Smith_BMCSystBiol2013', 'Sneyd_PNAS2002', 'Weber_BMC2015', 'Zhao_QuantBiol2020', 'Zheng_PNAS2012']


In [2]:
# generate petab problem
#job_id = int(os.environ.get('SLURM_ARRAY_TASK_ID', 0))
n_cpus = 10 #int(os.environ.get('SLURM_CPUS_PER_TASK', 1))
problem_name = "Beer_MolBioSystems2014" #"Raimundez_PCB2020", "Beer_MolBioSystems2014", "Boehm_JProteomeRes2014"
storage = '' # f'plots/{problem_name}/'
petab_problem = benchmark_models.get_problem(problem_name)

# decrease upper bounds for offset, scaling and noise parameters
scale_params_id = [name for name in petab_problem.parameter_df.index.values
                   if name.startswith(('offset', 'scale'))]
petab_problem.parameter_df.loc[scale_params_id, 'upperBound'] = 100  # instead of 1000
sd_params_id = [name for name in petab_problem.parameter_df.index.values if name.startswith('sd_')]
petab_problem.parameter_df.loc[sd_params_id, 'upperBound'] = 10  # instead of 1000

if problem_name == "Raimundez_PCB2020":
    # Elba added normal priors for the scaling params
    scale_params_id = [name for name in petab_problem.parameter_df.index.values if name.startswith('s_')]
    petab_problem.parameter_df.loc[scale_params_id, 'objectivePriorType'] = "normal"
    petab_problem.parameter_df.loc[scale_params_id, 'objectivePriorParameters'] = "1;10"
    petab_problem.parameter_df.loc[scale_params_id, 'parameterScale'] = "lin"

# add normal prior (on parameter scale) around the nominal parameter values
# for every estimated parameter that does not have a prior yet
real_data_params = petab_problem.parameter_df.nominalValue
std = 0.5
for i in real_data_params.index:
    if petab_problem.parameter_df.loc[i, 'estimate'] == 0:
        continue
    # set prior mean depending on scale
    mean = scale_values(real_data_params.loc[i], petab_problem.parameter_df.loc[i, 'parameterScale'])
    if 'objectivePriorType' not in petab_problem.parameter_df or pd.isna(petab_problem.parameter_df.loc[i, 'objectivePriorType']):
        petab_problem.parameter_df.loc[i, 'objectivePriorType'] = "parameterScaleNormal"
        petab_problem.parameter_df.loc[i, 'objectivePriorParameters'] = f"{mean};{std}"

# sanity-check the priors: drop priors of fixed parameters, validate the remaining ones
for i, row in petab_problem.parameter_df.iterrows():
    if 'objectivePriorType' not in row or pd.isna(row['objectivePriorType']):
        continue
    if row['estimate'] == 0:
        print(f"Parameter {i} has a {row['objectivePriorType']} prior but is not estimated, setting to nan")
        petab_problem.parameter_df.loc[i, 'objectivePriorType'] = np.nan
        # `row` is a snapshot taken before the assignment above; the prior has just been
        # removed, so it must not be validated (and possibly rejected) below.
        continue
    # validate petab problem, if scale for parameter is defined, prior must be on the same scale
    if row['parameterScale'] != 'lin' and not row['objectivePriorType'].startswith('parameterScale'):
        raise ValueError(f"Parameter {i} has parameterScale {row['parameterScale']} but {row['objectivePriorType']} prior")

# load problem
importer = pypesto.petab.PetabImporter(petab_problem, simulator_type="amici")
factory = importer.create_objective_creator()

model = factory.create_model(verbose=False)
amici_predictor = factory.create_predictor()
amici_predictor.amici_objective.amici_solver.setAbsoluteTolerance(1e-8)

# Creating the pypesto problem from PEtab
pypesto_problem = importer.create_problem(
    startpoint_kwargs={"check_fval": True, "check_grad": True}
)

INFO:petab.v1.lint:Checking model...
INFO:petab.v1.lint:Checking measurement table...
INFO:petab.v1.lint:Checking condition table...
INFO:petab.v1.lint:Checking observable table...
INFO:petab.v1.lint:Checking parameter table...
INFO:petab.v1.lint:PEtab format check completed successfully.


In [3]:
def prior():
    """Draw one joint sample from the prior encoded in the PEtab parameter table.

    Reads the module-level ``petab_problem`` (bounds, scales, prior types and
    prior parameters) and ``pypesto_problem`` (parameter ordering).

    Parameters with ``estimate == 0`` are fixed to their nominal value. A missing
    prior type is treated like ``parameterScaleUniform`` over the parameter bounds.
    Normal and Laplace priors are truncated to the parameter bounds.

    Returns:
        dict: one entry per parameter id holding a 1-element array with the sampled
        value after ``scale_values`` was applied with the parameter's
        ``parameterScale``, plus the key ``'amici_params'`` holding a 1-D array of
        these values ordered like ``pypesto_problem.x_names``.

    Raises:
        ValueError: if a parameter has a prior type that is not handled here.
    """
    param_df = petab_problem.parameter_df
    lb = param_df['lowerBound'].values
    ub = param_df['upperBound'].values
    param_names_id = param_df.index.values
    param_scale = param_df['parameterScale'].values
    if 'objectivePriorType' in param_df.columns:
        prior_type = param_df['objectivePriorType'].values
    else:
        prior_type = [np.nan] * len(param_names_id)
    estimate_param = param_df['estimate'].values
    nominal_value = param_df['nominalValue'].values

    def _prior_parameters(idx):
        """Parse the 'a;b' entry of objectivePriorParameters for row ``idx`` into two floats."""
        # looked up lazily: the column is only required for priors that carry parameters
        first, second = param_df['objectivePriorParameters'].values[idx].split(';')
        return float(first), float(second)

    def _truncated_normal(mean, std, lower, upper):
        """Sample a normal(mean, std) truncated to [lower, upper]."""
        # truncnorm expects the bounds in units of standard deviations around the mean
        a, b = (lower - mean) / std, (upper - mean) / std
        return stats.truncnorm.rvs(loc=mean, scale=std, a=a, b=b)

    prior_dict = {}
    for i, name in enumerate(param_names_id):
        if estimate_param[i] == 0:
            prior_dict[name] = nominal_value[i]  # linear space
        elif prior_type[i] == 'uniform':  # linear space
            prior_dict[name] = np.random.uniform(low=lb[i], high=ub[i])
        elif prior_type[i] == 'parameterScaleUniform' or pd.isna(prior_type[i]):
            # scale bounds to scaled space
            lb_scaled_i = scale_values(lb[i], param_scale[i])
            ub_scaled_i = scale_values(ub[i], param_scale[i])
            val = np.random.uniform(low=lb_scaled_i, high=ub_scaled_i)
            # scale to linear space
            prior_dict[name] = values_to_linear_scale(val, param_scale[i])
        elif prior_type[i] == 'parameterScaleNormal':
            mean, std = _prior_parameters(i)
            lb_scaled_i = scale_values(lb[i], param_scale[i])
            ub_scaled_i = scale_values(ub[i], param_scale[i])
            rv = _truncated_normal(mean, std, lb_scaled_i, ub_scaled_i)
            # scale to linear space
            prior_dict[name] = values_to_linear_scale(rv, param_scale[i])
        elif prior_type[i] == 'normal':
            mean, std = _prior_parameters(i)
            prior_dict[name] = _truncated_normal(mean, std, lb[i], ub[i])
        elif prior_type[i] == 'laplace':
            loc, scale = _prior_parameters(i)
            # sample from truncated laplace: a few cheap rejection attempts first
            for _ in range(10):
                rv = np.random.laplace(loc=loc, scale=scale)
                if lb[i] <= rv <= ub[i]:
                    break
            else:
                # All attempts fell outside the bounds. Instead of silently keeping an
                # out-of-bounds value, draw via the inverse CDF restricted to the bounds.
                laplace = stats.laplace(loc=loc, scale=scale)
                u = np.random.uniform(low=laplace.cdf(lb[i]), high=laplace.cdf(ub[i]))
                # clip guards against round-off in ppf at the interval ends
                rv = float(np.clip(laplace.ppf(u), lb[i], ub[i]))
            prior_dict[name] = rv
        else:
            raise ValueError(f"Unknown prior type: {prior_type[i]}")
        # scale params and make list
        prior_dict[name] = np.array([scale_values(prior_dict[name], param_scale[i])])

    # prepare variables for simulation
    x = np.array([prior_dict[name][0] for name in pypesto_problem.x_names])
    prior_dict['amici_params'] = x  # scaled parameters for amici
    return prior_dict

def simulator_amici(amici_params):
    """Simulate the model with AMICI for one parameter vector.

    ``amici_params`` is expected on parameter scale. The prediction is converted
    by ``amici_pred_to_array`` and returned as a dict with the keys ``sim_data``
    and ``sim_failed``.
    """
    prediction = amici_predictor(amici_params)  # expect amici_params to be scaled
    sim_data, sim_failed = amici_pred_to_array(
        prediction,
        amici_params,
        factory=factory,
        petab_problem=petab_problem,
        pypesto_problem=pypesto_problem,
    )
    return {"sim_data": sim_data, "sim_failed": sim_failed}

In [4]:
# Sanity check: push one prior draw through the simulator. The last expression displays
# the simulated data shape, the parameter vector shape and the NaN-ignoring sum of the data.
prior_sample = prior()
test = simulator_amici(prior_sample['amici_params'])
test['sim_data'].shape, prior_sample['amici_params'].shape, np.nansum(test['sim_data'])

((714, 38), (72,), 9860.631272676585)

In [5]:
# # plot prior
# n_rows = len(pypesto_problem.x_names) // 6
# n_cols = int(np.ceil(len(pypesto_problem.x_names) / n_rows))
# fig, axs = plt.subplots(n_rows, n_cols, figsize=(2*n_rows, 2*n_cols), layout='constrained')
# axs = axs.flatten()
# samples = [prior() for i in range(1000)]
# for i, name in enumerate(pypesto_problem.x_names):
#     samples_i = np.array([s[name] for s in samples]).flatten()
#     axs[i].hist(samples_i, density=True)
#     axs[i].set_title(name)
#     # axs[i].axvline(scale_values(petab_problem.parameter_df['nominalValue'][i],
#     #                              petab_problem.parameter_df['parameterScale'][i]), color='red', linestyle='--')
#     # axs[i].axvline(scale_values(petab_problem.parameter_df['lowerBound'][i],
#     #                             petab_problem.parameter_df['parameterScale'][i]), color='blue', linestyle='--')
#     # axs[i].axvline(scale_values(petab_problem.parameter_df['upperBound'][i],
#     #                             petab_problem.parameter_df['parameterScale'][i]), color='blue', linestyle='--')
# plt.show()

In [6]:
def run_mcmc(petab_problem, pypesto_problem, true_params=None, n_optimization_starts=0, n_chains=10, n_samples=10000,
             n_procs=10, verbose=False) -> Union[pypesto.result.Result, tuple[pypesto.result.Result, petab.Problem, pypesto.Problem]]:
    """Run adaptive parallel tempering MCMC on (a copy of) the given PEtab problem.

    Args:
        petab_problem: PEtab problem; it is deep-copied, the original is not modified.
        pypesto_problem: pypesto problem belonging to ``petab_problem``; used when
            simulating and adding noise for ``true_params``.
        true_params: if given, measurements are replaced by a noisy simulation at these
            parameters (passed to the module-level ``amici_predictor``). If ``None``, the
            measurements already stored in the problem are used.
        n_optimization_starts: number of multi-start optimization runs before sampling.
            With 0, optimization is skipped and all chains start from prior draws.
        n_chains: number of parallel tempering chains.
        n_samples: number of MCMC samples.
        n_procs: number of processes for the optimization engine (only used if > 1).
        verbose: toggles progress output of optimization and sampling.

    Returns:
        If ``true_params`` is ``None``: the pypesto result.
        Otherwise: ``(result, petab problem copy, new pypesto problem)``, or
        ``(None, None, None)`` when the simulation at ``true_params`` failed.
    """
    problem_copy = deepcopy(petab_problem)
    if true_params is not None:
        # this is needed to create a new measurement df and recompile the problem for amici
        pred = amici_predictor(true_params)
        _, failed = amici_pred_to_array(
            pred, true_params,
            factory=factory, petab_problem=petab_problem, pypesto_problem=pypesto_problem,
        )
        if failed:
            print("Simulation failed for true parameters")
            return None, None, None
        synthetic_df = factory.prediction_to_petab_measurement_df(pred)  # to create new measurement df
        synthetic_df = apply_noise_to_data(
            synthetic_df, true_params, field='measurement',
            pypesto_problem=pypesto_problem, petab_problem=problem_copy,
        )
        problem_copy.measurement_df = synthetic_df
    # else: keep the measurements that are already part of the problem

    local_importer = pypesto.petab.PetabImporter(problem_copy, simulator_type="amici")
    local_factory = local_importer.create_objective_creator()
    local_model = local_factory.create_model(verbose=False)

    sampling_problem = local_importer.create_problem(
        startpoint_kwargs={"check_fval": True, "check_grad": True}
    )

    # the amici objective is either the objective itself or the first member of an aggregate
    amici_objective = sampling_problem.objective
    if isinstance(amici_objective, pypesto.objective.AggregatedObjective):
        amici_objective = amici_objective._objectives[0]
    amici_objective.amici_solver.setAbsoluteTolerance(1e-8)
    #amici_objective.amici_solver.setSensitivityMethod(amici.SensitivityMethod.adjoint)

    def draw_start_point():
        """Return a fresh prior draw reduced to the free parameters."""
        return sampling_problem.get_reduced_vector(prior()['amici_params'])

    if n_optimization_starts == 0:
        print("Skipping optimization, sample start points for chains from prior")
        opt_result = None
        x0 = [draw_start_point() for _ in range(n_chains)]
    else:
        # do the optimization
        opt_result = optimize.minimize(
            problem=sampling_problem,
            optimizer=optimize.FidesOptimizer(verbose=0),
            #optimizer=optimize.ScipyOptimizer(method='L-BFGS-B'),
            n_starts=n_optimization_starts,
            engine=pypesto.engine.MultiProcessEngine(n_procs=n_procs) if n_procs > 1 else None,
            progress_bar=verbose
        )
        # first chain starts at the best optimization result, the others at prior draws
        best_start = sampling_problem.get_reduced_vector(opt_result.optimize_result.x[0])
        if best_start is None:
            print("Warning: x0 contains nan, replace with prior sample")
            best_start = draw_start_point()
        x0 = [best_start] + [draw_start_point() for _ in range(n_chains - 1)]

    tempering_sampler = sample.AdaptiveParallelTemperingSampler(
        internal_sampler=sample.AdaptiveMetropolisSampler(
            options=dict(decay_constant=0.7, threshold_sample=2000)
        ),
        n_chains=n_chains,
        options=dict(show_progress=verbose)
    )

    mcmc_result = sample.sample(
        problem=sampling_problem,
        n_samples=n_samples,
        sampler=tempering_sampler,
        result=opt_result,
        x0=x0
    )
    sample.geweke_test(mcmc_result)

    if true_params is None:
        return mcmc_result
    return mcmc_result, problem_copy, sampling_problem

In [7]:
def get_mcmc_posterior_samples(res):
    """Return the samples of the first chain after the Geweke burn-in.

    If the burn-in index equals the trace length (everything would be discarded),
    a warning is printed and the complete first chain is returned instead.
    """
    burn_in = sample.geweke_test(res)
    trace = res.sample_result.trace_x
    first_chain = trace[0]  # only use first chain
    if burn_in == trace.shape[1]:
        print("Warning: All samples are considered burn-in.")
        return first_chain
    #_samples = pypesto_problem.get_full_vector(_samples)
    #scales = petab_problem.parameter_df.loc[res.problem.x_names, 'parameterScale'].values
    #_samples = values_to_linear_scale(_samples, scales)
    return first_chain[burn_in:]

In [8]:
# Short test run of the MCMC pipeline: simulate data from a single prior draw and
# sample its posterior with few samples and chains.
n_optimization_starts = 0
test_params = prior()
#print(test_params)
new_result, new_petab_problem, new_pypesto_problem = run_mcmc(
    petab_problem=petab_problem,
    pypesto_problem=pypesto_problem,
    true_params=test_params['amici_params'],
    n_optimization_starts=n_optimization_starts,
    n_samples=1e3,
    n_procs=n_cpus,
    n_chains=3,
    verbose=True
)

# optimization diagnostics are only meaningful when optimization was actually run
if n_optimization_starts > 0:
    visualize.waterfall(new_result, size=(6, 4))
    ax = visualize.parameters(new_result, size=(6, 25))
    visualize_optimized_model_fit(petab_problem=new_petab_problem, result=new_result, pypesto_problem=new_pypesto_problem);

#print(test_params['amici_params']-new_result.optimize_result.x[0])

KeyboardInterrupt: 

In [None]:
# n_optimization_starts = 20
# result = run_mcmc(
#     petab_problem=petab_problem,
#     pypesto_problem=pypesto_problem,
#     n_optimization_starts=n_optimization_starts,
#     n_samples=1e3
# )
#
# if n_optimization_starts > 0:
#     visualize.waterfall(result, size=(6, 4))
#     visualize.parameters(result, size=(6, 25))
#     visualize_optimized_model_fit(petab_problem=petab_problem, result=result, pypesto_problem=pypesto_problem);

In [None]:
#ax = visualize.sampling_parameter_traces(result, size=(20, 20), full_trace=False, use_problem_bounds=False);
#visualize.sampling_scatter(result, size=(13, 6));

# BayesFlow workflow

In [9]:
# Chain prior and AMICI simulator into one BayesFlow simulator, then draw two
# samples (twice) to display the available keys and the shape of the simulated data.
simulator = bf.make_simulator([prior, simulator_amici])
simulator.sample(2).keys(), simulator.sample(2)['sim_data'].shape

(dict_keys(['Bacmax_typeIDT1_ExpID1', 'Bacmax_typeIDT1_ExpID2', 'Bacmax_typeIDT1_ExpID3', 'Bacmax_typeIDT1_ExpID4', 'Bacmax_typeIDT1_ExpID5', 'Bacmax_typeIDT1_ExpID6', 'Bacmax_typeIDT3_ExpID1', 'Bacmax_typeIDT3_ExpID2', 'Bacmax_typeIDT3_ExpID3', 'Bacmax_typeIDT3_ExpID4', 'Bacmax_typeIDT3_ExpID5', 'Bacmax_typeIDT3_ExpID6', 'Bacmax_typeIDT5_ExpID1', 'Bacmax_typeIDT5_ExpID2', 'Bacmax_typeIDT5_ExpID3', 'Bacmax_typeIDT5_ExpID4', 'Bacmax_typeIDT5_ExpID5', 'Bacmax_typeIDT5_ExpID6', 'Bacmax_typeIDwt_ExpID4', 'beta_typeIDT1_ExpID1', 'beta_typeIDT1_ExpID2', 'beta_typeIDT1_ExpID3', 'beta_typeIDT1_ExpID4', 'beta_typeIDT1_ExpID5', 'beta_typeIDT1_ExpID6', 'beta_typeIDT3_ExpID1', 'beta_typeIDT3_ExpID2', 'beta_typeIDT3_ExpID3', 'beta_typeIDT3_ExpID4', 'beta_typeIDT3_ExpID5', 'beta_typeIDT3_ExpID6', 'beta_typeIDT5_ExpID1', 'beta_typeIDT5_ExpID2', 'beta_typeIDT5_ExpID3', 'beta_typeIDT5_ExpID4', 'beta_typeIDT5_ExpID5', 'beta_typeIDT5_ExpID6', 'beta_typeIDwt_ExpID4', 'init_Bac', 'kdegi_typeIDT1', 'kdegi_t

In [10]:
# Number of simulations to generate for the offline training set and the validation set.
num_training_sets = 512*64
num_validation_sets = 100

In [11]:
@delayed
def sample_and_simulate():
    """Draw one prior sample, simulate it and return both merged into one dict."""
    draw = prior()
    outcome = simulator_amici(draw['amici_params'])
    # simulation entries come last, so they win on (unexpected) key clashes
    return {**draw, **outcome}

def simulate_parallel(n_samples):
    """Run ``n_samples`` prior draws + simulations in parallel and stack them per key.

    Returns a dict (``defaultdict``) mapping every key of the single results to a
    numpy array whose first axis indexes the simulations.
    """
    worker_pool = Parallel(n_jobs=n_cpus, verbose=100)
    outcomes = worker_pool(sample_and_simulate() for _ in range(n_samples))

    # gather the values of all runs key by key
    stacked = defaultdict(list)
    for outcome in outcomes:
        for key, value in outcome.items():
            stacked[key].append(value)

    # turn each list of per-run values into one array
    for key in list(stacked):
        stacked[key] = np.array(stacked[key])
    return stacked

In [12]:
# Load cached simulations if a validation file exists, otherwise simulate and cache both sets.
validation_path = f"{storage}validation_data_petab_{problem_name}.pkl"
training_path = f'{storage}training_data_petab_{problem_name}.pkl'

if os.path.exists(validation_path):
    with open(validation_path, 'rb') as f:
        validation_data = pickle.load(f)
    try:
        with open(training_path, 'rb') as f:
            training_data = pickle.load(f)
    except FileNotFoundError:
        # validation data alone is enough for evaluating an already trained model
        training_data = None
        print("Training data not found")
else:
    training_data = simulate_parallel(num_training_sets)
    validation_data = simulate_parallel(num_validation_sets)

    with open(training_path, 'wb') as f:
        pickle.dump(training_data, f)
    with open(validation_path, 'wb') as f:
        pickle.dump(validation_data, f)

# remove failed simulations (the mask is computed once, then applied to every entry)
if training_data is not None:
    train_mask = ~training_data['sim_failed']
    for key in training_data:
        training_data[key] = training_data[key][train_mask]
    print(f"Failed Training data: {np.sum(~train_mask)} / {len(train_mask)}")
val_mask = ~validation_data['sim_failed']
for key in validation_data:
    validation_data[key] = validation_data[key][val_mask]
print(f"Failed Validation data: {np.sum(~val_mask)} / {len(val_mask)}")

# statistics of log(1 + data) over the first two axes, ignoring NaNs
log_validation_sim = np.log(validation_data['sim_data']+1)
test_mean = np.nanmean(log_validation_sim, axis=(0,1), keepdims=True)
test_std = np.nanstd(log_validation_sim, axis=(0,1), keepdims=True)
print(validation_data['sim_data'].shape)

Training data not found
Failed Validation data: 0 / 100
(100, 714, 38)


In [13]:
# Names and scaled bounds of the free (estimated) parameters, in problem order.
free_indices = set(pypesto_problem.x_free_indices)
param_names = [
    name for idx, name in enumerate(pypesto_problem.x_names) if idx in free_indices
]
lbs = np.array(
    [lower for idx, lower in enumerate(petab_problem.lb_scaled) if idx in free_indices]
)
ubs = np.array(
    [upper for idx, upper in enumerate(petab_problem.ub_scaled) if idx in free_indices]
)

# Adapter: maps the simulator output onto the inputs of the networks.
adapter = (
    bf.adapters.Adapter()
    .drop('amici_params')  # only used for simulation
    .to_array()
    .convert_dtype("float64", "float32")
    .concatenate(param_names, into="inference_variables")
    .constrain("inference_variables", lower=lbs, upper=ubs, inclusive='both')  # after concatenate such that we can apply an array as constraint

    .as_time_series("sim_data")
    .log("sim_data", p1=True)
    #.standardize("sim_data", mean=test_mean, std=test_std)
    #.nan_to_num("sim_data", default_value=-3.0)
    .rename("sim_data", "summary_variables")
)

In [14]:
# # check what the distributions look like
# test_params = adapter.forward(validation_data)['inference_variables']
#
# n_rows = len(param_names) // 6
# n_cols = int(np.ceil(len(param_names) / n_rows))
# fig, ax = plt.subplots(n_rows, n_cols, figsize=(2*n_rows, 2*n_cols), layout='constrained')
# ax = ax.flatten()
# for i, name in enumerate(param_names):
#     samples = test_params[:, i]
#     ax[i].hist(samples, density=True)
#     ax[i].set_title(name)
# plt.show()

In [15]:
# check what the data distribution looks like after the adapter
# (disable nan_to_num in the adapter to see NaNs)
test_data = adapter.forward(validation_data)['summary_variables']
# n_features = test_data.shape[-1]
#
# n_rows = n_features // 5
# n_cols = int(np.ceil(n_features / n_rows))
# fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(2*n_rows, 2*n_cols))
# ax = ax.flatten()
# for i in range(n_features):
#     ax[i].hist(test_data[:, :, i].flatten(), density=True)
# plt.show()

In [16]:
# print some stats about the adapter-transformed summary variables;
# the NaN count shows whether any missing values remain in the data
print('Minimum', np.min(test_data))
print('Maximum', np.max(test_data))
print('Mean', np.mean(test_data))
print('Standard Deviation', np.std(test_data))
print('Nan Values', np.isnan(test_data).sum())

Minimum 0.0
Maximum 4.2376857
Mean 0.30746585
Standard Deviation 0.41734025
Nan Values 0


In [17]:
from model_settings import EPOCHS, BATCH_SIZE, MODELS, NUM_SAMPLES_INFERENCE, SAMPLER_SETTINGS

In [18]:
# Select the last entry of MODELS. Its value is used later as
# conf_tuple[0](**conf_tuple[1]) to build the inference network.
model_name = list(MODELS.keys())[-1]
conf_tuple = MODELS[model_name]
print(model_name)

'diffusion_cosine_noise'

In [19]:
# Assemble the BayesFlow workflow: a FusionTransformer summary network whose summary
# dimension is twice the number of free parameters, and the inference network built
# from the selected configuration (callable conf_tuple[0] with kwargs conf_tuple[1]).
workflow = bf.BasicWorkflow(
    simulator=simulator,
    adapter=adapter,
    summary_network=bf.networks.FusionTransformer(summary_dim=len(param_names)*2),  # FusionTransformer
    inference_network=conf_tuple[0](**conf_tuple[1]),
    standardize='all'
)

In [20]:
# Train offline on the pre-simulated data unless a saved model exists at model_path;
# in that case the stored approximator is loaded instead.
model_path = f'{storage}petab_benchmark_diffusion_model_{problem_name}_{model_name}.keras'
if not os.path.exists(model_path):
    history = workflow.fit_offline(
        training_data,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=validation_data,
        verbose=2
    )
    # NOTE: saving is currently disabled, so the model is retrained on every run
    #workflow.approximator.save(model_path)
else:
    workflow.approximator = keras.models.load_model(model_path)

INFO:bayesflow:Fitting on dataset instance of OfflineDataset.
INFO:bayesflow:Building on a test batch.


1/1 - 10s - 10s/step - loss: 22.5965 - val_loss: 13.5632


In [None]:
# Default diagnostic plots on the validation data; the calibration ECDF is drawn
# as a stacked difference plot. Saving the figures is currently disabled.
diagnostics_plots = workflow.plot_default_diagnostics(test_data=validation_data, num_samples=NUM_SAMPLES_INFERENCE,
                                                      calibration_ecdf_kwargs={"difference": True, 'stacked': True})
#for k in diagnostics_plots.keys():
#    diagnostics_plots[k].savefig(f"{storage}petab_benchmark_{problem_name}_{model_name}_{k}.pdf")

In [22]:
# For diffusion models, compare the default diagnostics across all sampler settings.
if model_name.startswith('diffusion'):
    for solver_name, solver_kwargs in SAMPLER_SETTINGS.items():
        diagnostics = workflow.compute_default_diagnostics(
            test_data=validation_data,
            num_samples=NUM_SAMPLES_INFERENCE,
            approximator_kwargs=solver_kwargs,
        )
        print(solver_name)
        print(diagnostics.median(axis=1))
        print('\n')

{'start_time': 1.0, 'stop_time': 0.0, 'method': 'rk45', 'steps': 100}
ode
NRMSE                    1.935771
Posterior Contraction    1.000000
Calibration Error        0.500000
dtype: float64


{'start_time': 1.0, 'stop_time': 0.0, 'method': 'euler_maruyama', 'steps': 100}
sde
NRMSE                    1.935771
Posterior Contraction    1.000000
Calibration Error        0.500000
dtype: float64


{'start_time': 1.0, 'stop_time': 0.0, 'method': 'euler_maruyama', 'steps': 100, 'corrector_steps': 1}
sde-pc
NRMSE                    1.935771
Posterior Contraction    1.000000
Calibration Error        0.500000
dtype: float64




# MCMC sampling for comparison

In [None]:
def run_mcmc_single(petab_prob, pypesto_prob, true_params, n_starts, n_mcmc_samples, n_final_samples, n_chains):
    """Run MCMC for one synthetic data set and return ``n_final_samples`` posterior draws.

    Meant to be executed in a worker process, hence logging is configured here again.
    Returns an array of shape ``(n_final_samples, number of free parameters)``; it is
    filled with NaN if the simulation at ``true_params`` failed or a ``LinAlgError``
    occurred during MCMC.
    """
    import amici
    import logging
    amici.swig_wrappers.logger.setLevel(logging.CRITICAL)
    pypesto.logging.log(level=logging.ERROR, name="pypesto.petab", console=True)

    def nan_samples():
        """Placeholder result with the same shape as a successful run."""
        return np.full((n_final_samples, len(pypesto_prob.x_free_indices)), np.nan)

    try:
        mcmc_result, _, _ = run_mcmc(
            petab_problem=petab_prob,
            pypesto_problem=pypesto_prob,
            true_params=true_params,
            n_optimization_starts=n_starts,
            n_samples=n_mcmc_samples,
            n_chains=n_chains,
            n_procs=1
        )
    except np.linalg.LinAlgError as err:
        print("LinAlgError during MCMC:", err)
        return nan_samples()

    if mcmc_result is None:
        return nan_samples()

    posterior = get_mcmc_posterior_samples(mcmc_result)
    # n_final_samples random samples (with replacement) from the posterior
    chosen = np.random.choice(posterior.shape[0], size=n_final_samples)
    return posterior[chosen]

In [None]:
# Reference posterior samples from MCMC: load from cache or compute one MCMC run per
# validation data set in parallel and cache the result.
mcmc_path = f'{storage}mcmc_samples_{problem_name}.pkl'
if os.path.exists(mcmc_path):
    with open(mcmc_path, 'rb') as f:
        mcmc_posterior_samples = pickle.load(f)
else:
    mcmc_posterior_samples = Parallel(n_jobs=n_cpus, verbose=10)(
        delayed(run_mcmc_single)(
            petab_prob=petab_problem,
            pypesto_prob=pypesto_problem,
            true_params=params,
            n_starts=10,
            n_mcmc_samples=100_000,  # integer sample count (was the float 1e5)
            # same number of posterior draws as used for the BayesFlow diagnostics;
            # the previously used name `num_samples` is not defined in this notebook
            n_final_samples=NUM_SAMPLES_INFERENCE,
            n_chains=10
        ) for params in validation_data['amici_params']
    )
    mcmc_posterior_samples = np.array(mcmc_posterior_samples)

    with open(mcmc_path, 'wb') as f:
        pickle.dump(mcmc_posterior_samples, f)
# failed runs are returned as all-NaN blocks; keep only data sets with finite samples
mcmc_mask = ~np.isnan(mcmc_posterior_samples.sum(axis=(1,2)))

In [None]:
# Recovery and calibration of the MCMC reference posterior.
# validation_data has already been reduced to the successful simulations, and the MCMC
# samples were computed per remaining validation data set. Estimates and targets are
# therefore aligned row by row and must be filtered with the same mask (mcmc_mask).
# val_mask still has the pre-filter length and must not be applied again.
mcmc_estimates = mcmc_posterior_samples[mcmc_mask]
mcmc_targets = pypesto_problem.get_reduced_vector(validation_data['amici_params'].T).T[mcmc_mask]

fig = bf.diagnostics.recovery(
    estimates=mcmc_estimates,
    targets=mcmc_targets,
    variable_names=param_names,
)
fig.savefig(f"{storage}petab_benchmark_{problem_name}_mcmc_recovery.png")

fig = bf.diagnostics.calibration_ecdf(
    estimates=mcmc_estimates,
    targets=mcmc_targets,
    variable_names=param_names,
    difference=True,
    stacked=True
)
fig.savefig(f"{storage}petab_benchmark_{problem_name}_mcmc_calibration.png")