In [1]:
import pandas as pd
from pathlib import Path
import pylatex as pl
from pylatex.utils import NoEscape
import pymc as pm
import arviz as az
import nevergrad as ng
import plotly.express as px

from estival.model import BayesianCompartmentalModel
from estival.optimization.nevergrad import optimize_model
from estival.priors import UniformPrior
from estival.targets import NegativeBinomialTarget, CustomTarget
from estival.calibration import pymc as epm
from tbdynamics import model
from tbdynamics.inputs import *
from tbdynamics.utils import build_contact_matrix
import plotly.graph_objects as go
from general_utils.parameter_utils import load_param_info
from general_utils.doc_utils import TextElement, TableElement, FigElement, add_element_to_document, \
    save_pyplot_add_to_doc, save_plotly_add_to_doc, compile_doc, generate_doc




In [2]:
# Project directories, all resolved relative to the notebook's working directory.
PROJECT_PATH = Path().resolve()
DATA_PATH = PROJECT_PATH / "data"
SUPPLEMENT_PATH = PROJECT_PATH / "supplement"
OUTPUT_PATH = PROJECT_PATH / "outputs"

# Later cells write figures into SUPPLEMENT_PATH and calibration results into OUTPUT_PATH,
# so make sure both exist before anything tries to save into them.
for _writable_dir in (SUPPLEMENT_PATH, OUTPUT_PATH):
    _writable_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# True: re-run the MCMC sampling cell and save the result to calibration_out.nc.
# False: load the previously saved calibration_out.nc instead.
new_calibration = False

In [4]:
# Use plotly as the backend for pandas' .plot() calls.
pd.options.plotting.backend = "plotly"
# Simulation window and time step handed to model.build_base_model.
time_start, time_end, time_step = 1800, 2020, 1

# Collects the text/figure elements that compile_doc later turns into the supplement.
doc_sections = dict()

# Every compartment of the base model.
compartments = ["susceptible", "early_latent", "late_latent", "infectious", "on_treatment", "recovered"]

# Subsets of the compartments passed to the model-building and output functions.
infectious_compartments = ["infectious", "on_treatment"]
latent_compartments = ["early_latent", "late_latent"]

# Breakpoints used for the age stratification and the contact matrix.
age_strata = [0, 5, 15, 35, 50]

In [5]:
# Baseline parameter values handed to BayesianCompartmentalModel; entries that also have a
# prior are overwritten by the optimisation cell when it is enabled.
params = dict(
    contact_rate=0.009414102898074345,
    start_population_size=227344.75719536067,
    cdr_adjustment=0.6,
    progression_multiplier=1.1,
    infectious_seed=1,
    rr_infection_latent=0.2,
    rr_infection_recovered=0.2,
    infect_death_rate_unstratified=0.21,
    on_treatment_infect_multiplier=0.08,
    smear_positive_death_rate=0.364337776897486,
    smear_negative_death_rate=0.027588310343242016,
    smear_positive_self_recovery=0.20344728302826143,
    smear_negative_self_recovery=0.22723824998716693,
    rr_progression_diabetes=4.5,
)

In [6]:
# Build the base model from the compartment lists and time settings. The description text it
# returns is filed under "Model construction" in doc_sections and echoed as the cell output.
tb_model, build_text = model.build_base_model(compartments, infectious_compartments, time_start, time_end, time_step)
add_element_to_document("Model construction", TextElement(build_text), doc_sections)
build_text

"The base model consists of 6 states, representing the following states: susceptible, early_latent, late_latent, infectious, on_treatment, recovered. Only the ['infectious', 'on_treatment'] compartment contributes to the force of infection. The model is run from 1800 to 2020. "

In [7]:
# Apply the starting conditions to tb_model and record the returned description.
start_text = model.set_starting_conditions(tb_model)
add_element_to_document("Model construction", TextElement(start_text), doc_sections)
start_text

'The simulation starts with Parameter start_population_size million fully susceptible persons, with infectious persons introduced later through strain seeding as described below. '

In [8]:
# Call model.add_entry_flow on tb_model and record the returned description.
entry_text = model.add_entry_flow(tb_model)
add_element_to_document("Model construction", TextElement(entry_text), doc_sections)
entry_text

'The birth process add newborns to the susceptible compartment of the model'

In [9]:
# Call model.add_natural_death_flow on tb_model and record the returned description.
ndeath_text = model.add_natural_death_flow(tb_model)
add_element_to_document("Model construction", TextElement(ndeath_text), doc_sections)
ndeath_text

'The universal_death process add universal death to the model.'

In [10]:
# model.add_infection returns several description strings (one per infection process, see the
# echoed output); each is added to the document as its own text element.
infect_text = model.add_infection(tb_model)
for text in infect_text:
    add_element_to_document("Model construction", TextElement(text), doc_sections)
infect_text

('The infection process moves people from the susceptible compartment to the early_latent compartment, under the frequency-dependent transmission assumption. ',
 'The infection_from_latent process moves people from the late_latent compartment to the early_latent compartment, under the frequency-dependent transmission assumption. ',
 'The infection_from_recovered process moves people from the recovered compartment to the early_latent compartment, under the frequency-dependent transmission assumption. ')

In [11]:
# model.add_latency returns several description strings (see the echoed output); each is
# added to the document as its own text element.
latency_text = model.add_latency(tb_model)
for text in latency_text:
    add_element_to_document("Model construction", TextElement(text), doc_sections)
latency_text

('The stabilisation process moves people from the early_latent compartment to the late_latent compartment, under the frequency-dependent transmission assumption. ',
 'The early_activation process moves people from the early_latent compartment to the infectious compartment, under the frequency-dependent transmission assumption. ',
 'The late_activation process moves people from the late_latent compartment to the infectious compartment, under the frequency-dependent transmission assumption. ')

In [12]:
# Call model.add_infect_death on tb_model and record the returned description.
ideath_text = model.add_infect_death(tb_model)
add_element_to_document("Model construction", TextElement(ideath_text), doc_sections)
ideath_text

'The infect_death process moves people from the infectious'

In [13]:
# Call model.add_self_recovery on tb_model and record the returned description.
sr_text = model.add_self_recovery(tb_model)
add_element_to_document("Model construction", TextElement(sr_text), doc_sections)
sr_text

'The self_recovery process moves people from the on_treatment compartment to the recovered, under the frequency-dependent transmission assumption. '

In [14]:
# Call model.add_detection on tb_model and record the returned description.
detection_text = model.add_detection(tb_model)
add_element_to_document("Model construction", TextElement(detection_text), doc_sections)
detection_text

'The detection process moves people from the infectious compartment to the on_treatment compartment, under the frequency-dependent transmission assumption. '

In [15]:
# Call model.add_acf on tb_model and record the returned description.
# NOTE(review): fixed_parameters is not defined anywhere in this notebook; presumably it is
# brought in by `from tbdynamics.inputs import *` — confirm, and prefer an explicit import.
acf_text = model.add_acf(tb_model, fixed_parameters)
add_element_to_document("Model construction", TextElement(acf_text), doc_sections)
acf_text

'The acf_detection process moves people from the infectious compartment to the on_treatment, under the frequency-dependent transmission assumption. '

In [16]:
# model.add_treatment_related_outcomes returns several description strings (see the echoed
# output); each is added to the document as its own text element.
treatment_text = model.add_treatment_related_outcomes(tb_model)
for text in treatment_text:
    add_element_to_document("Model construction", TextElement(text), doc_sections)
treatment_text

('The treatment_recovery process moves people from the on_treatment compartment to the recovered compartment, under the frequency-dependent transmission assumption. ',
 'The treatment_death process moves people from the on_treatment compartment to the death, under the frequency-dependent transmission assumption. ',
 'The early_activation process moves people from the on_treatment compartment to the infectious compartment, under the frequency-dependent transmission assumption. ')

In [17]:
# Build the contact matrix for the age strata. The figure file name is passed to the builder
# and then referenced from the "Mixing" section, using the returned text as the caption.
mfilename = "matrix.jpg"
matrix, matrix_fig_text = build_contact_matrix(age_strata, mfilename)
add_element_to_document("Mixing", FigElement(mfilename, caption=matrix_fig_text), doc_sections)

In [18]:
# Create the age stratification object (applied to tb_model in the next cell) and record its
# description under the "Stratifications" subsection.
# NOTE(review): fixed_parameters is not defined in this notebook; presumably it comes from the
# star-import of tbdynamics.inputs — confirm.
age_strat, age_strat_text = model.add_age_strat(compartments, infectious_compartments, age_strata, matrix, fixed_parameters)
add_element_to_document("Model construction", TextElement(age_strat_text), doc_sections, subsection_name="Stratifications")

In [19]:
# Apply the age stratification to the model.
tb_model.stratify_with(age_strat)

In [20]:
# Create the organ stratification object (applied to tb_model in the next cell) and record its
# description under the "Stratifications" subsection.
organ, organ_text = model.add_organ_strat(fixed_parameters,infectious_compartments)
add_element_to_document("Model construction", TextElement(organ_text), doc_sections, subsection_name="Stratifications")

In [21]:
# Apply the organ stratification to the model (after the age stratification).
tb_model.stratify_with(organ)

In [22]:
# NOTE(review): gender stratification is currently disabled. The output recorded for this cell
# ("add_gender_strat() takes 3 positional arguments but 4 were given") indicates the call no
# longer matches the function signature — fix the arguments or remove this cell and the next.
#gender = model.add_gender_strat(tb_model, age_strata, compartments, fixed_parameters)

TypeError: add_gender_strat() takes 3 positional arguments but 4 were given

In [None]:
#tb_model.stratify_with(gender)

In [23]:
# Presumably registers the derived outputs read later from get_derived_outputs_df()
# (total_population, notifications, percentage_latent, ...) — confirm in tbdynamics.model.
model.request_output(tb_model, compartments,  latent_compartments, infectious_compartments)

In [24]:
# Calibrated parameters and the (lower, upper) bounds of their uniform priors.
prior_bounds = {
    "start_population_size": (150000, 300000),
    "contact_rate": (0.0001, 0.02),
    "rr_infection_latent": (0.2, 0.5),
    "rr_infection_recovered": (0.1, 0.5),
    "smear_positive_death_rate": (0.335, 0.449),
    "smear_negative_death_rate": (0.017, 0.035),
    "smear_positive_self_recovery": (0.177, 0.288),
    "smear_negative_self_recovery": (0.073, 0.209),
    "cdr_adjustment": (0.6, 1.0),
    "rr_progression_diabetes": (2.0, 6.0),
}
# Disabled alternatives kept for reference:
#   "infectious_seed": [100, 2000]
#   "progression_multiplier": (0.1, 2.0)
#   "cdr_adjustment": [0.6, 1.0]
#   "infect_death_rate_dict.smear_positive": [0.335, 0.449]
#   "infect_death_rate_dict.smear_negative": [0.017, 0.035]
#   "self_recovery_rate_dict.smear_positive": [0.177, 0.288]
#   "self_recovery_rate_dict.smear_negative": [0.073, 0.209]
#   "rr_progression_diabetes": [1, 10]

# One UniformPrior per entry, in the order listed above.
priors = [UniformPrior(param_name, bounds) for param_name, bounds in prior_bounds.items()]
# Observed data used as calibration targets, indexed by year.
pop = pd.Series(
    {
        2009: 1207100,
        2019: 1194300,
    }
)
notif = pd.Series(
    {
        2011: 1495,
        2012: 1485,
        2013: 1369,
        2014: 1405,
        2015: 1642,
        2016: 1555,
        2017: 1440,
        2018: 1468,
        2019: 1417,
    }
)
# NOTE(review): the calibration target here is 36, while the `plots` dictionary further down
# overlays 30.8 for the same year — confirm which value is intended.
latent = pd.Series({2016: 36})

def least_squares(modelled, obs, parameters, time_weights):
    """Return the negative sum of squared differences between `modelled` and `obs`.

    `parameters` and `time_weights` are accepted but not used; they are kept so the
    function matches the call signature expected where it is passed to CustomTarget.

    NOTE(review): the error is not normalised, so a target on the scale of the total
    population presumably dominates the combined loss — confirm this is intended.
    """
    squared_residuals = (modelled - obs) ** 2.0
    return 0.0 - squared_residuals.sum()
# Calibration targets: population and latent percentage are scored with the least-squares
# function, notifications with a negative binomial target (20.0 is presumably its
# dispersion parameter — confirm against estival).
targets = [
    CustomTarget("total_population", pop, least_squares),
    # Disabled alternative: least-squares scoring of notifications.
    # CustomTarget("notifications", notif, least_squares),
    NegativeBinomialTarget("notifications", notif, 20.0),
    CustomTarget("percentage_latent", latent, least_squares)
]
# Disabled alternative: negative binomial target for the population.
# binom_targets = [
#     NegativeBinomialTarget("total_population", pop, 20.0),
# ]
# Bundle the model, baseline parameters, priors and targets for optimisation and sampling.
calibration_model = BayesianCompartmentalModel(tb_model, params, priors, targets)

In [25]:
# Optional pre-calibration optimisation; set optimise_model to False to keep `params` as defined above.
optimise_model = True
if optimise_model:
    print("Optimising with nevergrad \n Progression of loss function values:")
    optim_runner = optimize_model(calibration_model)
    # Ten successive calls on the same runner, printing the loss after each; the argument
    # is presumably the evaluation budget per call — confirm against estival.
    for i in range(10):
        rec = optim_runner.minimize(100)
        print(rec.loss)
    # Only the recommendation from the last call is kept; element 1 of its value holds the
    # parameter dictionary (see the printed output).
    optim_params = rec.value[1]
    # Mutates `params` in place, so every later cell sees the optimised values.
    params.update(optim_params)
    # The next cell runs the model again with the same `params`.
    tb_model.run(parameters=params)
    print("Best calibration parameters found:")
    print(optim_params)

Optimising with nevergrad 
 Progression of loss function values:
951778453.8853403
951778453.8853403
913770687.3094556
903922399.4497097
903922399.4497097
871254325.0283002
871254325.0283002
871254325.0283002
871254325.0283002
871254325.0283002
Best calibration parameters found:
{'start_population_size': 263432.6269472283, 'contact_rate': 0.010887640336661777, 'rr_infection_latent': 0.3429024064938526, 'rr_infection_recovered': 0.22428123849245518, 'smear_positive_death_rate': 0.424710135647397, 'smear_negative_death_rate': 0.028402748311594183, 'smear_positive_self_recovery': 0.2215183650742013, 'smear_negative_self_recovery': 0.12514811459829198, 'cdr_adjustment': 0.7397745398331373, 'rr_progression_diabetes': 3.923608926480754}


In [None]:
# Run the model with the current `params` (updated in place by the optimisation cell when it
# is enabled) and collect the derived outputs used by the plotting cells below.
tb_model.run(parameters=params)
derived_df_0 = tb_model.get_derived_outputs_df()

In [None]:
# Observed data points overlaid on the model output in the plotting cells below.
# Every entry gets its own copy of the quantile list.
plot_quantiles = (0.025, 0.25, 0.5, 0.75, 0.975)

plots = {
    "total_population": dict(
        title="Population size",
        output_key="total_population",
        times=[2009.0, 2019.0],
        values=[1207100, 1194300],
        quantiles=list(plot_quantiles),
    ),
    "notifications": dict(
        title="Notifications",
        output_key="notifications",
        times=[float(year) for year in range(2011, 2020)],
        values=[1495, 1485, 1369, 1405, 1642, 1555, 1440, 1468, 1417],
        quantiles=list(plot_quantiles),
    ),
    # NOTE(review): 30.8 is plotted here, while the calibration target series `latent`
    # uses 36 for the same year — confirm which value is intended.
    "percentage_latent": dict(
        title="Percentage Latent",
        output_key="percentage_latent",
        times=[2016.0],
        values=[30.8],
        quantiles=list(plot_quantiles),
    ),
}

In [None]:
# Modelled total population (line) with the observed data points overlaid (red markers).
fig2_1 = px.line(
    derived_df_0,
    x=derived_df_0.index,
    y="total_population",
)
fig2_2 = px.scatter(x= plots['total_population']['times'], y = plots['total_population']['values'])
fig2_2.update_traces(marker=dict(color="red"))
# Combine the traces of both figures into a single figure.
fig2_3 = go.Figure(
    data=fig2_1.data + fig2_2.data,
)
fig2_3.update_layout(
    title="Modelled vs Data", title_x=0.5, xaxis_title="Year", yaxis_title="Population"
)
fig2_3.show()
# Saved into the supplement folder; the file name is reused when the figure is added to the document.
total_fig_name = "total.jpg"
fig2_3.write_image(SUPPLEMENT_PATH /total_fig_name)

In [None]:
# Add the total-population figure to the "Outputs" section. The caption previously read
# "Notifications", copied from the notifications figure.
add_element_to_document("Outputs",FigElement(total_fig_name, caption="Population size"), doc_sections)

In [None]:
# Modelled notifications (line) with the observed data points overlaid (red markers).
notif_1 = px.line(
    derived_df_0,
    x=derived_df_0.index,
    y="notifications",
)
notif_2 = px.scatter(x= plots['notifications']['times'], y = plots['notifications']['values'])
notif_2.update_traces(marker=dict(color="red"))
# Combine the traces of both figures into a single figure.
notif_plot = go.Figure(
    data=notif_1.data + notif_2.data,
)
notif_plot.update_layout(
    title="Modelled vs Data", title_x=0.5, xaxis_title="Year", yaxis_title="Notifications"
)
notif_plot.show()
# Saved into the supplement folder; the file name is reused when the figure is added to the document.
notif_fig_name = "notifications.jpg"
notif_plot.write_image(SUPPLEMENT_PATH / notif_fig_name)

In [None]:
# Add the notifications figure to the "Outputs" section of the document.
add_element_to_document("Outputs",FigElement(notif_fig_name, caption="Notifications"), doc_sections)

In [None]:
# Modelled "prevalence_infectious" output over time (no observed data overlaid).
prev_plot = px.line(
    derived_df_0,
    x=derived_df_0.index,
    y="prevalence_infectious",
)
prev_plot.show()
# Build the target path with pathlib, as the population and notification cells do, instead
# of concatenating strings with a hard-coded "/" separator.
# NOTE(review): "prevalance" is misspelled; the file name is kept unchanged in case anything
# outside this notebook references it.
prev_plot.write_image(SUPPLEMENT_PATH / "prevalance.jpg")

In [None]:
# Modelled "incidence" output over time (no observed data overlaid).
inci_plot = px.line(
    derived_df_0,
    x=derived_df_0.index,
    y="incidence",
)
inci_plot.show()
# Build the target path with pathlib, as the population and notification cells do, instead
# of concatenating strings with a hard-coded "/" separator.
inci_plot.write_image(SUPPLEMENT_PATH / "incidence.jpg")

In [None]:
# Modelled percentage latent (line) with the observed data point overlaid (red marker).
latent_1 = px.line(
    derived_df_0,
    x=derived_df_0.index,
    y="percentage_latent",
)
latent_2 = px.scatter(x=plots["percentage_latent"]["times"], y=plots["percentage_latent"]["values"])
latent_2.update_traces(marker=dict(color="red"))
# Combine the traces of both figures into a single figure.
latent_plot = go.Figure(
    data=latent_1.data + latent_2.data,
)
latent_plot.update_layout(
    title="Modelled vs Data", title_x=0.5, xaxis_title="Year", yaxis_title="Percentage latent"
)
latent_plot.show()
# Build the target path with pathlib, as the population and notification cells do, instead
# of concatenating strings with a hard-coded "/" separator.
latent_plot.write_image(SUPPLEMENT_PATH / "latent.jpg")

In [None]:
# Create the supplement document and compile the sections collected in doc_sections into it.
# NOTE(review): the second argument "austcovid" looks carried over from another project —
# confirm it is the intended value here.
supplement = generate_doc("Supplemental Appendix", "austcovid")
compile_doc(doc_sections, supplement)

In [None]:
# MCMC settings: draws per chain, draws discarded as burn-in, and number of chains.
iterations = 20000
burn_in = 2000
n_chains = 20
if new_calibration:
    # Sample with DEMetropolis (no tuning phase) and cache the raw result on disk.
    with pm.Model() as pm_model:
        variables = epm.use_model(calibration_model)
        idata_raw = pm.sample(step=[pm.DEMetropolis(variables)], draws=iterations, tune=0, cores=16, chains=n_chains)
    idata_raw.to_netcdf(OUTPUT_PATH / "calibration_out.nc")
else:
    # Reuse the cached result of an earlier calibration run.
    idata_raw = az.from_netcdf(OUTPUT_PATH / "calibration_out.nc")

# Discard burn-in. A label slice keeps every draw from `burn_in` onwards, so it also works
# when the cached file holds a different number of draws than `iterations` (selecting
# range(burn_in, iterations) raises a KeyError in that case).
idata = idata_raw.sel(draw=slice(burn_in, None))

Population sampling (20 chains)
INFO:pymc.sampling.mcmc:Population sampling (20 chains)
DEMetropolis: [start_population_size, contact_rate, rr_infection_latent, rr_infection_recovered, smear_positive_death_rate, smear_negative_death_rate, smear_positive_self_recovery, smear_negative_self_recovery, cdr_adjustment, rr_progression_diabetes]
INFO:pymc.sampling.mcmc:DEMetropolis: [start_population_size, contact_rate, rr_infection_latent, rr_infection_recovered, smear_positive_death_rate, smear_negative_death_rate, smear_positive_self_recovery, smear_negative_self_recovery, cdr_adjustment, rr_progression_diabetes]
Attempting to parallelize chains to all cores. You can turn this off with `pm.sample(cores=1)`.
INFO:pymc.sampling.population:Attempting to parallelize chains to all cores. You can turn this off with `pm.sample(cores=1)`.


Sampling 20 chains for 0 tune and 20_000 draw iterations (0 + 400_000 draws total) took 1741 seconds.
INFO:pymc.sampling.mcmc:Sampling 20 chains for 0 tune and 20_000 draw iterations (0 + 400_000 draws total) took 1741 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
INFO:pymc.stats.convergence:The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details
ERROR:pymc.stats.convergence:The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


In [None]:
# First stored draw of every chain (before burn-in is removed), one row per chain.
idata_raw.posterior.isel(draw=0).to_dataframe()

Unnamed: 0_level_0,draw,start_population_size,contact_rate,rr_infection_latent,rr_infection_recovered,smear_positive_death_rate,smear_negative_death_rate,smear_positive_self_recovery,smear_negative_self_recovery,cdr_adjustment,rr_progression_diabetes
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,225007.672165,0.01005,0.35003,0.300056,0.392025,0.026006,0.232471,0.141004,0.799798,4.000421
1,0,224963.93106,0.01005,0.349978,0.300222,0.391979,0.025998,0.232517,0.141001,0.799942,3.999563
2,0,225000.0,0.01005,0.35,0.3,0.392,0.026,0.2325,0.141,0.8,4.0
3,0,225000.0,0.01005,0.35,0.3,0.392,0.026,0.2325,0.141,0.8,4.0
4,0,225009.497025,0.010051,0.35005,0.299937,0.391977,0.025999,0.232486,0.141031,0.800009,4.000035
5,0,224999.303131,0.010056,0.350072,0.299991,0.391993,0.025995,0.232542,0.140949,0.799948,4.000686
6,0,224988.691268,0.01005,0.35,0.300087,0.391988,0.025996,0.232491,0.141025,0.800014,3.999194
7,0,225025.157192,0.010052,0.34993,0.300055,0.392,0.026006,0.232502,0.141015,0.799889,4.000959
8,0,225000.0,0.01005,0.35,0.3,0.392,0.026,0.2325,0.141,0.8,4.0
9,0,224976.751905,0.01006,0.350045,0.300044,0.392,0.026002,0.2325,0.141034,0.800068,4.000944


In [None]:
# Per-chain acceptance rate after burn-in: accepted draws divided by the number of draws.
(
    idata.sample_stats["accepted"].sum(dim="draw")
    / idata.sample_stats.sizes["draw"]
).to_dataframe()

Unnamed: 0_level_0,accepted
chain,Unnamed: 1_level_1
0,0.001
1,0.00125
2,0.001625
3,0.001625
4,0.0005
5,0.00175
6,0.001625
7,0.0015
8,0.000125
9,0.001125
