In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
from pathlib import Path
import plotly.express as px
import pymc as pm
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd

from estival.model import BayesianCompartmentalModel
import estival.priors as esp
import estival.targets as est
from estival.wrappers import pymc as epm
from tbdynamics import model
from tbdynamics.utils import round_sigfig
from summer2 import CompartmentalModel
from summer2.parameters import Parameter
from summer2.functions.time import get_sigmoidal_interpolation_function
from summer2 import AgeStratification, Overwrite
from emutools.tex import StandardTexDoc



## Defining initial variables

In [3]:
pd.options.plotting.backend = 'plotly'
time_start = 1800
time_end = 2020
time_step = 1

doc_sections = {}
compartments = [
    "susceptible",
    "early_latent",
    "late_latent",
    "infectious",
    "on_treatment",
    "recovered",
]
infectious_compartments = [
    "infectious",
    "on_treatment",
]

latent_compartments = [
    "early_latent",
    "late_latent",
]
age_strata = [0,5,15,35,50]

## Load data

In [4]:
PROJECT_PATH = Path().resolve()
DATA_PATH = PROJECT_PATH / 'data'
SUPPLEMENT_PATH = PROJECT_PATH / 'supplement'
app_doc = StandardTexDoc(PROJECT_PATH / 'supplement', 'supplement', "TB dynamics Supplement", 'tb')

### Process Birth data

In [5]:
birth_rates = pd.read_csv(Path(DATA_PATH / 'camau_birth.csv'), index_col=0)['value']

### Process Death Data

In [6]:
# Path to the CSV file
file_path = './data/data.csv' #from UN Population

# Read the CSV file into a pandas DataFrame
data = pd.read_csv(file_path, usecols=['Age', 'Time', 'Population', 'Deaths'])
data = data.set_index(['Age', 'Time'])
data.index = data.index.swaplevel()
age_groups = set(data.index.get_level_values(1))
years = set(data.index.get_level_values(0))

In [7]:
def get_age_groups_in_range(age_groups, lower_limit, upper_limit):
    return [i for i in age_groups if '+' not in i and lower_limit <= int(i.split('-')[0]) <= upper_limit]

In [8]:
agegroup_request = [[0, 4], [5, 14], [15, 34], [35, 49], [50, 200]]
agegroup_map = {low: get_age_groups_in_range(age_groups, low, up) for low, up in agegroup_request}
agegroup_map[agegroup_request[-1][0]].append('100+')
mapped_rates = pd.DataFrame()
for year in years:
    for agegroup in agegroup_map:
        age_mask = [i in agegroup_map[agegroup] for i in data.index.get_level_values(1)]
        age_year_data = data.loc[age_mask].loc[year, :]
        total = age_year_data.sum()
        mapped_rates.loc[year, agegroup] = total['Deaths'] / total['Population']

mapped_rates.index = mapped_rates.index + 0.5
death_df = mapped_rates.loc[birth_rates.index]

## Creating model

In [9]:
tb_model = CompartmentalModel(
    times=(time_start, time_end),
    compartments=compartments,
    infectious_compartments=infectious_compartments,
    timestep=time_step,
)

In [10]:
## Set init pop
start_pop = Parameter("start_population_size")
init_pop = {
        "infectious": 0,
        "susceptible": start_pop - 0,
}
tb_model.set_initial_population(init_pop)

In [11]:
## Add birth process
process = "birth"
destination = "susceptible"
crude_birth_rate = get_sigmoidal_interpolation_function(
    birth_rates.index, birth_rates
)
tb_model.add_crude_birth_flow(
        process,
        crude_birth_rate,
        destination,
)

In [12]:
## Adding death process
process = "universal_death" ## will be adjusted later
universal_death_rate = 1.0
tb_model.add_universal_death_flows("universal_death", death_rate=universal_death_rate)

In [13]:
## Creating contact matrix
values = [
        [398.43289672, 261.82020387, 643.68286218, 401.62199159, 356.13449939],
        [165.78966683, 881.63067677, 532.84120554, 550.75979227, 285.62836724],
        [231.75164317, 311.38983781, 915.52884268, 673.30894113, 664.14577066],
        [141.94492435, 310.88835505, 786.13676958, 1134.31076003, 938.03403291],
        [67.30073632, 170.46333134, 647.30153978, 1018.81243422, 1763.57657715],
    ]
matrix = np.array(values).T

In [14]:
## Stratifying by age
strat = AgeStratification("age", age_strata, compartments)
strat.set_mixing_matrix(matrix)
universal_death_funcs, death_adjs = {}, {}
for age in age_strata:
    universal_death_funcs[age] = get_sigmoidal_interpolation_function(
        death_df.index, death_df[age]
    )
    death_adjs[str(age)] = Overwrite(universal_death_funcs[age])
strat.set_flow_adjustments("universal_death", death_adjs)
tb_model.stratify_with(strat)

In [15]:
## Request output
tb_model.request_output_for_compartments(
        "total_population", compartments, save_results=True
    )

DerivedOutput total_population {'request_type': 'comp', 'compartments': ['susceptible', 'early_latent', 'late_latent', 'infectious', 'on_treatment', 'recovered'], 'strata': {}, 'save_results': True}

In [21]:
tb_model._stratifications

[Stratification: age]

In [16]:
def request_compartment_output(model, output_name, ages,compartments, save_results=True):
    model.request_output_for_compartments(
            output_name, compartments, save_results=save_results
    )
    for age_stratum in ages:
        # For age-specific population calculations
        age_output_name = f"{output_name}Xage_{age_stratum}"
        model.request_output_for_compartments(
            age_output_name,
            compartments,
            strata={'age': age_stratum},
            save_results=save_results,
        )

In [17]:
compartments

['susceptible',
 'early_latent',
 'late_latent',
 'infectious',
 'on_treatment',
 'recovered']

In [18]:
tb_model._original_compartment_names

[susceptible, early_latent, late_latent, infectious, on_treatment, recovered]

In [19]:
request_compartment_output(tb_model, "population", age_strata, compartments)

AssertionError: No compartment matches: ['susceptible', 'early_latent', 'late_latent', 'infectious', 'on_treatment', 'recovered'] {50: 0}

## Optimizing model

In [None]:
params = {
    "start_population_size": 227344.75719536067,
}
priors = [
    esp.UniformPrior("start_population_size", (1, 300000)),
]
pop = pd.Series({2009: 1207100, 2019: 1194300})


targets = [
    est.NegativeBinomialTarget('total_population', pop, dispersion_param=2000),
]
calibration_model = BayesianCompartmentalModel(tb_model, params, priors, targets)

In [None]:
with pm.Model() as pmc_model:
    start_params = {k: np.clip(v, *calibration_model.priors[k].bounds(0.99)) for k, v in params.items() if k in calibration_model.priors}
    variables = epm.use_model(calibration_model)
    map_params = pm.find_MAP(start=start_params, vars=variables, include_transformed=False)
    map_params = {k: float(v) for k, v in map_params.items()}
    print('Best calibration parameters found:')
for i_param, param in enumerate(map_params):
    print(f'   {param}: {round_sigfig(map_params[param], 4)} (within bound {priors[i_param].bounds()}')

map_params

In [None]:
params.update(map_params)
tb_model.run(params)
params
derived_df_0 = tb_model.get_derived_outputs_df()

In [None]:
pop = tb_model.get_outputs_df()[['susceptibleXage_0', 'susceptibleXage_5', 'susceptibleXage_15', 'susceptibleXage_35', 'susceptibleXage_50']]

In [None]:
pop.columns=['pop_0', 'pop_5', 'pop_15', 'pop_35', 'pop_50']

In [None]:
pop.plot.area()

In [None]:
plots = {"total_population": {
      "title": "Population size",
      "output_key": "total_population",
      "times": [2009.0, 2019.0],
      "values": [1207100, 1194300],
      "quantiles": [0.025, 0.25, 0.5, 0.75, 0.975]
    },  
}

In [None]:
fig2_1 = px.line(
    derived_df_0,
    x=derived_df_0.index,
    y="total_population",
)
fig2_2 = px.scatter(x= plots['total_population']['times'], y = plots['total_population']['values'])
fig2_2.update_traces(marker=dict(color="red"))
fig2_3 = go.Figure(
    data=fig2_1.data + fig2_2.data,
)
fig2_3.update_layout(
    title="Modelled vs Data", title_x=0.5, xaxis_title="Year", yaxis_title="Population"
)
fig2_3