In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
import os

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd


* Data: aggregate infections / test consumption for all policies, all pooling methods, dilution only
* Use: Figure 4, 5, 6 (Pareto plot and zoom-ins)
* Format: pandas DataFrame

In [3]:
import pickle

NUM_TRIALS = 150

param_config = {
    "country": ["US"],

    "pop_size_default": 10000,

    "init_prev": [0.01],
    "init_prev_default": 0.01,

    'horizon_default': 100,

    "num_groups_default": 5,
    "num_groups": [1,2,3,4,5,6,7],

    'pool_size': [5,10,15,20],
    'pool_size_default': 10,

    'LoD_default': 1240,
    'LoD': [1240],

    "edge_weight_default": 10,
    "edge_weight": [10],

    "alpha_default": 5.0,
    "alpha": [2.0],

    "peak_VL": [6.0],

    "distancing_scale": [50.0],

    "dilute": ["average"],

    "beta_default": 0.1, # transmissibility
    "sigma_default": 0.2, # rate E --> I_pre
    "lamda_default": 0.5, # rate I_pre --> I_(a)sym
    "gamma_default": 0.25 # rate I_sym --> R
}


In [4]:
import itertools

param_values = {}

for param in [
        "country", "pop_size", "init_prev", "num_groups", "pool_size", "horizon", 
        "beta", "sigma", "lamda", "gamma", "LoD", "edge_weight", "alpha", "peak_VL",
        "distancing_scale", "dilute"
    ]:
        if param in param_config:
            param_values[param] = param_config[param]
        else:
            param_values[param] = [param_config[param+"_default"]]

all_param_configs = [dict(zip(param_values.keys(), x)) for x in itertools.product(*param_values.values())]

In [5]:
import copy
from collections import defaultdict


all_data_dilution = []

for param_config_single in all_param_configs:

    if param_config_single["dilute"] != "average":
        continue

    traj_info = [param_config_single, {}]
    
    path = f"/home/yz685/corr_pooling_seirsplus/results/{param_config_single['country']}"
    for param in [
        "pop_size", "init_prev", "num_groups", "pool_size", "horizon", 
        "beta", "sigma", "lamda", "gamma", "LoD", "edge_weight", "alpha", "peak_VL", "distancing_scale", "dilute"
    ]:
        path += f"_{param}={param_config_single[param]}"

    data = copy.deepcopy(param_config_single)

    results = defaultdict(list)

    for pooling_strategy in ["naive", "correlated", "correlated_weak"]:
        for i in range(1, NUM_TRIALS+1):
            filepath = path + f"/{pooling_strategy}/results_{i}.pickle"
            try:
                with open(filepath, "rb") as f:
                    results[pooling_strategy].append(pickle.load(f))
            except Exception as e:
                print("error: ", e)
                continue

    if len(results["naive"]) == 0:
        continue

    for metric in ["cumInfections", "cum_num_tests"]:
        
        for pooling_strategy in ["naive", "correlated", "correlated_weak"]:

            results_tmp = []
            for SEED in range(1,NUM_TRIALS+1):
                try:
                    results_tmp.append(
                        [x[metric]\
                        for x in results[pooling_strategy][SEED]]
                    )
                except:
                    pass

            results_tmp_ = np.array([(xi+[xi[-1]]*(100-len(xi)))[:100] for xi in results_tmp], dtype=float)
            mean = np.nanmean(results_tmp_, axis=0)
            sem = np.nanstd(results_tmp_, axis=0)/np.sqrt(NUM_TRIALS)
            traj_info[1][metric+"_"+pooling_strategy] = [mean, sem] # TODO: confirm

            if metric in ["cumInfections", "cum_num_tests"]:
                data[f"{metric}_{pooling_strategy}_mean"] = mean[-1]
                data[f"{metric}_{pooling_strategy}_sem"] = sem[-1]
            else:
                data[f"{metric}_{pooling_strategy}_mean"] = np.nanmean(mean)
                data[f"{metric}_{pooling_strategy}_sem"] = np.nanmean(sem)

    all_data_dilution.append(data)


In [18]:
df = pd.DataFrame(all_data_dilution)
df = df[
    (df["init_prev"] == 0.01)
    & (df["distancing_scale"] == 50)
    & (df["alpha"] == 2.0)
    & (df["dilute"] == "average")
]


In [19]:
df.drop(
    columns = [
        'country', 'pop_size', 'init_prev', 
        'horizon', 'beta', 'sigma', 'lamda', 'gamma', 'LoD', 'edge_weight',
        'alpha', 'peak_VL', 'distancing_scale', 'dilute'
    ],
    inplace=True
)

In [20]:
df

Unnamed: 0,num_groups,pool_size,cumInfections_naive_mean,cumInfections_naive_sem,cumInfections_correlated_mean,cumInfections_correlated_sem,cumInfections_correlated_weak_mean,cumInfections_correlated_weak_sem,cum_num_tests_naive_mean,cum_num_tests_naive_sem,cum_num_tests_correlated_mean,cum_num_tests_correlated_sem,cum_num_tests_correlated_weak_mean,cum_num_tests_correlated_weak_sem
0,1,5,2048.228188,25.231764,2054.979866,25.823553,2015.080537,22.790576,204703.751678,56.611906,202356.449664,29.85204,203294.852349,37.019762
1,1,10,2183.90604,24.612165,2120.208054,24.198518,2143.677852,22.997049,117605.912752,191.625415,111171.053691,125.050814,113558.744966,143.790948
2,1,15,2225.201342,22.74825,2129.812081,23.53968,2159.194631,22.228717,94805.302013,275.280984,84234.610738,191.740914,87958.369128,211.178134
3,1,20,2246.275168,23.536967,2176.228188,23.771737,2186.604027,24.210349,87570.973154,374.688588,73493.463087,248.331018,78324.322148,297.346919
4,2,5,2141.812081,25.05434,2101.718121,24.173749,2119.181208,25.584131,107669.167785,88.068254,104487.724832,52.774902,105804.402685,69.174515
5,2,10,2247.85906,24.812002,2146.0,26.017065,2207.778523,24.417654,69351.127517,206.323205,61793.402685,141.955751,64534.328859,158.970689
6,2,15,2361.295302,24.116398,2194.395973,23.280378,2247.469799,24.494703,63646.892617,291.292635,51030.42953,185.865512,55026.469799,224.25582
7,2,20,2348.214765,24.266491,2255.946309,22.698043,2285.52349,22.1065,64049.691275,372.928507,48276.248322,230.198242,53398.61745,263.0449
8,3,5,2379.449664,26.289659,2244.456376,23.977653,2303.308725,23.270119,76224.939597,103.287128,72159.812081,58.157777,73828.671141,71.122709
9,3,10,2471.946309,24.161829,2297.778523,26.660175,2364.087248,23.924649,54815.147651,200.830514,46147.369128,149.220655,49322.751678,157.892001


In [21]:
df.to_csv("../../data/infections_testconsumption_dilution.csv", index=False)

* Data: aggregate infections for all policies, NP and CCP, and all test error models
* Use: Figure 7 and Figure EC3 (comparing NP inf - CP inf for different test error models)
* Format: pickled dictionaries

In [1]:
import pickle

NUM_TRIALS = 150

param_config = {
    "country": ["US"],

    "pop_size_default": 10000,

    "init_prev": [0.01],
    "init_prev_default": 0.01,

    'horizon_default': 100,

    "num_groups_default": 5,
    "num_groups": [1,2,3,4,5,6,7],

    'pool_size': [5,10,15,20],
    'pool_size_default': 10,

    'LoD_default': 1240,
    'LoD': [1240],

    "edge_weight_default": 10,
    "edge_weight": [10],

    "alpha_default": 5.0,
    "alpha": [2.0],

    "peak_VL": [6.0],

    "distancing_scale": [50.0],

    "dilute": ["average", "sum", "constant_0.5", "constant_0.7", "constant_1.0"],

    "beta_default": 0.1, # transmissibility
    "sigma_default": 0.2, # rate E --> I_pre
    "lamda_default": 0.5, # rate I_pre --> I_(a)sym
    "gamma_default": 0.25 # rate I_sym --> R
}

import itertools

param_values = {}

for param in [
        "country", "pop_size", "init_prev", "num_groups", "pool_size", "horizon", 
        "beta", "sigma", "lamda", "gamma", "LoD", "edge_weight", "alpha", "peak_VL",
        "distancing_scale", "dilute"
    ]:
        if param in param_config:
            param_values[param] = param_config[param]
        else:
            param_values[param] = [param_config[param+"_default"]]

all_param_configs = [dict(zip(param_values.keys(), x)) for x in itertools.product(*param_values.values())]

In [2]:
import copy
from collections import defaultdict
import numpy as np


final_inf_np = {}
final_inf_cp = {}

for param_config_single in all_param_configs:
    
    path = f"/home/yz685/corr_pooling_seirsplus/results/{param_config_single['country']}"
    for param in [
        "pop_size", "init_prev", "num_groups", "pool_size", "horizon", 
        "beta", "sigma", "lamda", "gamma", "LoD", "edge_weight", "alpha", "peak_VL", "distancing_scale", "dilute"
    ]:
        path += f"_{param}={param_config_single[param]}"

    results = defaultdict(list)

    for pooling_strategy in ["naive", "correlated_weak"]:
        for i in range(1, NUM_TRIALS+1):
            filepath = path + f"/{pooling_strategy}/results_{i}.pickle"
            try:
                with open(filepath, "rb") as f:
                    results[pooling_strategy].append(pickle.load(f))
            except Exception as e:
                print("error: ", e)
                continue

    if len(results["naive"]) == 0:
        continue

    cumInfections_by_trial = {}

    for metric in ["cumInfections"]:
        
        for pooling_strategy in ["naive", "correlated_weak"]:

            results_tmp = []
            for seed_idx in range(NUM_TRIALS):
                try:
                    results_tmp.append(
                        [x[metric]\
                        for x in results[pooling_strategy][seed_idx]]
                    )
                except:
                    pass

            results_tmp_ = np.array([(xi+[xi[-1]]*(100-len(xi)))[:100] for xi in results_tmp], dtype=float)
            if metric == "cumInfections":
                cumInfections_by_trial[pooling_strategy] = results_tmp_[:,-1]
    
    final_inf_np[(param_config_single["num_groups"], param_config_single["pool_size"], param_config_single["dilute"])] = cumInfections_by_trial["naive"]
    final_inf_cp[(param_config_single["num_groups"], param_config_single["pool_size"], param_config_single["dilute"])] = cumInfections_by_trial["correlated_weak"]

# save final_inf_np, final_inf_cp
with open("../../data/infections_np_nodil.pickle", "wb") as f:
    pickle.dump(final_inf_np, f)
with open("../../data/infections_ccp_nodil.pickle", "wb") as f:
    pickle.dump(final_inf_cp, f)

* Data: detailed trajectory info for (5,10) under the dilution model
* Use: Figure 3
* Format: pickled nested list-dictionary

In [3]:
import pickle

NUM_TRIALS = 150

param_config = {
    "country": ["US"],

    "pop_size_default": 10000,

    "init_prev": [0.01],
    "init_prev_default": 0.01,

    'horizon_default': 100,

    "num_groups_default": 5,
    "num_groups": [5],

    'pool_size': [10],
    'pool_size_default': 10,

    'LoD_default': 1240,
    'LoD': [1240],

    "edge_weight_default": 10,
    "edge_weight": [10],

    "alpha_default": 5.0,
    "alpha": [2.0],

    "peak_VL": [6.0],

    "distancing_scale": [50.0],

    "dilute": ["average"],

    "beta_default": 0.1, # transmissibility
    "sigma_default": 0.2, # rate E --> I_pre
    "lamda_default": 0.5, # rate I_pre --> I_(a)sym
    "gamma_default": 0.25 # rate I_sym --> R
}

import itertools

param_values = {}

for param in [
        "country", "pop_size", "init_prev", "num_groups", "pool_size", "horizon", 
        "beta", "sigma", "lamda", "gamma", "LoD", "edge_weight", "alpha", "peak_VL",
        "distancing_scale", "dilute"
    ]:
        if param in param_config:
            param_values[param] = param_config[param]
        else:
            param_values[param] = [param_config[param+"_default"]]

all_param_configs = [dict(zip(param_values.keys(), x)) for x in itertools.product(*param_values.values())]

In [4]:
import copy
from collections import defaultdict


trajectories = [] # list of tuples (dict of param_config_single, dict of {metric: [mean, std]})

for param_config_single in all_param_configs:

    traj_info = [param_config_single, {}]
    
    path = f"/home/yz685/corr_pooling_seirsplus/results/{param_config_single['country']}"
    for param in [
        "pop_size", "init_prev", "num_groups", "pool_size", "horizon", 
        "beta", "sigma", "lamda", "gamma", "LoD", "edge_weight", "alpha", "peak_VL", "distancing_scale", "dilute"
    ]:
        path += f"_{param}={param_config_single[param]}"

    results = defaultdict(list)

    for pooling_strategy in ["naive", "correlated", "correlated_weak"]:
        for i in range(1, NUM_TRIALS+1):
            filepath = path + f"/{pooling_strategy}/results_{i}.pickle"
            try:
                with open(filepath, "rb") as f:
                    results[pooling_strategy].append(pickle.load(f))
            except Exception as e:
                # print("error: ", e)
                continue

    if len(results["naive"]) == 0:
        continue

    metrics = set(results["correlated"][0][0].keys()) - set(["day", "VL_in_positive_pools"])

    for metric in metrics:
        
        for pooling_strategy in ["naive", "correlated", "correlated_weak"]:

            results_tmp = []
            for SEED in range(NUM_TRIALS):
                try:
                    results_tmp.append(
                        [x[metric]\
                        for x in results[pooling_strategy][SEED]]
                    )
                except:
                    pass

            results_tmp_ = np.array([(xi+[xi[-1]]*(100-len(xi)))[:100] for xi in results_tmp], dtype=float)
            mean = np.nanmean(results_tmp_, axis=0)
            sem = np.nanstd(results_tmp_, axis=0)/np.sqrt(NUM_TRIALS)
            traj_info[1][metric+"_"+pooling_strategy] = [mean, sem] 

    trajectories.append(traj_info)

with open("../../data/trajectory_info_numgroups=5_poolsize=10.pickle", "wb") as f:
    pickle.dump(trajectories, f)


  mean = np.nanmean(results_tmp_, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  results_tmp_ = np.array([(xi+[xi[-1]]*(100-len(xi)))[:100] for xi in results_tmp], dtype=float)
